VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 80253

Last change on this file since 80253 was 80253, checked in by vboxsync, 6 years ago

VMM: Started refactoring VMMAll/* for bugref:9217

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 121.3 KB
Line 
1/* $Id: GVMMR0.cpp 80253 2019-08-13 15:49:33Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2019 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered; this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-CPU
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * The TMCalcHostTimerFrequency() part of this takes the max
42 * TMTimerSetFrequencyHint() value and adjusts it by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
47
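/*
 * Illustrative sketch (not part of the original file): one plausible reading
 * of the frequency adjustment described above.  The largest
 * TMTimerSetFrequencyHint() value is scaled up by the catch-up percentage and
 * scaled by the warp-drive percentage; the real calculation, including its
 * fudge factors, lives in TMCalcHostTimerFrequency() and may differ in detail.
 */
#if 0 /* example only, not compiled */
static uint32_t gvmmExampleAdjustTimerHz(uint32_t uMaxHintHz, uint32_t uCatchUpPct, uint32_t uWarpDrivePct)
{
    uint64_t uHz = uMaxHintHz;
    uHz = uHz * (100 + uCatchUpPct) / 100;  /* catching up => virtual time runs faster => more ticks needed */
    uHz = uHz * uWarpDrivePct / 100;        /* warp drive scales virtual time; 100 means nominal speed */
    return uHz < UINT32_MAX ? (uint32_t)uHz : UINT32_MAX;
}
#endif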
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/gvm.h>
57#include <VBox/vmm/vm.h>
58#include <VBox/vmm/vmcpuset.h>
59#include <VBox/vmm/vmm.h>
60#ifdef VBOX_WITH_NEM_R0
61# include <VBox/vmm/nem.h>
62#endif
63#include <VBox/param.h>
64#include <VBox/err.h>
65
66#include <iprt/asm.h>
67#include <iprt/asm-amd64-x86.h>
68#include <iprt/critsect.h>
69#include <iprt/mem.h>
70#include <iprt/semaphore.h>
71#include <iprt/time.h>
72#include <VBox/log.h>
73#include <iprt/thread.h>
74#include <iprt/process.h>
75#include <iprt/param.h>
76#include <iprt/string.h>
77#include <iprt/assert.h>
78#include <iprt/mem.h>
79#include <iprt/memobj.h>
80#include <iprt/mp.h>
81#include <iprt/cpuset.h>
82#include <iprt/spinlock.h>
83#include <iprt/timer.h>
84
85#include "dtrace/VBoxVMM.h"
86
87
88/*********************************************************************************************************************************
89* Defined Constants And Macros *
90*********************************************************************************************************************************/
91#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
92/** Define this to enable the periodic preemption timer. */
93# define GVMM_SCHED_WITH_PPT
94#endif
95
96
97/** @def GVMM_CHECK_SMAP_SETUP
98 * SMAP check setup. */
99/** @def GVMM_CHECK_SMAP_CHECK
100 * Checks that the AC flag is set if SMAP is enabled. If AC is not set,
101 * it will be logged and @a a_BadExpr is executed. */
102/** @def GVMM_CHECK_SMAP_CHECK2
103 * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will
104 * be logged, written to the VM's assertion text buffer, and @a a_BadExpr is
105 * executed. */
106#if defined(VBOX_STRICT) || 1
107# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
108# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) \
109 do { \
110 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
111 { \
112 RTCCUINTREG fEflCheck = ASMGetFlags(); \
113 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
114 { /* likely */ } \
115 else \
116 { \
117 SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
118 a_BadExpr; \
119 } \
120 } \
121 } while (0)
122# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) \
123 do { \
124 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
125 { \
126 RTCCUINTREG fEflCheck = ASMGetFlags(); \
127 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
128 { /* likely */ } \
129 else \
130 { \
131 SUPR0BadContext((a_pVM) ? (a_pVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
132 a_BadExpr; \
133 } \
134 } \
135 } while (0)
136#else
137# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0
138# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures)
139# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) NOREF(fKernelFeatures)
140#endif
141
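/*
 * Illustrative sketch (not part of the original file): how the SMAP check
 * macros above are meant to be used in a ring-0 entry point.  The function
 * and the status code passed as a_BadExpr are made up for the example.
 */
#if 0 /* example only, not compiled */
static int gvmmR0ExampleEntryPoint(PGVM pGVM)
{
    GVMM_CHECK_SMAP_SETUP();                                    /* caches SUPR0GetKernelFeatures() once */
    GVMM_CHECK_SMAP_CHECK2(pGVM, return VERR_INTERNAL_ERROR);   /* verify EFLAGS.AC on entry */

    /* ... the actual work would go here ... */

    GVMM_CHECK_SMAP_CHECK2(pGVM, return VERR_INTERNAL_ERROR);   /* and again before returning */
    return VINF_SUCCESS;
}
#endif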
142
143
144/*********************************************************************************************************************************
145* Structures and Typedefs *
146*********************************************************************************************************************************/
147
148/**
149 * Global VM handle.
150 */
151typedef struct GVMHANDLE
152{
153 /** The index of the next handle in the list (free or used). (0 is nil.) */
154 uint16_t volatile iNext;
155 /** Our own index / handle value. */
156 uint16_t iSelf;
157 /** The process ID of the handle owner.
158 * This is used for access checks. */
159 RTPROCESS ProcId;
160 /** The pointer to the ring-0 only (aka global) VM structure. */
161 PGVM pGVM;
162 /** The ring-0 mapping of the shared VM instance data. */
163 PVM pVM;
164 /** The virtual machine object. */
165 void *pvObj;
166 /** The session this VM is associated with. */
167 PSUPDRVSESSION pSession;
168 /** The ring-0 handle of the EMT0 thread.
169 * This is used for ownership checks as well as looking up a VM handle by thread
170 * at times like assertions. */
171 RTNATIVETHREAD hEMT0;
172} GVMHANDLE;
173/** Pointer to a global VM handle. */
174typedef GVMHANDLE *PGVMHANDLE;
175
176/** Number of GVM handles (including the NIL handle). */
177#if HC_ARCH_BITS == 64
178# define GVMM_MAX_HANDLES 8192
179#else
180# define GVMM_MAX_HANDLES 128
181#endif
182
183/**
184 * Per host CPU GVMM data.
185 */
186typedef struct GVMMHOSTCPU
187{
188 /** Magic number (GVMMHOSTCPU_MAGIC). */
189 uint32_t volatile u32Magic;
190 /** The CPU ID. */
191 RTCPUID idCpu;
192 /** The CPU set index. */
193 uint32_t idxCpuSet;
194
195#ifdef GVMM_SCHED_WITH_PPT
196 /** Periodic preemption timer data. */
197 struct
198 {
199 /** The handle to the periodic preemption timer. */
200 PRTTIMER pTimer;
201 /** Spinlock protecting the data below. */
202 RTSPINLOCK hSpinlock;
203 /** The smallest Hz that we need to care about. (static) */
204 uint32_t uMinHz;
205 /** The number of ticks between each historization. */
206 uint32_t cTicksHistoriziationInterval;
207 /** The current historization tick (counting up to
208 * cTicksHistoriziationInterval and then resetting). */
209 uint32_t iTickHistorization;
210 /** The current timer interval. This is set to 0 when inactive. */
211 uint32_t cNsInterval;
212 /** The current timer frequency. This is set to 0 when inactive. */
213 uint32_t uTimerHz;
214 /** The current max frequency reported by the EMTs.
215 * This gets historicized and reset by the timer callback. This is
216 * read without holding the spinlock, so needs atomic updating. */
217 uint32_t volatile uDesiredHz;
218 /** Whether the timer was started or not. */
219 bool volatile fStarted;
220 /** Set if we're starting the timer. */
221 bool volatile fStarting;
222 /** The index of the next history entry (mod it). */
223 uint32_t iHzHistory;
224 /** Historicized uDesiredHz values. The array wraps around, new entries
225 * are added at iHzHistory. This is updated approximately every
226 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
227 uint32_t aHzHistory[8];
228 /** Statistics counter for recording the number of interval changes. */
229 uint32_t cChanges;
230 /** Statistics counter for recording the number of timer starts. */
231 uint32_t cStarts;
232 } Ppt;
233#endif /* GVMM_SCHED_WITH_PPT */
234
235} GVMMHOSTCPU;
236/** Pointer to the per host CPU GVMM data. */
237typedef GVMMHOSTCPU *PGVMMHOSTCPU;
238/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
239#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
240 /** The interval one history entry should cover (approximately), given in
241 * nanoseconds. */
242#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
243
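/*
 * Illustrative sketch (not part of the original file): how the effective
 * timer frequency could be derived from the structure above - the maximum of
 * the historicized samples (RT_ELEMENTS(aHzHistory) * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS,
 * i.e. roughly 160 ms worth) and the currently reported uDesiredHz, never
 * going below uMinHz.  The real logic lives in the timer callback and
 * GVMMR0SchedUpdatePeriodicPreemptionTimer().
 */
#if 0 /* example only, not compiled */
# ifdef GVMM_SCHED_WITH_PPT
static uint32_t gvmmR0ExamplePptMaxHz(PGVMMHOSTCPU pCpu)
{
    uint32_t uMaxHz = ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz);  /* updated by EMTs without the spinlock */
    for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
        uMaxHz = RT_MAX(uMaxHz, pCpu->Ppt.aHzHistory[i]);
    return RT_MAX(uMaxHz, pCpu->Ppt.uMinHz);                    /* respect the per-CPU floor */
}
# endif
#endif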
244
245/**
246 * The GVMM instance data.
247 */
248typedef struct GVMM
249{
250 /** Eyecatcher / magic. */
251 uint32_t u32Magic;
252 /** The index of the head of the free handle chain. (0 is nil.) */
253 uint16_t volatile iFreeHead;
254 /** The index of the head of the active handle chain. (0 is nil.) */
255 uint16_t volatile iUsedHead;
256 /** The number of VMs. */
257 uint16_t volatile cVMs;
258 /** Alignment padding. */
259 uint16_t u16Reserved;
260 /** The number of EMTs. */
261 uint32_t volatile cEMTs;
262 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
263 uint32_t volatile cHaltedEMTs;
264 /** Mini lock for restricting early wake-ups to one thread. */
265 bool volatile fDoingEarlyWakeUps;
266 bool afPadding[3]; /**< explicit alignment padding. */
267 /** When the next halted or sleeping EMT will wake up.
268 * This is set to 0 when it needs recalculating and to UINT64_MAX when
269 * there are no halted or sleeping EMTs in the GVMM. */
270 uint64_t uNsNextEmtWakeup;
271 /** The lock used to serialize VM creation, destruction and associated events that
272 * aren't performance critical. Owners may acquire the list lock. */
273 RTCRITSECT CreateDestroyLock;
274 /** The lock used to serialize used list updates and accesses.
275 * This indirectly includes scheduling since the scheduler will have to walk the
276 * used list to examine running VMs. Owners may not acquire any other locks. */
277 RTCRITSECTRW UsedLock;
278 /** The handle array.
279 * The size of this array defines the maximum number of currently running VMs.
280 * The first entry is unused as it represents the NIL handle. */
281 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
282
283 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
284 * The number of EMTs that means we no longer consider ourselves alone on a
285 * CPU/Core.
286 */
287 uint32_t cEMTsMeansCompany;
288 /** @gcfgm{/GVMM/MinSleepAlone, 32-bit, 0, 100000000, 750000, ns}
289 * The minimum sleep time for when we're alone, in nanoseconds.
290 */
291 uint32_t nsMinSleepAlone;
292 /** @gcfgm{/GVMM/MinSleepCompany, 32-bit, 0, 100000000, 15000, ns}
293 * The minimum sleep time for when we've got company, in nanoseconds.
294 */
295 uint32_t nsMinSleepCompany;
296 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
297 * The limit for the first round of early wake-ups, given in nanoseconds.
298 */
299 uint32_t nsEarlyWakeUp1;
300 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
301 * The limit for the second round of early wake-ups, given in nanoseconds.
302 */
303 uint32_t nsEarlyWakeUp2;
304
305 /** Set if we're doing early wake-ups.
306 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
307 bool volatile fDoEarlyWakeUps;
308
309 /** The number of entries in the host CPU array (aHostCpus). */
310 uint32_t cHostCpus;
311 /** Per host CPU data (variable length). */
312 GVMMHOSTCPU aHostCpus[1];
313} GVMM;
314AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
315AssertCompileMemberAlignment(GVMM, UsedLock, 8);
316AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
317/** Pointer to the GVMM instance data. */
318typedef GVMM *PGVMM;
319
320/** The GVMM::u32Magic value (Charlie Haden). */
321#define GVMM_MAGIC UINT32_C(0x19370806)
322
323
324
325/*********************************************************************************************************************************
326* Global Variables *
327*********************************************************************************************************************************/
328/** Pointer to the GVMM instance data.
329 * (Just my general dislike for global variables.) */
330static PGVMM g_pGVMM = NULL;
331
332/** Macro for obtaining and validating the g_pGVMM pointer.
333 * On failure it will return from the invoking function with the specified return value.
334 *
335 * @param pGVMM The name of the pGVMM variable.
336 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
337 * status codes.
338 */
339#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
340 do { \
341 (pGVMM) = g_pGVMM;\
342 AssertPtrReturn((pGVMM), (rc)); \
343 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
344 } while (0)
345
346/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
347 * On failure it will return from the invoking function.
348 *
349 * @param pGVMM The name of the pGVMM variable.
350 */
351#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
352 do { \
353 (pGVMM) = g_pGVMM;\
354 AssertPtrReturnVoid((pGVMM)); \
355 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
356 } while (0)
357
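/*
 * Illustrative sketch (not part of the original file): typical use of the
 * instance macros above.  The helper name and its purpose are made up.
 */
#if 0 /* example only, not compiled */
static uint32_t gvmmR0ExampleGetVMCount(void)
{
    PGVMM pGVMM;
    GVMM_GET_VALID_INSTANCE(pGVMM, 0);  /* returns 0 from this helper if g_pGVMM is missing or corrupt */
    return pGVMM->cVMs;
}
#endif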
358
359/*********************************************************************************************************************************
360* Internal Functions *
361*********************************************************************************************************************************/
362#ifdef VBOX_BUGREF_9217
363static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession);
364#else
365static void gvmmR0InitPerVMData(PGVM pGVM);
366#endif
367static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
368static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock);
369static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM);
370
371#ifdef GVMM_SCHED_WITH_PPT
372static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
373#endif
374
375
376/**
377 * Initializes the GVMM.
378 *
379 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
380 *
381 * @returns VBox status code.
382 */
383GVMMR0DECL(int) GVMMR0Init(void)
384{
385 LogFlow(("GVMMR0Init:\n"));
386
387 /*
388 * Allocate and initialize the instance data.
389 */
390 uint32_t cHostCpus = RTMpGetArraySize();
391 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
392
393 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
394 if (!pGVMM)
395 return VERR_NO_MEMORY;
396 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
397 "GVMM-CreateDestroyLock");
398 if (RT_SUCCESS(rc))
399 {
400 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
401 if (RT_SUCCESS(rc))
402 {
403 pGVMM->u32Magic = GVMM_MAGIC;
404 pGVMM->iUsedHead = 0;
405 pGVMM->iFreeHead = 1;
406
407 /* the nil handle */
408 pGVMM->aHandles[0].iSelf = 0;
409 pGVMM->aHandles[0].iNext = 0;
410
411 /* the tail */
412 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
413 pGVMM->aHandles[i].iSelf = i;
414 pGVMM->aHandles[i].iNext = 0; /* nil */
415
416 /* the rest */
417 while (i-- > 1)
418 {
419 pGVMM->aHandles[i].iSelf = i;
420 pGVMM->aHandles[i].iNext = i + 1;
421 }
422
423 /* The default configuration values. */
424 uint32_t cNsResolution = RTSemEventMultiGetResolution();
425 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the CPU count or something... */
426 if (cNsResolution >= 5*RT_NS_100US)
427 {
428 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
429 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
430 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
431 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
432 }
433 else if (cNsResolution > RT_NS_100US)
434 {
435 pGVMM->nsMinSleepAlone = cNsResolution / 2;
436 pGVMM->nsMinSleepCompany = cNsResolution / 4;
437 pGVMM->nsEarlyWakeUp1 = 0;
438 pGVMM->nsEarlyWakeUp2 = 0;
439 }
440 else
441 {
442 pGVMM->nsMinSleepAlone = 2000;
443 pGVMM->nsMinSleepCompany = 2000;
444 pGVMM->nsEarlyWakeUp1 = 0;
445 pGVMM->nsEarlyWakeUp2 = 0;
446 }
447 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
448
449 /* The host CPU data. */
450 pGVMM->cHostCpus = cHostCpus;
451 uint32_t iCpu = cHostCpus;
452 RTCPUSET PossibleSet;
453 RTMpGetSet(&PossibleSet);
454 while (iCpu-- > 0)
455 {
456 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
457#ifdef GVMM_SCHED_WITH_PPT
458 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
459 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
460 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
461 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
462 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
463 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
464 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
465 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
466 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
467 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
468 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
469 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
470#endif
471
472 if (RTCpuSetIsMember(&PossibleSet, iCpu))
473 {
474 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
475 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
476
477#ifdef GVMM_SCHED_WITH_PPT
478 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
479 50*1000*1000 /* whatever */,
480 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
481 gvmmR0SchedPeriodicPreemptionTimerCallback,
482 &pGVMM->aHostCpus[iCpu]);
483 if (RT_SUCCESS(rc))
484 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
485 if (RT_FAILURE(rc))
486 {
487 while (iCpu < cHostCpus)
488 {
489 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
490 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
491 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
492 iCpu++;
493 }
494 break;
495 }
496#endif
497 }
498 else
499 {
500 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
501 pGVMM->aHostCpus[iCpu].u32Magic = 0;
502 }
503 }
504 if (RT_SUCCESS(rc))
505 {
506 g_pGVMM = pGVMM;
507 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
508 return VINF_SUCCESS;
509 }
510
511 /* bail out. */
512 RTCritSectRwDelete(&pGVMM->UsedLock);
513 }
514 RTCritSectDelete(&pGVMM->CreateDestroyLock);
515 }
516
517 RTMemFree(pGVMM);
518 return rc;
519}
520
521
522/**
523 * Terminates the GVM.
524 *
525 * This is called while owning the loader semaphore (see supdrvLdrFree()).
526 * And unless something is wrong, there should be absolutely no VMs
527 * registered at this point.
528 */
529GVMMR0DECL(void) GVMMR0Term(void)
530{
531 LogFlow(("GVMMR0Term:\n"));
532
533 PGVMM pGVMM = g_pGVMM;
534 g_pGVMM = NULL;
535 if (RT_UNLIKELY(!VALID_PTR(pGVMM)))
536 {
537 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
538 return;
539 }
540
541 /*
542 * First of all, stop all active timers.
543 */
544 uint32_t cActiveTimers = 0;
545 uint32_t iCpu = pGVMM->cHostCpus;
546 while (iCpu-- > 0)
547 {
548 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
549#ifdef GVMM_SCHED_WITH_PPT
550 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
551 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
552 cActiveTimers++;
553#endif
554 }
555 if (cActiveTimers)
556 RTThreadSleep(1); /* fudge */
557
558 /*
559 * Invalidate the instance and free resources.
560 */
561 pGVMM->u32Magic = ~GVMM_MAGIC;
562 RTCritSectRwDelete(&pGVMM->UsedLock);
563 RTCritSectDelete(&pGVMM->CreateDestroyLock);
564
565 pGVMM->iFreeHead = 0;
566 if (pGVMM->iUsedHead)
567 {
568 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
569 pGVMM->iUsedHead = 0;
570 }
571
572#ifdef GVMM_SCHED_WITH_PPT
573 iCpu = pGVMM->cHostCpus;
574 while (iCpu-- > 0)
575 {
576 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
577 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
578 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
579 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
580 }
581#endif
582
583 RTMemFree(pGVMM);
584}
585
586
587/**
588 * A quick hack for setting global config values.
589 *
590 * @returns VBox status code.
591 *
592 * @param pSession The session handle. Used for authentication.
593 * @param pszName The variable name.
594 * @param u64Value The new value.
595 */
596GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
597{
598 /*
599 * Validate input.
600 */
601 PGVMM pGVMM;
602 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
603 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
604 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
605
606 /*
607 * String switch time!
608 */
609 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
610 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
611 int rc = VINF_SUCCESS;
612 pszName += sizeof("/GVMM/") - 1;
613 if (!strcmp(pszName, "cEMTsMeansCompany"))
614 {
615 if (u64Value <= UINT32_MAX)
616 pGVMM->cEMTsMeansCompany = u64Value;
617 else
618 rc = VERR_OUT_OF_RANGE;
619 }
620 else if (!strcmp(pszName, "MinSleepAlone"))
621 {
622 if (u64Value <= RT_NS_100MS)
623 pGVMM->nsMinSleepAlone = u64Value;
624 else
625 rc = VERR_OUT_OF_RANGE;
626 }
627 else if (!strcmp(pszName, "MinSleepCompany"))
628 {
629 if (u64Value <= RT_NS_100MS)
630 pGVMM->nsMinSleepCompany = u64Value;
631 else
632 rc = VERR_OUT_OF_RANGE;
633 }
634 else if (!strcmp(pszName, "EarlyWakeUp1"))
635 {
636 if (u64Value <= RT_NS_100MS)
637 {
638 pGVMM->nsEarlyWakeUp1 = u64Value;
639 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
640 }
641 else
642 rc = VERR_OUT_OF_RANGE;
643 }
644 else if (!strcmp(pszName, "EarlyWakeUp2"))
645 {
646 if (u64Value <= RT_NS_100MS)
647 {
648 pGVMM->nsEarlyWakeUp2 = u64Value;
649 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
650 }
651 else
652 rc = VERR_OUT_OF_RANGE;
653 }
654 else
655 rc = VERR_CFGM_VALUE_NOT_FOUND;
656 return rc;
657}
658
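/*
 * Illustrative sketch (not part of the original file): tuning one of the
 * scheduling knobs via the API above.  The session is assumed to be valid and
 * the 500000 ns value is arbitrary; names are matched after the "/GVMM/"
 * prefix and values are range checked as shown above.
 */
#if 0 /* example only, not compiled */
static int gvmmR0ExampleTuneMinSleep(PSUPDRVSESSION pSession)
{
    return GVMMR0SetConfig(pSession, "/GVMM/MinSleepAlone", 500000 /* ns */);
}
#endif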
659
660/**
661 * A quick hack for getting global config values.
662 *
663 * @returns VBox status code.
664 *
665 * @param pSession The session handle. Used for authentication.
666 * @param pszName The variable name.
667 * @param pu64Value Where to return the value.
668 */
669GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
670{
671 /*
672 * Validate input.
673 */
674 PGVMM pGVMM;
675 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
676 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
677 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
678 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
679
680 /*
681 * String switch time!
682 */
683 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
684 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
685 int rc = VINF_SUCCESS;
686 pszName += sizeof("/GVMM/") - 1;
687 if (!strcmp(pszName, "cEMTsMeansCompany"))
688 *pu64Value = pGVMM->cEMTsMeansCompany;
689 else if (!strcmp(pszName, "MinSleepAlone"))
690 *pu64Value = pGVMM->nsMinSleepAlone;
691 else if (!strcmp(pszName, "MinSleepCompany"))
692 *pu64Value = pGVMM->nsMinSleepCompany;
693 else if (!strcmp(pszName, "EarlyWakeUp1"))
694 *pu64Value = pGVMM->nsEarlyWakeUp1;
695 else if (!strcmp(pszName, "EarlyWakeUp2"))
696 *pu64Value = pGVMM->nsEarlyWakeUp2;
697 else
698 rc = VERR_CFGM_VALUE_NOT_FOUND;
699 return rc;
700}
701
702
703/**
704 * Acquire the 'used' lock in shared mode.
705 *
706 * This prevents destruction of the VM while we're in ring-0.
707 *
708 * @returns IPRT status code, see RTSemFastMutexRequest.
709 * @param a_pGVMM The GVMM instance data.
710 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
711 */
712#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
713
714/**
715 * Release the 'used' lock when owning it in shared mode.
716 *
717 * @returns IPRT status code, see RTSemFastMutexRequest.
718 * @param a_pGVMM The GVMM instance data.
719 * @sa GVMMR0_USED_SHARED_LOCK
720 */
721#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
722
723/**
724 * Acquire the 'used' lock in exclusive mode.
725 *
726 * Only use this function when making changes to the used list.
727 *
728 * @returns IPRT status code, see RTSemFastMutexRequest.
729 * @param a_pGVMM The GVMM instance data.
730 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
731 */
732#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
733
734/**
735 * Release the 'used' lock when owning it in exclusive mode.
736 *
737 * @returns IPRT status code, see RTSemFastMutexRelease.
738 * @param a_pGVMM The GVMM instance data.
739 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
740 */
741#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
742
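/*
 * Illustrative sketch (not part of the original file): the read-only walk
 * pattern the shared-mode macros above are intended for.  The helper is made
 * up; real walkers such as the scheduler do considerably more per entry.
 */
#if 0 /* example only, not compiled */
static uint32_t gvmmR0ExampleCountUsedHandles(PGVMM pGVMM)
{
    uint32_t cUsed = 0;
    int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);    /* shared mode: we only read the used list */
    AssertRCReturn(rc, 0);
    for (uint16_t iHandle = pGVMM->iUsedHead;
         iHandle != 0 && iHandle < RT_ELEMENTS(pGVMM->aHandles) && cUsed < RT_ELEMENTS(pGVMM->aHandles);
         iHandle = pGVMM->aHandles[iHandle].iNext)
        cUsed++;
    GVMMR0_USED_SHARED_UNLOCK(pGVMM);
    return cUsed;
}
#endif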
743
744/**
745 * Try acquire the 'create & destroy' lock.
746 *
747 * @returns IPRT status code, see RTSemFastMutexRequest.
748 * @param pGVMM The GVMM instance data.
749 */
750DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
751{
752 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
753 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
754 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
755 return rc;
756}
757
758
759/**
760 * Release the 'create & destroy' lock.
761 *
762 * @returns IPRT status code, see RTSemFastMutexRequest.
763 * @param pGVMM The GVMM instance data.
764 */
765DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
766{
767 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
768 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
769 AssertRC(rc);
770 return rc;
771}
772
773
774/**
775 * Request wrapper for the GVMMR0CreateVM API.
776 *
777 * @returns VBox status code.
778 * @param pReq The request buffer.
779 * @param pSession The session handle. The VM will be associated with this.
780 */
781GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
782{
783 /*
784 * Validate the request.
785 */
786 if (!VALID_PTR(pReq))
787 return VERR_INVALID_POINTER;
788 if (pReq->Hdr.cbReq != sizeof(*pReq))
789 return VERR_INVALID_PARAMETER;
790 if (pReq->pSession != pSession)
791 return VERR_INVALID_POINTER;
792
793 /*
794 * Execute it.
795 */
796 PVM pVM;
797 pReq->pVMR0 = NULL;
798 pReq->pVMR3 = NIL_RTR3PTR;
799 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pVM);
800 if (RT_SUCCESS(rc))
801 {
802 pReq->pVMR0 = pVM;
803 pReq->pVMR3 = pVM->pVMR3;
804 }
805 return rc;
806}
807
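/*
 * Illustrative sketch (not part of the original file): the shape of a
 * GVMMR0CreateVMReq request.  Only the fields the wrapper above validates are
 * shown; a real caller also fills in the rest of the request header, so treat
 * this strictly as a sketch.
 */
#if 0 /* example only, not compiled */
static int gvmmR0ExampleCreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVMR0)
{
    GVMMCREATEVMREQ Req;
    RT_ZERO(Req);
    Req.Hdr.cbReq = sizeof(Req);        /* must equal sizeof(*pReq) */
    Req.pSession  = pSession;           /* must match the session argument */
    Req.cCpus     = cCpus;              /* 1..VMM_MAX_CPU_COUNT */
    int rc = GVMMR0CreateVMReq(&Req, pSession);
    if (RT_SUCCESS(rc))
        *ppVMR0 = Req.pVMR0;            /* Req.pVMR3 holds the ring-3 mapping */
    return rc;
}
#endif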
808
809/**
810 * Allocates the VM structure and registers it with GVM.
811 *
812 * The caller will become the VM owner and thereby the EMT.
813 *
814 * @returns VBox status code.
815 * @param pSession The support driver session.
816 * @param cCpus Number of virtual CPUs for the new VM.
817 * @param ppVM Where to store the pointer to the VM structure.
818 *
819 * @thread EMT.
820 */
821GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM)
822{
823 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
824 PGVMM pGVMM;
825 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
826
827 AssertPtrReturn(ppVM, VERR_INVALID_POINTER);
828 *ppVM = NULL;
829
830 if ( cCpus == 0
831 || cCpus > VMM_MAX_CPU_COUNT)
832 return VERR_INVALID_PARAMETER;
833
834 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
835 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
836 RTPROCESS ProcId = RTProcSelf();
837 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
838
839 /*
840 * The whole allocation process is protected by the lock.
841 */
842 int rc = gvmmR0CreateDestroyLock(pGVMM);
843 AssertRCReturn(rc, rc);
844
845 /*
846 * Only one VM per session.
847 */
848 if (SUPR0GetSessionVM(pSession) != NULL)
849 {
850 gvmmR0CreateDestroyUnlock(pGVMM);
851 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
852 return VERR_ALREADY_EXISTS;
853 }
854
855 /*
856 * Allocate a handle first so we don't waste resources unnecessarily.
857 */
858 uint16_t iHandle = pGVMM->iFreeHead;
859 if (iHandle)
860 {
861 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
862
863 /* consistency checks, a bit paranoid as always. */
864 if ( !pHandle->pVM
865 && !pHandle->pGVM
866 && !pHandle->pvObj
867 && pHandle->iSelf == iHandle)
868 {
869 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
870 if (pHandle->pvObj)
871 {
872 /*
873 * Move the handle from the free to used list and perform permission checks.
874 */
875 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
876 AssertRC(rc);
877
878 pGVMM->iFreeHead = pHandle->iNext;
879 pHandle->iNext = pGVMM->iUsedHead;
880 pGVMM->iUsedHead = iHandle;
881 pGVMM->cVMs++;
882
883 pHandle->pVM = NULL;
884 pHandle->pGVM = NULL;
885 pHandle->pSession = pSession;
886 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
887 pHandle->ProcId = NIL_RTPROCESS;
888
889 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
890
891 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
892 if (RT_SUCCESS(rc))
893 {
894#ifdef VBOX_BUGREF_9217
895 /*
896 * Allocate memory for the VM structure (combined VM + GVM).
897 */
898 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
899 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
900 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
901 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
902 if (RT_SUCCESS(rc))
903 {
904 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
905 AssertPtr(pGVM);
906
907 /*
908 * Initialise the structure.
909 */
910 RT_BZERO(pGVM, cPages << PAGE_SHIFT);
911 gvmmR0InitPerVMData(pGVM, iHandle, cCpus, pSession);
912 GMMR0InitPerVMData(pGVM);
913 pGVM->gvmm.s.VMMemObj = hVMMemObj;
914
915 /*
916 * Allocate page array.
917 * This currently has to be made available to ring-3, but this should change eventually.
918 */
919 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
920 if (RT_SUCCESS(rc))
921 {
922 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
923 for (uint32_t iPage = 0; iPage < cPages; iPage++)
924 {
925 paPages[iPage].uReserved = 0;
926 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
927 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
928 }
929
930 /*
931 * Map the page array, VM and VMCPU structures into ring-3.
932 */
933 AssertCompileSizeAlignment(VM, PAGE_SIZE);
934 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
935 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
936 0 /*offSub*/, sizeof(VM));
937 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
938 {
939 AssertCompileSizeAlignment(VMCPU, PAGE_SIZE);
940 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
941 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
942 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
943 }
944 if (RT_SUCCESS(rc))
945 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
946 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
947 NIL_RTR0PROCESS);
948 if (RT_SUCCESS(rc))
949 {
950 /*
951 * Initialize all the VM pointers.
952 */
953 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
954 AssertPtr((void *)pVMR3);
955
956 for (VMCPUID i = 0; i < cCpus; i++)
957 {
958 pGVM->aCpus[i].pVMR0 = pGVM;
959 pGVM->aCpus[i].pVMR3 = pVMR3;
960 pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
961 pGVM->apCpusR0[i] = &pGVM->aCpus[i];
962 AssertPtr((void *)pGVM->apCpusR3[i]);
963 }
964
965 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
966 AssertPtr((void *)pGVM->paVMPagesR3);
967
968 /*
969 * Complete the handle - take the UsedLock sem just to be careful.
970 */
971 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
972 AssertRC(rc);
973
974 pHandle->pVM = pGVM;
975 pHandle->pGVM = pGVM;
976 pHandle->hEMT0 = hEMT0;
977 pHandle->ProcId = ProcId;
978 pGVM->pVMR3 = pVMR3;
979 pGVM->aCpus[0].hEMT = hEMT0;
980 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
981 pGVMM->cEMTs += cCpus;
982
983 /* Associate it with the session and create the context hook for EMT0. */
984 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
985 if (RT_SUCCESS(rc))
986 {
987 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
988 if (RT_SUCCESS(rc))
989 {
990 /*
991 * Done!
992 */
993 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
994
995 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
996 gvmmR0CreateDestroyUnlock(pGVMM);
997
998 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
999
1000 *ppVM = pGVM;
1001 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
1002 return VINF_SUCCESS;
1003 }
1004
1005 SUPR0SetSessionVM(pSession, NULL, NULL);
1006 }
1007 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1008 }
1009
1010 /* Cleanup mappings. */
1011 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1012 {
1013 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1014 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1015 }
1016 for (VMCPUID i = 0; i < cCpus; i++)
1017 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1018 {
1019 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
1020 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1021 }
1022 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1023 {
1024 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
1025 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1026 }
1027 }
1028 }
1029
1030#else
1031 /*
1032 * Allocate the global VM structure (GVM) and initialize it.
1033 */
1034 PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]));
1035 if (pGVM)
1036 {
1037 pGVM->u32Magic = GVM_MAGIC;
1038 pGVM->hSelf = iHandle;
1039 pGVM->pVM = NULL;
1040 pGVM->cCpus = cCpus;
1041 pGVM->pSession = pSession;
1042
1043 gvmmR0InitPerVMData(pGVM);
1044 GMMR0InitPerVMData(pGVM);
1045
1046 /*
1047 * Allocate the shared VM structure and associated page array.
1048 */
1049 const uint32_t cbVM = RT_UOFFSETOF_DYN(VM, aCpus[cCpus]);
1050 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
1051 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
1052 if (RT_SUCCESS(rc))
1053 {
1054 PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM);
1055 memset(pVM, 0, cPages << PAGE_SHIFT);
1056 pVM->enmVMState = VMSTATE_CREATING;
1057 pVM->pVMR0 = pVM;
1058 pVM->pSession = pSession;
1059 pVM->hSelf = iHandle;
1060 pVM->cbSelf = cbVM;
1061 pVM->cCpus = cCpus;
1062 pVM->uCpuExecutionCap = 100; /* default is no cap. */
1063 AssertCompileMemberAlignment(VM, cpum, 64);
1064 AssertCompileMemberAlignment(VM, tm, 64);
1065 AssertCompileMemberAlignment(VM, aCpus, PAGE_SIZE);
1066
1067 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
1068 if (RT_SUCCESS(rc))
1069 {
1070 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
1071 for (uint32_t iPage = 0; iPage < cPages; iPage++)
1072 {
1073 paPages[iPage].uReserved = 0;
1074 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
1075 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
1076 }
1077
1078 /*
1079 * Map them into ring-3.
1080 */
1081 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
1082 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
1083 if (RT_SUCCESS(rc))
1084 {
1085 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
1086 pVM->pVMR3 = pVMR3;
1087 AssertPtr((void *)pVMR3);
1088
1089 /* Initialize all the VM pointers. */
1090 for (VMCPUID i = 0; i < cCpus; i++)
1091 {
1092 pVM->aCpus[i].idCpu = i;
1093 pVM->aCpus[i].pVMR0 = pVM;
1094 pVM->aCpus[i].pVMR3 = pVMR3;
1095 pVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1096 pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1097 pVM->apCpusR3[i] = pVMR3 + RT_UOFFSETOF_DYN(VM, aCpus[i]);
1098 pVM->apCpusR0[i] = &pVM->aCpus[i];
1099 }
1100
1101 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
1102 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
1103 NIL_RTR0PROCESS);
1104 if (RT_SUCCESS(rc))
1105 {
1106 pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
1107 AssertPtr((void *)pVM->paVMPagesR3);
1108
1109 /* complete the handle - take the UsedLock sem just to be careful. */
1110 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1111 AssertRC(rc);
1112
1113 pHandle->pVM = pVM;
1114 pHandle->pGVM = pGVM;
1115 pHandle->hEMT0 = hEMT0;
1116 pHandle->ProcId = ProcId;
1117 pGVM->pVM = pVM;
1118 pGVM->pVMR3 = pVMR3;
1119 pGVM->aCpus[0].hEMT = hEMT0;
1120 pVM->aCpus[0].hNativeThreadR0 = hEMT0;
1121 pGVMM->cEMTs += cCpus;
1122
1123 for (VMCPUID i = 0; i < cCpus; i++)
1124 {
1125 pGVM->aCpus[i].pVCpu = &pVM->aCpus[i];
1126 pGVM->aCpus[i].pVM = pVM;
1127 }
1128
1129 /* Associate it with the session and create the context hook for EMT0. */
1130 rc = SUPR0SetSessionVM(pSession, pGVM, pVM);
1131 if (RT_SUCCESS(rc))
1132 {
1133 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[0]);
1134 if (RT_SUCCESS(rc))
1135 {
1136 /*
1137 * Done!
1138 */
1139 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pVM, ProcId, (void *)hEMT0, cCpus);
1140
1141 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1142 gvmmR0CreateDestroyUnlock(pGVMM);
1143
1144 CPUMR0RegisterVCpuThread(&pVM->aCpus[0]);
1145
1146 *ppVM = pVM;
1147 Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVMR3, pGVM, iHandle));
1148 return VINF_SUCCESS;
1149 }
1150
1151 SUPR0SetSessionVM(pSession, NULL, NULL);
1152 }
1153 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1154 }
1155
1156 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1157 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1158 }
1159 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
1160 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1161 }
1162 RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
1163 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1164 }
1165 }
1166#endif
1167 }
1168 /* else: The user wasn't permitted to create this VM. */
1169
1170 /*
1171 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1172 * object reference here. A little extra mess because of non-recursive lock.
1173 */
1174 void *pvObj = pHandle->pvObj;
1175 pHandle->pvObj = NULL;
1176 gvmmR0CreateDestroyUnlock(pGVMM);
1177
1178 SUPR0ObjRelease(pvObj, pSession);
1179
1180 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1181 return rc;
1182 }
1183
1184 rc = VERR_NO_MEMORY;
1185 }
1186 else
1187 rc = VERR_GVMM_IPE_1;
1188 }
1189 else
1190 rc = VERR_GVM_TOO_MANY_VMS;
1191
1192 gvmmR0CreateDestroyUnlock(pGVMM);
1193 return rc;
1194}
1195
1196
1197#ifdef VBOX_BUGREF_9217
1198/**
1199 * Initializes the per VM data belonging to GVMM.
1200 *
1201 * @param pGVM Pointer to the global VM structure.
1202 */
1203static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession)
1204#else
1205/**
1206 * Initializes the per VM data belonging to GVMM.
1207 *
1208 * @param pGVM Pointer to the global VM structure.
1209 */
1210static void gvmmR0InitPerVMData(PGVM pGVM)
1211#endif
1212{
1213 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1214 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1215#ifdef VBOX_BUGREF_9217
1216 AssertCompileMemberAlignment(VM, cpum, 64);
1217 AssertCompileMemberAlignment(VM, tm, 64);
1218
1219 /* GVM: */
1220 pGVM->u32Magic = GVM_MAGIC;
1221 pGVM->hSelfSafe = hSelf;
1222 pGVM->cCpusSafe = cCpus;
1223 pGVM->pSessionSafe = pSession;
1224
1225 /* VM: */
1226 pGVM->enmVMState = VMSTATE_CREATING;
1227 pGVM->pVMR0 = pGVM;
1228 pGVM->pSession = pSession;
1229 pGVM->hSelf = hSelf;
1230 pGVM->cCpus = cCpus;
1231 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1232 pGVM->uStructVersion = 1;
1233 pGVM->cbSelf = sizeof(VM);
1234 pGVM->cbVCpu = sizeof(VMCPU);
1235#endif
1236
1237 /* GVMM: */
1238 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1239 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1240 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1241 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1242 pGVM->gvmm.s.fDoneVMMR0Init = false;
1243 pGVM->gvmm.s.fDoneVMMR0Term = false;
1244
1245 /*
1246 * Per virtual CPU.
1247 */
1248 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1249 {
1250 pGVM->aCpus[i].idCpu = i;
1251#ifdef VBOX_BUGREF_9217
1252 pGVM->aCpus[i].idCpuSafe = i;
1253#endif
1254 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1255#ifdef VBOX_BUGREF_9217
1256 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1257#endif
1258 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1259 pGVM->aCpus[i].pGVM = pGVM;
1260#ifndef VBOX_BUGREF_9217
1261 pGVM->aCpus[i].pVCpu = NULL;
1262 pGVM->aCpus[i].pVM = NULL;
1263#endif
1264#ifdef VBOX_BUGREF_9217
1265 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1266 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1267 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1268 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1269 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1270#endif
1271 }
1272}
1273
1274
1275/**
1276 * Does the VM initialization.
1277 *
1278 * @returns VBox status code.
1279 * @param pGVM The global (ring-0) VM structure.
1280 */
1281GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1282{
1283 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1284
1285 int rc = VERR_INTERNAL_ERROR_3;
1286 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1287 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1288 {
1289 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1290 {
1291 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1292 if (RT_FAILURE(rc))
1293 {
1294 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1295 break;
1296 }
1297 }
1298 }
1299 else
1300 rc = VERR_WRONG_ORDER;
1301
1302 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1303 return rc;
1304}
1305
1306
1307/**
1308 * Indicates that we're done with the ring-0 initialization
1309 * of the VM.
1310 *
1311 * @param pGVM The global (ring-0) VM structure.
1312 * @thread EMT(0)
1313 */
1314GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1315{
1316 /* Set the indicator. */
1317 pGVM->gvmm.s.fDoneVMMR0Init = true;
1318}
1319
1320
1321/**
1322 * Indicates that we're doing the ring-0 termination of the VM.
1323 *
1324 * @returns true if termination hasn't been done already, false if it has.
1325 * @param pGVM Pointer to the global VM structure. Optional.
1326 * @thread EMT(0) or session cleanup thread.
1327 */
1328GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1329{
1330 /* Validate the VM structure, state and handle. */
1331 AssertPtrReturn(pGVM, false);
1332
1333 /* Set the indicator. */
1334 if (pGVM->gvmm.s.fDoneVMMR0Term)
1335 return false;
1336 pGVM->gvmm.s.fDoneVMMR0Term = true;
1337 return true;
1338}
1339
1340
1341/**
1342 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1343 *
1344 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1345 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1346 * would've been nice if the caller was actually the EMT thread or that we somehow
1347 * could've associated the calling thread with the VM up front.
1348 *
1349 * @returns VBox status code.
1350 * @param pGVM The global (ring-0) VM structure.
1351 * @param pVM The cross context VM structure.
1352 *
1353 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1354 */
1355GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM, PVM pVM)
1356{
1357 LogFlow(("GVMMR0DestroyVM: pGVM=%p pVM=%p\n", pGVM, pVM));
1358 PGVMM pGVMM;
1359 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1360
1361 /*
1362 * Validate the VM structure, state and caller.
1363 */
1364 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1365 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1366 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1367#ifdef VBOX_BUGREF_9217
1368 AssertReturn(pGVM == pVM, VERR_INVALID_POINTER);
1369#else
1370 AssertReturn(pGVM->pVM == pVM, VERR_INVALID_POINTER);
1371#endif
1372 AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState),
1373 VERR_WRONG_ORDER);
1374
1375 uint32_t hGVM = pGVM->hSelf;
1376 ASMCompilerBarrier();
1377 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1378 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1379
1380 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1381 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1382
1383 RTPROCESS ProcId = RTProcSelf();
1384 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1385 AssertReturn( ( pHandle->hEMT0 == hSelf
1386 && pHandle->ProcId == ProcId)
1387 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1388
1389 /*
1390 * Lookup the handle and destroy the object.
1391 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1392 * object, we take some precautions against racing callers just in case...
1393 */
1394 int rc = gvmmR0CreateDestroyLock(pGVMM);
1395 AssertRC(rc);
1396
1397 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1398 if ( pHandle->pVM == pVM
1399 && ( ( pHandle->hEMT0 == hSelf
1400 && pHandle->ProcId == ProcId)
1401 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1402 && VALID_PTR(pHandle->pvObj)
1403 && VALID_PTR(pHandle->pSession)
1404 && VALID_PTR(pHandle->pGVM)
1405 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1406 {
1407 /* Check that other EMTs have deregistered. */
1408 uint32_t cNotDeregistered = 0;
1409 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1410 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */
1411 if (cNotDeregistered == 0)
1412 {
1413 /* Grab the object pointer. */
1414 void *pvObj = pHandle->pvObj;
1415 pHandle->pvObj = NULL;
1416 gvmmR0CreateDestroyUnlock(pGVMM);
1417
1418 SUPR0ObjRelease(pvObj, pHandle->pSession);
1419 }
1420 else
1421 {
1422 gvmmR0CreateDestroyUnlock(pGVMM);
1423 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1424 }
1425 }
1426 else
1427 {
1428 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n",
1429 pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf);
1430 gvmmR0CreateDestroyUnlock(pGVMM);
1431 rc = VERR_GVMM_IPE_2;
1432 }
1433
1434 return rc;
1435}
1436
1437
1438/**
1439 * Performs VM cleanup task as part of object destruction.
1440 *
1441 * @param pGVM The GVM pointer.
1442 */
1443static void gvmmR0CleanupVM(PGVM pGVM)
1444{
1445 if ( pGVM->gvmm.s.fDoneVMMR0Init
1446 && !pGVM->gvmm.s.fDoneVMMR0Term)
1447 {
1448 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1449#ifdef VBOX_BUGREF_9217
1450 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM
1451#else
1452 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM
1453#endif
1454 )
1455 {
1456 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1457#ifdef VBOX_BUGREF_9217
1458 VMMR0TermVM(pGVM, pGVM, NIL_VMCPUID);
1459#else
1460 VMMR0TermVM(pGVM, pGVM->pVM, NIL_VMCPUID);
1461#endif
1462 }
1463 else
1464#ifdef VBOX_BUGREF_9217
1465 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1466#else
1467 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM));
1468#endif
1469 }
1470
1471 GMMR0CleanupVM(pGVM);
1472#ifdef VBOX_WITH_NEM_R0
1473 NEMR0CleanupVM(pGVM);
1474#endif
1475
1476 AssertCompile(NIL_RTTHREADCTXHOOK == (RTTHREADCTXHOOK)0); /* Depends on zero initialized memory working for NIL at the moment. */
1477#ifdef VBOX_BUGREF_9217
1478 for (VMCPUID idCpu = 0; idCpu < pGVM->cCpusSafe; idCpu++)
1479#else
1480 for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
1481#endif
1482 {
1483 /** @todo Can we busy wait here for all thread-context hooks to be
1484 * deregistered before releasing (destroying) it? Only until we find a
1485 * solution for not deregistering hooks every time we're leaving HMR0
1486 * context. */
1487#ifdef VBOX_BUGREF_9217
1488 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1489#else
1490 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->pVM->aCpus[idCpu]);
1491#endif
1492 }
1493}
1494
1495
1496/**
1497 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1498 *
1499 * pvUser1 is the GVMM instance pointer.
1500 * pvUser2 is the handle pointer.
1501 */
1502static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1503{
1504 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1505
1506 NOREF(pvObj);
1507
1508 /*
1509 * Some quick, paranoid, input validation.
1510 */
1511 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1512 AssertPtr(pHandle);
1513 PGVMM pGVMM = (PGVMM)pvUser1;
1514 Assert(pGVMM == g_pGVMM);
1515 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1516 if ( !iHandle
1517 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1518 || iHandle != pHandle->iSelf)
1519 {
1520 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1521 return;
1522 }
1523
1524 int rc = gvmmR0CreateDestroyLock(pGVMM);
1525 AssertRC(rc);
1526 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1527 AssertRC(rc);
1528
1529 /*
1530 * This is a tad slow but a doubly linked list is too much hassle.
1531 */
1532 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1533 {
1534 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1535 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1536 gvmmR0CreateDestroyUnlock(pGVMM);
1537 return;
1538 }
1539
1540 if (pGVMM->iUsedHead == iHandle)
1541 pGVMM->iUsedHead = pHandle->iNext;
1542 else
1543 {
1544 uint16_t iPrev = pGVMM->iUsedHead;
1545 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1546 while (iPrev)
1547 {
1548 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1549 {
1550 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1551 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1552 gvmmR0CreateDestroyUnlock(pGVMM);
1553 return;
1554 }
1555 if (RT_UNLIKELY(c-- <= 0))
1556 {
1557 iPrev = 0;
1558 break;
1559 }
1560
1561 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1562 break;
1563 iPrev = pGVMM->aHandles[iPrev].iNext;
1564 }
1565 if (!iPrev)
1566 {
1567 SUPR0Printf("GVM: can't find the previous handle of %d!\n", pHandle->iSelf);
1568 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1569 gvmmR0CreateDestroyUnlock(pGVMM);
1570 return;
1571 }
1572
1573 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1574 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1575 }
1576 pHandle->iNext = 0;
1577 pGVMM->cVMs--;
1578
1579 /*
1580 * Do the global cleanup round.
1581 */
1582 PGVM pGVM = pHandle->pGVM;
1583 if ( VALID_PTR(pGVM)
1584 && pGVM->u32Magic == GVM_MAGIC)
1585 {
1586 pGVMM->cEMTs -= pGVM->cCpus;
1587
1588 if (pGVM->pSession)
1589 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1590
1591 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1592
1593 gvmmR0CleanupVM(pGVM);
1594
1595 /*
1596 * Do the GVMM cleanup - must be done last.
1597 */
1598 /* The VM and VM pages mappings/allocations. */
1599 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1600 {
1601 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1602 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1603 }
1604
1605 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1606 {
1607 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1608 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1609 }
1610
1611 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1612 {
1613 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1614 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1615 }
1616
1617#ifndef VBOX_BUGREF_9217
1618 if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ)
1619 {
1620 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc);
1621 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1622 }
1623#endif
1624
1625 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1626 {
1627 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1628 {
1629 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1630 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1631 }
1632#ifdef VBOX_BUGREF_9217
1633 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1634 {
1635 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1636 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1637 }
1638#endif
1639 }
1640
1641 /* the GVM structure itself. */
1642 pGVM->u32Magic |= UINT32_C(0x80000000);
1643#ifdef VBOX_BUGREF_9217
1644 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1645 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1646#else
1647 RTMemFree(pGVM);
1648#endif
1649 pGVM = NULL;
1650
1651 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1652 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1653 AssertRC(rc);
1654 }
1655 /* else: GVMMR0CreateVM cleanup. */
1656
1657 /*
1658 * Free the handle.
1659 */
1660 pHandle->iNext = pGVMM->iFreeHead;
1661 pGVMM->iFreeHead = iHandle;
1662 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1663 ASMAtomicWriteNullPtr(&pHandle->pVM);
1664 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1665 ASMAtomicWriteNullPtr(&pHandle->pSession);
1666 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1667 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1668
1669 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1670 gvmmR0CreateDestroyUnlock(pGVMM);
1671 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1672}
1673
1674
1675/**
1676 * Registers the calling thread as the EMT of a Virtual CPU.
1677 *
1678 * Note that VCPU 0 is automatically registered during VM creation.
1679 *
1680 * @returns VBox status code
1681 * @param pGVM The global (ring-0) VM structure.
1682 * @param pVM The cross context VM structure.
1683 * @param idCpu VCPU id to register the current thread as.
1684 */
1685GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1686{
1687 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1688
1689 /*
1690 * Validate the VM structure, state and handle.
1691 */
1692 PGVMM pGVMM;
1693 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /* fTakeUsedLock */); /** @todo take lock here. */
1694 if (RT_SUCCESS(rc))
1695 {
1696 if (idCpu < pGVM->cCpus)
1697 {
1698 /* Check that the EMT isn't already assigned to a thread. */
1699 if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
1700 {
1701#ifdef VBOX_BUGREF_9217
1702 Assert(pGVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1703#else
1704 Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1705#endif
1706
1707 /* A thread may only be one EMT. */
1708 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1709 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1710 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1711 if (RT_SUCCESS(rc))
1712 {
1713 /*
1714 * Do the assignment, then try setup the hook. Undo if that fails.
1715 */
1716#ifdef VBOX_BUGREF_9217
1717 pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1718
1719 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[idCpu]);
1720 if (RT_SUCCESS(rc))
1721 CPUMR0RegisterVCpuThread(&pGVM->aCpus[idCpu]);
1722 else
1723 pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1724#else
1725 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1726
1727 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[idCpu]);
1728 if (RT_SUCCESS(rc))
1729 CPUMR0RegisterVCpuThread(&pVM->aCpus[idCpu]);
1730 else
1731 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1732#endif
1733 }
1734 }
1735 else
1736 rc = VERR_ACCESS_DENIED;
1737 }
1738 else
1739 rc = VERR_INVALID_CPU_ID;
1740 }
1741 return rc;
1742}
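/*
 * A minimal sketch of the per-EMT lifecycle as seen from this API. The worker
 * function below is hypothetical and only included for illustration; only the
 * additional EMTs (idCpu >= 1) go through it, since VCPU 0 is registered
 * automatically during VM creation and torn down by GVMMR0DestroyVM:
 *
 *     static int emtWorkerSketch(PGVM pGVM, PVM pVM, VMCPUID idCpu)
 *     {
 *         int rc = GVMMR0RegisterVCpu(pGVM, pVM, idCpu);    // bind this native thread to aCpus[idCpu]
 *         if (RT_SUCCESS(rc))
 *         {
 *             // ... run the EMT loop for aCpus[idCpu] ...
 *             rc = GVMMR0DeregisterVCpu(pGVM, pVM, idCpu);  // undo the binding before the thread exits
 *         }
 *         return rc;
 *     }
 */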
1743
1744
1745/**
1746 * Deregisters the calling thread as the EMT of a Virtual CPU.
1747 *
1748 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1749 *
1750 * @returns VBox status code
1751 * @param pGVM The global (ring-0) VM structure.
1752 * @param pVM The cross context VM structure.
1753 * @param idCpu VCPU id to deregister the current thread as.
1754 */
1755GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1756{
1757 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1758
1759 /*
1760 * Validate the VM structure, state and handle.
1761 */
1762 PGVMM pGVMM;
1763 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
1764 if (RT_SUCCESS(rc))
1765 {
1766 /*
1767 * Take the destruction lock and recheck the handle state to
1768 * prevent racing GVMMR0DestroyVM.
1769 */
1770 gvmmR0CreateDestroyLock(pGVMM);
1771 uint32_t hSelf = pGVM->hSelf;
1772 ASMCompilerBarrier();
1773 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1774 && pGVMM->aHandles[hSelf].pvObj != NULL
1775 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1776 {
1777 /*
1778 * Do per-EMT cleanups.
1779 */
1780#ifdef VBOX_BUGREF_9217
1781 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1782#else
1783 VMMR0ThreadCtxHookDestroyForEmt(&pVM->aCpus[idCpu]);
1784#endif
1785
1786 /*
1787 * Invalidate hEMT. We don't use NIL here as that would allow
1788 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1789 */
1790 AssertCompile(~(RTNATIVETHREAD)1 != NIL_RTNATIVETHREAD);
1791 pGVM->aCpus[idCpu].hEMT = ~(RTNATIVETHREAD)1;
1792#ifdef VBOX_BUGREF_9217
1793 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1794#else
1795 pVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1796#endif
1797 }
1798
1799 gvmmR0CreateDestroyUnlock(pGVMM);
1800 }
1801 return rc;
1802}
1803
1804
1805/**
1806 * Lookup a GVM structure by its handle.
1807 *
1808 * @returns The GVM pointer on success, NULL on failure.
1809 * @param hGVM The global VM handle. Asserts on bad handle.
1810 */
1811GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1812{
1813 PGVMM pGVMM;
1814 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1815
1816 /*
1817 * Validate.
1818 */
1819 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1820 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1821
1822 /*
1823 * Look it up.
1824 */
1825 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1826 AssertPtrReturn(pHandle->pVM, NULL);
1827 AssertPtrReturn(pHandle->pvObj, NULL);
1828 PGVM pGVM = pHandle->pGVM;
1829 AssertPtrReturn(pGVM, NULL);
1830#ifdef VBOX_BUGREF_9217
1831 AssertReturn(pGVM == pHandle->pVM, NULL);
1832#else
1833 AssertReturn(pGVM->pVM == pHandle->pVM, NULL);
1834#endif
1835
1836 return pHandle->pGVM;
1837}
1838
1839
1840/**
1841 * Lookup a GVM structure by the shared VM structure.
1842 *
1843 * The calling thread must be in the same process as the VM. All current lookups
1844 * are by threads inside the same process, so this will not be an issue.
1845 *
1846 * @returns VBox status code.
1847 * @param pVM The cross context VM structure.
1848 * @param ppGVM Where to store the GVM pointer.
1849 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1850 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1851 * shared mode when requested.
1852 *
1853 * Be very careful if not taking the lock as it's
1854 * possible that the VM will disappear then!
1855 *
1856 * @remark This will not assert on an invalid pVM but try to return silently.
1857 */
1858static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1859{
1860 RTPROCESS ProcId = RTProcSelf();
1861 PGVMM pGVMM;
1862 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1863
1864 /*
1865 * Validate.
1866 */
1867 if (RT_UNLIKELY( !VALID_PTR(pVM)
1868 || ((uintptr_t)pVM & PAGE_OFFSET_MASK)))
1869 return VERR_INVALID_POINTER;
1870 if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING
1871 || pVM->enmVMState >= VMSTATE_TERMINATED))
1872 return VERR_INVALID_POINTER;
1873
1874 uint16_t hGVM = pVM->hSelf;
1875 ASMCompilerBarrier();
1876 if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE
1877 || hGVM >= RT_ELEMENTS(pGVMM->aHandles)))
1878 return VERR_INVALID_HANDLE;
1879
1880 /*
1881 * Look it up.
1882 */
1883 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1884 PGVM pGVM;
1885 if (fTakeUsedLock)
1886 {
1887 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1888 AssertRCReturn(rc, rc);
1889
1890 pGVM = pHandle->pGVM;
1891#ifdef VBOX_BUGREF_9217
1892 if (RT_UNLIKELY( pHandle->pVM != pVM
1893 || pHandle->ProcId != ProcId
1894 || !VALID_PTR(pHandle->pvObj)
1895 || !VALID_PTR(pGVM)
1896 || pGVM != pVM))
1897#else
1898 if (RT_UNLIKELY( pHandle->pVM != pVM
1899 || pHandle->ProcId != ProcId
1900 || !VALID_PTR(pHandle->pvObj)
1901 || !VALID_PTR(pGVM)
1902 || pGVM->pVM != pVM))
1903#endif
1904 {
1905 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1906 return VERR_INVALID_HANDLE;
1907 }
1908 }
1909 else
1910 {
1911 if (RT_UNLIKELY(pHandle->pVM != pVM))
1912 return VERR_INVALID_HANDLE;
1913 if (RT_UNLIKELY(pHandle->ProcId != ProcId))
1914 return VERR_INVALID_HANDLE;
1915 if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj)))
1916 return VERR_INVALID_HANDLE;
1917
1918 pGVM = pHandle->pGVM;
1919 if (RT_UNLIKELY(!VALID_PTR(pGVM)))
1920 return VERR_INVALID_HANDLE;
1921#ifdef VBOX_BUGREF_9217
1922 if (RT_UNLIKELY(pGVM != pVM))
1923#else
1924 if (RT_UNLIKELY(pGVM->pVM != pVM))
1925#endif
1926 return VERR_INVALID_HANDLE;
1927 }
1928
1929 *ppGVM = pGVM;
1930 *ppGVMM = pGVMM;
1931 return VINF_SUCCESS;
1932}
1933
1934
1935/**
1936 * Fast look up a GVM structure by the cross context VM structure.
1937 *
1938 * This is mainly used as a glue function, so performance matters.
1939 *
1940 * @returns GVM on success, NULL on failure.
1941 * @param pVM The cross context VM structure. ASSUMES to be
1942 * reasonably valid, so we can do fewer checks than in
1943 * gvmmR0ByVM.
1944 *
1945 * @note Do not use this on pVM structures from userland!
1946 */
1947GVMMR0DECL(PGVM) GVMMR0FastGetGVMByVM(PVM pVM)
1948{
1949 AssertPtr(pVM);
1950 Assert(!((uintptr_t)pVM & PAGE_OFFSET_MASK));
1951
1952 PGVMM pGVMM;
1953 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1954
1955 /*
1956 * Validate.
1957 */
1958 uint16_t hGVM = pVM->hSelf;
1959 ASMCompilerBarrier();
1960 AssertReturn(hGVM != NIL_GVM_HANDLE && hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1961
1962 /*
1963 * Look it up and check pVM against the value in the handle and GVM structures.
1964 */
1965 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1966 AssertReturn(pHandle->pVM == pVM, NULL);
1967
1968 PGVM pGVM = pHandle->pGVM;
1969 AssertPtrReturn(pGVM, NULL);
1970#ifdef VBOX_BUGREF_9217
1971 AssertReturn(pGVM == pVM, NULL);
1972#else
1973 AssertReturn(pGVM->pVM == pVM, NULL);
1974#endif
1975
1976 return pGVM;
1977}
1978
1979
1980/**
1981 * Check that the given GVM and VM structures match up.
1982 *
1983 * The calling thread must be in the same process as the VM. All current lookups
1984 * are by threads inside the same process, so this will not be an issue.
1985 *
1986 * @returns VBox status code.
1987 * @param pGVM The global (ring-0) VM structure.
1988 * @param pVM The cross context VM structure.
1989 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1990 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1991 * shared mode when requested.
1992 *
1993 * Be very careful if not taking the lock as it's
1994 * possible that the VM will disappear then!
1995 *
1996 * @remark This will not assert on an invalid pVM but try to return silently.
1997 */
1998static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1999{
2000 /*
2001 * Check the pointers.
2002 */
2003 int rc;
2004 if (RT_LIKELY(RT_VALID_PTR(pGVM)))
2005 {
2006 if (RT_LIKELY( RT_VALID_PTR(pVM)
2007 && ((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0))
2008 {
2009#ifdef VBOX_BUGREF_9217
2010 if (RT_LIKELY(pGVM == pVM))
2011#else
2012 if (RT_LIKELY(pGVM->pVM == pVM))
2013#endif
2014 {
2015 /*
2016 * Get the pGVMM instance and check the VM handle.
2017 */
2018 PGVMM pGVMM;
2019 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2020
2021 uint16_t hGVM = pGVM->hSelf;
2022 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
2023 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
2024 {
2025 RTPROCESS const pidSelf = RTProcSelf();
2026 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
2027 if (fTakeUsedLock)
2028 {
2029 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2030 AssertRCReturn(rc, rc);
2031 }
2032
2033 if (RT_LIKELY( pHandle->pGVM == pGVM
2034 && pHandle->pVM == pVM
2035 && pHandle->ProcId == pidSelf
2036 && RT_VALID_PTR(pHandle->pvObj)))
2037 {
2038 /*
2039 * Some more VM data consistency checks.
2040 */
2041 if (RT_LIKELY( pVM->cCpus == pGVM->cCpus
2042 && pVM->hSelf == hGVM
2043 && pVM->enmVMState >= VMSTATE_CREATING
2044 && pVM->enmVMState <= VMSTATE_TERMINATED
2045 && pVM->pVMR0 == pVM))
2046 {
2047 *ppGVMM = pGVMM;
2048 return VINF_SUCCESS;
2049 }
2050 }
2051
2052 if (fTakeUsedLock)
2053 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2054 }
2055 }
2056 rc = VERR_INVALID_VM_HANDLE;
2057 }
2058 else
2059 rc = VERR_INVALID_POINTER;
2060 }
2061 else
2062 rc = VERR_INVALID_POINTER;
2063 return rc;
2064}
2065
2066
2067/**
2068 * Check that the given GVM and VM structures match up.
2069 *
2070 * The calling thread must be in the same process as the VM. All current lookups
2071 * are by threads inside the same process, so this will not be an issue.
2072 *
2073 * @returns VBox status code.
2074 * @param pGVM The global (ring-0) VM structure.
2075 * @param pVM The cross context VM structure.
2076 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
2077 * @param ppGVMM Where to store the pointer to the GVMM instance data.
2078 * @thread EMT
2079 *
2080 * @remarks This will assert in all failure paths.
2081 */
2082static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM)
2083{
2084 /*
2085 * Check the pointers.
2086 */
2087 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
2088
2089 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2090 AssertReturn(((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
2091#ifdef VBOX_BUGREF_9217
2092 AssertReturn(pGVM == pVM, VERR_INVALID_VM_HANDLE);
2093#else
2094 AssertReturn(pGVM->pVM == pVM, VERR_INVALID_VM_HANDLE);
2095#endif
2096
2097
2098 /*
2099 * Get the pGVMM instance and check the VM handle.
2100 */
2101 PGVMM pGVMM;
2102 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2103
2104 uint16_t hGVM = pGVM->hSelf;
2105 ASMCompilerBarrier();
2106 AssertReturn( hGVM != NIL_GVM_HANDLE
2107 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
2108
2109 RTPROCESS const pidSelf = RTProcSelf();
2110 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
2111 AssertReturn( pHandle->pGVM == pGVM
2112 && pHandle->pVM == pVM
2113 && pHandle->ProcId == pidSelf
2114 && RT_VALID_PTR(pHandle->pvObj),
2115 VERR_INVALID_HANDLE);
2116
2117 /*
2118 * Check the EMT claim.
2119 */
2120 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
2121 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
2122 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
2123
2124 /*
2125 * Some more VM data consistency checks.
2126 */
2127 AssertReturn(pVM->cCpus == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
2128 AssertReturn(pVM->hSelf == hGVM, VERR_INCONSISTENT_VM_HANDLE);
2129 AssertReturn(pVM->pVMR0 == pVM, VERR_INCONSISTENT_VM_HANDLE);
2130 AssertReturn( pVM->enmVMState >= VMSTATE_CREATING
2131 && pVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
2132
2133 *ppGVMM = pGVMM;
2134 return VINF_SUCCESS;
2135}
2136
2137
2138/**
2139 * Validates a GVM/VM pair.
2140 *
2141 * @returns VBox status code.
2142 * @param pGVM The global (ring-0) VM structure.
2143 * @param pVM The cross context VM structure.
2144 */
2145GVMMR0DECL(int) GVMMR0ValidateGVMandVM(PGVM pGVM, PVM pVM)
2146{
2147 PGVMM pGVMM;
2148 return gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /*fTakeUsedLock*/);
2149}
2150
2151
2152
2153/**
2154 * Validates a GVM/VM/EMT combo.
2155 *
2156 * @returns VBox status code.
2157 * @param pGVM The global (ring-0) VM structure.
2158 * @param pVM The cross context VM structure.
2159 * @param idCpu The Virtual CPU ID of the calling EMT.
2160 * @thread EMT(idCpu)
2161 */
2162GVMMR0DECL(int) GVMMR0ValidateGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2163{
2164 PGVMM pGVMM;
2165 return gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2166}
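/*
 * Sketch of how the validation helpers above are typically used at the top of
 * a ring-0 entry point; the operation name and body are made up purely for
 * illustration:
 *
 *     static int gvmmR0SomeOperationSketch(PGVM pGVM, PVM pVM, VMCPUID idCpu)
 *     {
 *         int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
 *         if (RT_FAILURE(rc))
 *             return rc;              // bogus handles, or the caller isn't EMT(idCpu)
 *         // ... safe to use pGVM->aCpus[idCpu] from here on ...
 *         return VINF_SUCCESS;
 *     }
 */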
2167
2168
2169/**
2170 * Looks up the VM belonging to the specified EMT thread.
2171 *
2172 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2173 * unnecessary kernel panics when the EMT thread hits an assertion. The
2174 * caller may or may not be an EMT thread.
2175 *
2176 * @returns Pointer to the VM on success, NULL on failure.
2177 * @param hEMT The native thread handle of the EMT.
2178 * NIL_RTNATIVETHREAD means the current thread
2179 */
2180GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
2181{
2182 /*
2183 * No Assertions here as we're usually called in an AssertMsgN or
2184 * RTAssert* context.
2185 */
2186 PGVMM pGVMM = g_pGVMM;
2187 if ( !VALID_PTR(pGVMM)
2188 || pGVMM->u32Magic != GVMM_MAGIC)
2189 return NULL;
2190
2191 if (hEMT == NIL_RTNATIVETHREAD)
2192 hEMT = RTThreadNativeSelf();
2193 RTPROCESS ProcId = RTProcSelf();
2194
2195 /*
2196 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2197 */
2198/** @todo introduce some pid hash table here, please. */
2199 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2200 {
2201 if ( pGVMM->aHandles[i].iSelf == i
2202 && pGVMM->aHandles[i].ProcId == ProcId
2203 && VALID_PTR(pGVMM->aHandles[i].pvObj)
2204 && VALID_PTR(pGVMM->aHandles[i].pVM)
2205 && VALID_PTR(pGVMM->aHandles[i].pGVM))
2206 {
2207 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2208 return pGVMM->aHandles[i].pVM;
2209
2210            /* This is fairly safe with the current process per VM approach. */
2211 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2212 VMCPUID const cCpus = pGVM->cCpus;
2213 ASMCompilerBarrier();
2214 if ( cCpus < 1
2215 || cCpus > VMM_MAX_CPU_COUNT)
2216 continue;
2217 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2218 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2219 return pGVMM->aHandles[i].pVM;
2220 }
2221 }
2222 return NULL;
2223}
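/*
 * Minimal sketch of the intended use from assertion/logging code; the helper
 * name is made up, and passing NIL_RTNATIVETHREAD means "the current thread":
 *
 *     static void assertHelperSketch(void)
 *     {
 *         PVM pVM = GVMMR0GetVMByEMT(NIL_RTNATIVETHREAD);
 *         if (pVM)
 *         {
 *             // ... dump some per-VM state without taking any locks ...
 *         }
 *     }
 */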
2224
2225
2226/**
2227 * Looks up the GVMCPU belonging to the specified EMT thread.
2228 *
2229 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2230 * unnecessary kernel panics when the EMT thread hits an assertion. The
2231 * call may or not be an EMT thread.
2232 * caller may or may not be an EMT thread.
2233 * @returns Pointer to the GVMCPU on success, NULL on failure.
2234 * @param hEMT The native thread handle of the EMT.
2235 * NIL_RTNATIVETHREAD means the current thread
2236 */
2237GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
2238{
2239 /*
2240 * No Assertions here as we're usually called in an AssertMsgN,
2241 * RTAssert*, Log and LogRel contexts.
2242 */
2243 PGVMM pGVMM = g_pGVMM;
2244 if ( !VALID_PTR(pGVMM)
2245 || pGVMM->u32Magic != GVMM_MAGIC)
2246 return NULL;
2247
2248 if (hEMT == NIL_RTNATIVETHREAD)
2249 hEMT = RTThreadNativeSelf();
2250 RTPROCESS ProcId = RTProcSelf();
2251
2252 /*
2253 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2254 */
2255/** @todo introduce some pid hash table here, please. */
2256 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2257 {
2258 if ( pGVMM->aHandles[i].iSelf == i
2259 && pGVMM->aHandles[i].ProcId == ProcId
2260 && VALID_PTR(pGVMM->aHandles[i].pvObj)
2261 && VALID_PTR(pGVMM->aHandles[i].pVM)
2262 && VALID_PTR(pGVMM->aHandles[i].pGVM))
2263 {
2264 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2265 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2266 return &pGVM->aCpus[0];
2267
2268            /* This is fairly safe with the current process per VM approach. */
2269 VMCPUID const cCpus = pGVM->cCpus;
2270 ASMCompilerBarrier();
2271 ASMCompilerBarrier();
2272 if ( cCpus < 1
2273 || cCpus > VMM_MAX_CPU_COUNT)
2274 continue;
2275 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2276 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2277 return &pGVM->aCpus[idCpu];
2278 }
2279 }
2280 return NULL;
2281}
2282
2283
2284/**
2285 * This will wake up expired and soon-to-be expired VMs.
2286 *
2287 * @returns Number of VMs that have been woken up.
2288 * @param pGVMM Pointer to the GVMM instance data.
2289 * @param u64Now The current time.
2290 */
2291static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
2292{
2293 /*
2294     * Skip this if it has been disabled because of high resolution wakeups or
2295     * by the user.
2296 */
2297 if (!pGVMM->fDoEarlyWakeUps)
2298 return 0;
2299
2300/** @todo Rewrite this algorithm. See performance defect XYZ. */
2301
2302 /*
2303 * A cheap optimization to stop wasting so much time here on big setups.
2304 */
2305 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
2306 if ( pGVMM->cHaltedEMTs == 0
2307 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
2308 return 0;
2309
2310 /*
2311 * Only one thread doing this at a time.
2312 */
2313 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
2314 return 0;
2315
2316 /*
2317 * The first pass will wake up VMs which have actually expired
2318 * and look for VMs that should be woken up in the 2nd and 3rd passes.
2319 */
2320 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
2321 uint64_t u64Min = UINT64_MAX;
2322 unsigned cWoken = 0;
2323 unsigned cHalted = 0;
2324 unsigned cTodo2nd = 0;
2325 unsigned cTodo3rd = 0;
2326 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2327 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2328 i = pGVMM->aHandles[i].iNext)
2329 {
2330 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2331 if ( VALID_PTR(pCurGVM)
2332 && pCurGVM->u32Magic == GVM_MAGIC)
2333 {
2334 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2335 {
2336 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2337 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2338 if (u64)
2339 {
2340 if (u64 <= u64Now)
2341 {
2342 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2343 {
2344 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2345 AssertRC(rc);
2346 cWoken++;
2347 }
2348 }
2349 else
2350 {
2351 cHalted++;
2352 if (u64 <= uNsEarlyWakeUp1)
2353 cTodo2nd++;
2354 else if (u64 <= uNsEarlyWakeUp2)
2355 cTodo3rd++;
2356 else if (u64 < u64Min)
2357                             u64Min = u64; /* remember the earliest expiry still pending */
2358 }
2359 }
2360 }
2361 }
2362 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2363 }
2364
2365 if (cTodo2nd)
2366 {
2367 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2368 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2369 i = pGVMM->aHandles[i].iNext)
2370 {
2371 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2372 if ( VALID_PTR(pCurGVM)
2373 && pCurGVM->u32Magic == GVM_MAGIC)
2374 {
2375 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2376 {
2377 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2378 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2379 if ( u64
2380 && u64 <= uNsEarlyWakeUp1)
2381 {
2382 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2383 {
2384 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2385 AssertRC(rc);
2386 cWoken++;
2387 }
2388 }
2389 }
2390 }
2391 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2392 }
2393 }
2394
2395 if (cTodo3rd)
2396 {
2397 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2398 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2399 i = pGVMM->aHandles[i].iNext)
2400 {
2401 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2402 if ( VALID_PTR(pCurGVM)
2403 && pCurGVM->u32Magic == GVM_MAGIC)
2404 {
2405 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2406 {
2407 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2408 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2409 if ( u64
2410 && u64 <= uNsEarlyWakeUp2)
2411 {
2412 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2413 {
2414 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2415 AssertRC(rc);
2416 cWoken++;
2417 }
2418 }
2419 }
2420 }
2421 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2422 }
2423 }
2424
2425 /*
2426 * Set the minimum value.
2427 */
2428 pGVMM->uNsNextEmtWakeup = u64Min;
2429
2430 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2431 return cWoken;
2432}
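/*
 * Worked illustration of the three wake-up thresholds used above; the numbers
 * are invented for the example, the real nsEarlyWakeUp1/2 values come from the
 * GVMM configuration:
 *
 *     u64Now         = 1 000 000 000 ns
 *     nsEarlyWakeUp1 =        25 000 ns  =>  uNsEarlyWakeUp1 = 1 000 025 000
 *     nsEarlyWakeUp2 =        50 000 ns  =>  uNsEarlyWakeUp2 = 1 000 050 000
 *
 *     u64HaltExpire =   999 990 000  ->  pass 1: already expired, woken immediately
 *     u64HaltExpire = 1 000 020 000  ->  pass 2: expires within nsEarlyWakeUp1
 *     u64HaltExpire = 1 000 040 000  ->  pass 3: expires within nsEarlyWakeUp2
 *     u64HaltExpire = 1 000 100 000  ->  left sleeping, only considered for the minimum
 */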
2433
2434
2435/**
2436 * Halt the EMT thread.
2437 *
2438 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2439 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2440 * @param pGVM The global (ring-0) VM structure.
2441 * @param pVM The cross context VM structure.
2442 * @param pGVCpu The global (ring-0) CPU structure of the calling
2443 * EMT.
2444 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2445 * @thread EMT(pGVCpu).
2446 */
2447GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PVM pVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
2448{
2449 LogFlow(("GVMMR0SchedHalt: pGVM=%p pVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2450 pGVM, pVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
2451 GVMM_CHECK_SMAP_SETUP();
2452 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2453
2454 PGVMM pGVMM;
2455 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2456
2457 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2458 Assert(!pGVCpu->gvmm.s.u64HaltExpire);
2459
2460 /*
2461 * If we're doing early wake-ups, we must take the UsedList lock before we
2462 * start querying the current time.
2463 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2464 */
2465 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2466 if (fDoEarlyWakeUps)
2467 {
2468 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2469 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2470 }
2471
2472 pGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
2473
2474    /* GIP hack: We might frequently be sleeping for short intervals where the
2475 difference between GIP and system time matters on systems with high resolution
2476 system time. So, convert the input from GIP to System time in that case. */
2477 Assert(ASMGetFlags() & X86_EFL_IF);
2478 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2479 const uint64_t u64NowGip = RTTimeNanoTS();
2480 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2481
2482 if (fDoEarlyWakeUps)
2483 {
2484 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2485 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2486 }
2487
2488 /*
2489 * Go to sleep if we must...
2490 * Cap the sleep time to 1 second to be on the safe side.
2491 */
2492 int rc;
2493 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2494 if ( u64NowGip < u64ExpireGipTime
2495 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2496 ? pGVMM->nsMinSleepCompany
2497 : pGVMM->nsMinSleepAlone))
2498 {
2499 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2500 if (cNsInterval > RT_NS_1SEC)
2501 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2502 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2503 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2504 if (fDoEarlyWakeUps)
2505 {
2506 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2507 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2508 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2509 }
2510 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2511
2512 rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
2513 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2514 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2515 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2516
2517 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2518 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2519
2520 /* Reset the semaphore to try prevent a few false wake-ups. */
2521 if (rc == VINF_SUCCESS)
2522 {
2523 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2524 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2525 }
2526 else if (rc == VERR_TIMEOUT)
2527 {
2528 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2529 rc = VINF_SUCCESS;
2530 }
2531 }
2532 else
2533 {
2534 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2535 if (fDoEarlyWakeUps)
2536 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2537 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2538 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2539 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2540 rc = VINF_SUCCESS;
2541 }
2542
2543 return rc;
2544}
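/*
 * Worked example of the GIP vs. system time adjustment above; the numbers are
 * invented for illustration:
 *
 *     u64NowGip        = 5 000 000 000 ns      u64NowSys = 5 000 000 400 ns
 *     u64ExpireGipTime = 5 000 100 000 ns  =>  cNsInterval = 100 000 ns
 *
 *     Since u64NowGip <= u64NowSys, the absolute deadline handed to
 *     RTSemEventMultiWaitEx is u64NowSys + cNsInterval = 5 000 100 400 ns,
 *     i.e. the same 100 000 ns interval, just expressed on the system clock
 *     rather than the GIP clock.
 */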
2545
2546
2547/**
2548 * Halt the EMT thread.
2549 *
2550 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2551 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2552 * @param pGVM The global (ring-0) VM structure.
2553 * @param pVM The cross context VM structure.
2554 * @param idCpu The Virtual CPU ID of the calling EMT.
2555 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2556 * @thread EMT(idCpu).
2557 */
2558GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2559{
2560 GVMM_CHECK_SMAP_SETUP();
2561 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2562 PGVMM pGVMM;
2563 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2564 if (RT_SUCCESS(rc))
2565 {
2566 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2567 rc = GVMMR0SchedHalt(pGVM, pVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2568 }
2569 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2570 return rc;
2571}
2572
2573
2574
2575/**
2576 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2577 * a sleeping EMT.
2578 *
2579 * @retval VINF_SUCCESS if successfully woken up.
2580 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2581 *
2582 * @param pGVM The global (ring-0) VM structure.
2583 * @param pGVCpu The global (ring-0) VCPU structure.
2584 */
2585DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2586{
2587 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2588
2589 /*
2590     * Signal the semaphore regardless of whether it's currently blocked on it.
2591     *
2592     * The reason for this is that there is absolutely no way we can be 100%
2593     * certain that it isn't *about* to go to sleep on it and just got
2594     * delayed a bit en route. So, we will always signal the semaphore when
2595     * it is flagged as halted in the VMM.
2596 */
2597/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2598 int rc;
2599 if (pGVCpu->gvmm.s.u64HaltExpire)
2600 {
2601 rc = VINF_SUCCESS;
2602 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2603 }
2604 else
2605 {
2606 rc = VINF_GVM_NOT_BLOCKED;
2607 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2608 }
2609
2610 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2611 AssertRC(rc2);
2612
2613 return rc;
2614}
2615
2616
2617/**
2618 * Wakes up the halted EMT thread so it can service a pending request.
2619 *
2620 * @returns VBox status code.
2621 * @retval VINF_SUCCESS if successfully woken up.
2622 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2623 *
2624 * @param pGVM The global (ring-0) VM structure.
2625 * @param pVM The cross context VM structure.
2626 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2627 * @param fTakeUsedLock Take the used lock or not
2628 * @thread Any but EMT(idCpu).
2629 */
2630GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
2631{
2632 GVMM_CHECK_SMAP_SETUP();
2633 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2634
2635 /*
2636 * Validate input and take the UsedLock.
2637 */
2638 PGVMM pGVMM;
2639 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
2640 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2641 if (RT_SUCCESS(rc))
2642 {
2643 if (idCpu < pGVM->cCpus)
2644 {
2645 /*
2646 * Do the actual job.
2647 */
2648 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2649 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2650
2651 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2652 {
2653 /*
2654 * While we're here, do a round of scheduling.
2655 */
2656 Assert(ASMGetFlags() & X86_EFL_IF);
2657 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2658 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2659 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2660 }
2661 }
2662 else
2663 rc = VERR_INVALID_CPU_ID;
2664
2665 if (fTakeUsedLock)
2666 {
2667 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2668 AssertRC(rc2);
2669 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2670 }
2671 }
2672
2673 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2674 return rc;
2675}
2676
2677
2678/**
2679 * Wakes up the halted EMT thread so it can service a pending request.
2680 *
2681 * @returns VBox status code.
2682 * @retval VINF_SUCCESS if successfully woken up.
2683 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2684 *
2685 * @param pGVM The global (ring-0) VM structure.
2686 * @param pVM The cross context VM structure.
2687 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2688 * @thread Any but EMT(idCpu).
2689 */
2690GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2691{
2692 return GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
2693}
2694
2695
2696/**
2697 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2698 * parameter and no used locking.
2699 *
2700 * @returns VBox status code.
2701 * @retval VINF_SUCCESS if successfully woken up.
2702 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2703 *
2704 * @param pVM The cross context VM structure.
2705 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2706 * @thread Any but EMT(idCpu).
2707 * @deprecated Don't use in new code if possible! Use the GVM variant.
2708 */
2709GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PVM pVM, VMCPUID idCpu)
2710{
2711 GVMM_CHECK_SMAP_SETUP();
2712 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2713 PGVM pGVM;
2714 PGVMM pGVMM;
2715 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
2716 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2717 if (RT_SUCCESS(rc))
2718 rc = GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, false /*fTakeUsedLock*/);
2719 return rc;
2720}
2721
2722
2723/**
2724 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2725 * the Virtual CPU if it's still busy executing guest code.
2726 *
2727 * @returns VBox status code.
2728 * @retval VINF_SUCCESS if poked successfully.
2729 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2730 *
2731 * @param pGVM The global (ring-0) VM structure.
2732 * @param pVCpu The cross context virtual CPU structure.
2733 */
2734DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPU pVCpu)
2735{
2736 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2737
2738 RTCPUID idHostCpu = pVCpu->idHostCpu;
2739 if ( idHostCpu == NIL_RTCPUID
2740 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2741 {
2742 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2743 return VINF_GVM_NOT_BUSY_IN_GC;
2744 }
2745
2746 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2747 RTMpPokeCpu(idHostCpu);
2748 return VINF_SUCCESS;
2749}
2750
2751
2752/**
2753 * Pokes an EMT if it's still busy running guest code.
2754 *
2755 * @returns VBox status code.
2756 * @retval VINF_SUCCESS if poked successfully.
2757 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2758 *
2759 * @param pGVM The global (ring-0) VM structure.
2760 * @param pVM The cross context VM structure.
2761 * @param idCpu The ID of the virtual CPU to poke.
2762 * @param fTakeUsedLock Take the used lock or not
2763 */
2764GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
2765{
2766 /*
2767 * Validate input and take the UsedLock.
2768 */
2769 PGVMM pGVMM;
2770 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
2771 if (RT_SUCCESS(rc))
2772 {
2773 if (idCpu < pGVM->cCpus)
2774#ifdef VBOX_BUGREF_9217
2775 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2776#else
2777 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2778#endif
2779 else
2780 rc = VERR_INVALID_CPU_ID;
2781
2782 if (fTakeUsedLock)
2783 {
2784 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2785 AssertRC(rc2);
2786 }
2787 }
2788
2789    LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2790 return rc;
2791}
2792
2793
2794/**
2795 * Pokes an EMT if it's still busy running guest code.
2796 *
2797 * @returns VBox status code.
2798 * @retval VINF_SUCCESS if poked successfully.
2799 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2800 *
2801 * @param pGVM The global (ring-0) VM structure.
2802 * @param pVM The cross context VM structure.
2803 * @param idCpu The ID of the virtual CPU to poke.
2804 */
2805GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2806{
2807 return GVMMR0SchedPokeEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
2808}
2809
2810
2811/**
2812 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2813 * used locking.
2814 *
2815 * @returns VBox status code.
2816 * @retval VINF_SUCCESS if poked successfully.
2817 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2818 *
2819 * @param pVM The cross context VM structure.
2820 * @param idCpu The ID of the virtual CPU to poke.
2821 *
2822 * @deprecated Don't use in new code if possible! Use the GVM variant.
2823 */
2824GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PVM pVM, VMCPUID idCpu)
2825{
2826 PGVM pGVM;
2827 PGVMM pGVMM;
2828 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
2829 if (RT_SUCCESS(rc))
2830 {
2831 if (idCpu < pGVM->cCpus)
2832#ifdef VBOX_BUGREF_9217
2833 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2834#else
2835 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2836#endif
2837 else
2838 rc = VERR_INVALID_CPU_ID;
2839 }
2840 return rc;
2841}
2842
2843
2844/**
2845 * Wakes up a set of halted EMT threads so they can service pending requests.
2846 *
2847 * @returns VBox status code, no informational stuff.
2848 *
2849 * @param pGVM The global (ring-0) VM structure.
2850 * @param pVM The cross context VM structure.
2851 * @param pSleepSet The set of sleepers to wake up.
2852 * @param pPokeSet The set of CPUs to poke.
2853 */
2854GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PVM pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2855{
2856 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2857 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2858 GVMM_CHECK_SMAP_SETUP();
2859 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2860 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2861
2862 /*
2863 * Validate input and take the UsedLock.
2864 */
2865 PGVMM pGVMM;
2866 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /* fTakeUsedLock */);
2867 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2868 if (RT_SUCCESS(rc))
2869 {
2870 rc = VINF_SUCCESS;
2871 VMCPUID idCpu = pGVM->cCpus;
2872 while (idCpu-- > 0)
2873 {
2874            /* Don't try to poke or wake up ourselves. */
2875 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2876 continue;
2877
2878 /* just ignore errors for now. */
2879 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2880 {
2881 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2882 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2883 }
2884 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2885 {
2886#ifdef VBOX_BUGREF_9217
2887 gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2888#else
2889 gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2890#endif
2891 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2892 }
2893 }
2894
2895 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2896 AssertRC(rc2);
2897 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2898 }
2899
2900 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2901 return rc;
2902}
2903
2904
2905/**
2906 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2907 *
2908 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2909 * @param pGVM The global (ring-0) VM structure.
2910 * @param pVM The cross context VM structure.
2911 * @param pReq Pointer to the request packet.
2912 */
2913GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PVM pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2914{
2915 /*
2916 * Validate input and pass it on.
2917 */
2918 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2919 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2920
2921 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, pVM, &pReq->SleepSet, &pReq->PokeSet);
2922}
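/*
 * Sketch of filling in the request packet consumed by the wrapper above
 * (ring-3 side). Only the cbReq check and the SleepSet/PokeSet fields come
 * from the code above; the VMCPUSET_* macros, SUPVMMR0REQHDR_MAGIC and the
 * VMMR0 operation name are assumptions made for the example:
 *
 *     GVMMSCHEDWAKEUPANDPOKECPUSREQ Req;
 *     Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
 *     Req.Hdr.cbReq    = sizeof(Req);            // must match exactly, see the check above
 *     VMCPUSET_EMPTY(&Req.SleepSet);
 *     VMCPUSET_EMPTY(&Req.PokeSet);
 *     VMCPUSET_ADD(&Req.SleepSet, 1);            // wake the halted EMT of VCPU 1
 *     VMCPUSET_ADD(&Req.PokeSet, 2);             // poke VCPU 2 out of guest execution
 *     // ... submit &Req.Hdr via the VMMR0_DO_GVMM_SCHED_WAKE_UP_AND_POKE_CPUS operation ...
 */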
2923
2924
2925
2926/**
2927 * Poll the schedule to see if someone else should get a chance to run.
2928 *
2929 * This is a bit hackish and will not work too well if the machine is
2930 * under heavy load from non-VM processes.
2931 *
2932 * @returns VINF_SUCCESS if not yielded.
2933 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2934 * @param pGVM The global (ring-0) VM structure.
2935 * @param pVM The cross context VM structure.
2936 * @param idCpu The Virtual CPU ID of the calling EMT.
2937 * @param fYield Whether to yield or not.
2938 * This is for when we're spinning in the halt loop.
2939 * @thread EMT(idCpu).
2940 */
2941GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fYield)
2942{
2943 /*
2944 * Validate input.
2945 */
2946 PGVMM pGVMM;
2947 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2948 if (RT_SUCCESS(rc))
2949 {
2950 /*
2951         * We currently only implement helping with wake-ups (fYield = false), so don't
2952 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2953 */
2954 if (!fYield && pGVMM->fDoEarlyWakeUps)
2955 {
2956 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2957 pGVM->gvmm.s.StatsSched.cPollCalls++;
2958
2959 Assert(ASMGetFlags() & X86_EFL_IF);
2960 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2961
2962 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2963
2964 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2965 }
2966 /*
2967 * Not quite sure what we could do here...
2968 */
2969 else if (fYield)
2970 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2971 else
2972 rc = VINF_SUCCESS;
2973 }
2974
2975    LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
2976 return rc;
2977}
2978
2979
2980#ifdef GVMM_SCHED_WITH_PPT
2981/**
2982 * Timer callback for the periodic preemption timer.
2983 *
2984 * @param pTimer The timer handle.
2985 * @param pvUser Pointer to the per cpu structure.
2986 * @param iTick The current tick.
2987 */
2988static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2989{
2990 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2991 NOREF(pTimer); NOREF(iTick);
2992
2993 /*
2994 * Termination check
2995 */
2996 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2997 return;
2998
2999 /*
3000 * Do the house keeping.
3001 */
3002 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3003
3004 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
3005 {
3006 /*
3007 * Historicize the max frequency.
3008 */
3009 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
3010 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
3011 pCpu->Ppt.iTickHistorization = 0;
3012 pCpu->Ppt.uDesiredHz = 0;
3013
3014 /*
3015         * Check whether the current timer frequency needs changing.
3016 */
3017 uint32_t uHistMaxHz = 0;
3018 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
3019 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
3020 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
3021 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
3022 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3023 else if (uHistMaxHz)
3024 {
3025 /*
3026 * Reprogram it.
3027 */
3028 pCpu->Ppt.cChanges++;
3029 pCpu->Ppt.iTickHistorization = 0;
3030 pCpu->Ppt.uTimerHz = uHistMaxHz;
3031 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
3032 pCpu->Ppt.cNsInterval = cNsInterval;
3033 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3034 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3035 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3036 / cNsInterval;
3037 else
3038 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3039 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3040
3041 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
3042 RTTimerChangeInterval(pTimer, cNsInterval);
3043 }
3044 else
3045 {
3046 /*
3047 * Stop it.
3048 */
3049 pCpu->Ppt.fStarted = false;
3050 pCpu->Ppt.uTimerHz = 0;
3051 pCpu->Ppt.cNsInterval = 0;
3052 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3053
3054 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
3055 RTTimerStop(pTimer);
3056 }
3057 }
3058 else
3059 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3060}
3061#endif /* GVMM_SCHED_WITH_PPT */
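/*
 * Worked example of the reprogramming arithmetic above, assuming for the sake
 * of the numbers that GVMMHOSTCPU_PPT_HIST_INTERVAL_NS is 20 000 000 ns:
 *
 *     uHistMaxHz = 1000  =>  cNsInterval = RT_NS_1SEC / 1000 = 1 000 000 ns
 *     cTicksHistoriziationInterval
 *         = (20 000 000 + 20 000 000 / 2 - 1) / 1 000 000 = 29
 *
 *     i.e. with a 1 ms timer tick the max-frequency history is advanced about
 *     every 29 ticks (~29 ms), keeping the historization period at roughly the
 *     same wall-clock length regardless of the current timer rate.
 */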
3062
3063
3064/**
3065 * Updates the periodic preemption timer for the calling CPU.
3066 *
3067 * The caller must have disabled preemption!
3068 * The caller must check that the host can do high resolution timers.
3069 *
3070 * @param pVM The cross context VM structure.
3071 * @param idHostCpu The current host CPU id.
3072 * @param uHz The desired frequency.
3073 */
3074GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz)
3075{
3076 NOREF(pVM);
3077#ifdef GVMM_SCHED_WITH_PPT
3078 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3079 Assert(RTTimerCanDoHighResolution());
3080
3081 /*
3082 * Resolve the per CPU data.
3083 */
3084 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
3085 PGVMM pGVMM = g_pGVMM;
3086 if ( !VALID_PTR(pGVMM)
3087 || pGVMM->u32Magic != GVMM_MAGIC)
3088 return;
3089 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
3090 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
3091 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
3092 && pCpu->idCpu == idHostCpu,
3093                          ("u32Magic=%#x idCpu=%d idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
3094
3095 /*
3096 * Check whether we need to do anything about the timer.
3097     * We have to be a little bit careful since we might be racing the timer
3098 * callback here.
3099 */
3100 if (uHz > 16384)
3101 uHz = 16384; /** @todo add a query method for this! */
3102 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
3103 && uHz >= pCpu->Ppt.uMinHz
3104 && !pCpu->Ppt.fStarting /* solaris paranoia */))
3105 {
3106 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3107
3108 pCpu->Ppt.uDesiredHz = uHz;
3109 uint32_t cNsInterval = 0;
3110 if (!pCpu->Ppt.fStarted)
3111 {
3112 pCpu->Ppt.cStarts++;
3113 pCpu->Ppt.fStarted = true;
3114 pCpu->Ppt.fStarting = true;
3115 pCpu->Ppt.iTickHistorization = 0;
3116 pCpu->Ppt.uTimerHz = uHz;
3117 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
3118 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3119 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3120 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3121 / cNsInterval;
3122 else
3123 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3124 }
3125
3126 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3127
3128 if (cNsInterval)
3129 {
3130 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
3131 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
3132 AssertRC(rc);
3133
3134 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3135 if (RT_FAILURE(rc))
3136 pCpu->Ppt.fStarted = false;
3137 pCpu->Ppt.fStarting = false;
3138 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3139 }
3140 }
3141#else /* !GVMM_SCHED_WITH_PPT */
3142 NOREF(idHostCpu); NOREF(uHz);
3143#endif /* !GVMM_SCHED_WITH_PPT */
3144}
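/*
 * Sketch of the call-site contract documented above. This is a simplified
 * illustration, not an actual VMMR0 code path; uDesiredHz stands for whatever
 * frequency hint the caller has computed:
 *
 *     RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
 *     RTThreadPreemptDisable(&PreemptState);             // contract: preemption disabled
 *     if (RTTimerCanDoHighResolution())                  // contract: high-res timers available
 *         GVMMR0SchedUpdatePeriodicPreemptionTimer(pVM, RTMpCpuId(), uDesiredHz);
 *     RTThreadPreemptRestore(&PreemptState);
 */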
3145
3146
3147/**
3148 * Retrieves the GVMM statistics visible to the caller.
3149 *
3150 * @returns VBox status code.
3151 *
3152 * @param pStats Where to put the statistics.
3153 * @param pSession The current session.
3154 * @param pGVM The GVM to obtain statistics for. Optional.
3155 * @param pVM The VM structure corresponding to @a pGVM.
3156 */
3157GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
3158{
3159 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
3160
3161 /*
3162 * Validate input.
3163 */
3164 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3165 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3166 pStats->cVMs = 0; /* (crash before taking the sem...) */
3167
3168 /*
3169 * Take the lock and get the VM statistics.
3170 */
3171 PGVMM pGVMM;
3172 if (pGVM)
3173 {
3174 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
3175 if (RT_FAILURE(rc))
3176 return rc;
3177 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
3178 }
3179 else
3180 {
3181 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3182 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
3183
3184 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3185 AssertRCReturn(rc, rc);
3186 }
3187
3188 /*
3189 * Enumerate the VMs and add the ones visible to the statistics.
3190 */
3191 pStats->cVMs = 0;
3192 pStats->cEMTs = 0;
3193 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
3194
3195 for (unsigned i = pGVMM->iUsedHead;
3196 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3197 i = pGVMM->aHandles[i].iNext)
3198 {
3199 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3200 void *pvObj = pGVMM->aHandles[i].pvObj;
3201 if ( VALID_PTR(pvObj)
3202 && VALID_PTR(pOtherGVM)
3203 && pOtherGVM->u32Magic == GVM_MAGIC
3204 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3205 {
3206 pStats->cVMs++;
3207 pStats->cEMTs += pOtherGVM->cCpus;
3208
3209 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
3210 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
3211 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
3212 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
3213 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
3214
3215 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
3216 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
3217 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
3218
3219 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
3220 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
3221
3222 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
3223 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
3224 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
3225 }
3226 }
3227
3228 /*
3229 * Copy out the per host CPU statistics.
3230 */
3231 uint32_t iDstCpu = 0;
3232 uint32_t cSrcCpus = pGVMM->cHostCpus;
3233 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
3234 {
3235 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
3236 {
3237 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
3238 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
3239#ifdef GVMM_SCHED_WITH_PPT
3240 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
3241 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
3242 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
3243 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
3244#else
3245 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
3246 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
3247 pStats->aHostCpus[iDstCpu].cChanges = 0;
3248 pStats->aHostCpus[iDstCpu].cStarts = 0;
3249#endif
3250 iDstCpu++;
3251 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
3252 break;
3253 }
3254 }
3255 pStats->cHostCpus = iDstCpu;
3256
3257 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3258
3259 return VINF_SUCCESS;
3260}
3261
3262
3263/**
3264 * VMMR0 request wrapper for GVMMR0QueryStatistics.
3265 *
3266 * @returns see GVMMR0QueryStatistics.
3267 * @param pGVM The global (ring-0) VM structure. Optional.
3268 * @param pVM The cross context VM structure. Optional.
3269 * @param pReq Pointer to the request packet.
3270 * @param pSession The current session.
3271 */
3272GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PVM pVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3273{
3274 /*
3275 * Validate input and pass it on.
3276 */
3277 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3278 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3279 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3280
3281 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM, pVM);
3282}
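/*
 * Sketch of the matching request packet for the wrapper above (ring-3 side).
 * Only the cbReq and pSession checks and the Stats field come from the code;
 * the magic value and the rest of the preparation are assumptions made for
 * the example:
 *
 *     GVMMQUERYSTATISTICSSREQ Req;
 *     Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
 *     Req.Hdr.cbReq    = sizeof(Req);        // must match exactly, see the check above
 *     Req.pSession     = pSession;           // must be the session making the call
 *     // ... submit, then read Req.Stats on VINF_SUCCESS ...
 */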
3283
3284
3285/**
3286 * Resets the specified GVMM statistics.
3287 *
3288 * @returns VBox status code.
3289 *
3290 * @param pStats Which statistics to reset, that is, non-zero fields indicate which to reset.
3291 * @param pSession The current session.
3292 * @param pGVM The GVM to reset statistics for. Optional.
3293 * @param pVM The VM structure corresponding to @a pGVM.
3294 */
3295GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
3296{
3297 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
3298
3299 /*
3300 * Validate input.
3301 */
3302 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3303 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3304
3305 /*
3306 * Take the lock and get the VM statistics.
3307 */
3308 PGVMM pGVMM;
3309 if (pGVM)
3310 {
3311 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
3312 if (RT_FAILURE(rc))
3313 return rc;
3314# define MAYBE_RESET_FIELD(field) \
3315 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3316 MAYBE_RESET_FIELD(cHaltCalls);
3317 MAYBE_RESET_FIELD(cHaltBlocking);
3318 MAYBE_RESET_FIELD(cHaltTimeouts);
3319 MAYBE_RESET_FIELD(cHaltNotBlocking);
3320 MAYBE_RESET_FIELD(cHaltWakeUps);
3321 MAYBE_RESET_FIELD(cWakeUpCalls);
3322 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3323 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3324 MAYBE_RESET_FIELD(cPokeCalls);
3325 MAYBE_RESET_FIELD(cPokeNotBusy);
3326 MAYBE_RESET_FIELD(cPollCalls);
3327 MAYBE_RESET_FIELD(cPollHalts);
3328 MAYBE_RESET_FIELD(cPollWakeUps);
3329# undef MAYBE_RESET_FIELD
3330 }
3331 else
3332 {
3333 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3334
3335 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3336 AssertRCReturn(rc, rc);
3337 }
3338
3339 /*
3340 * Enumerate the VMs and add the ones visible to the statistics.
3341 */
3342 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
3343 {
3344 for (unsigned i = pGVMM->iUsedHead;
3345 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3346 i = pGVMM->aHandles[i].iNext)
3347 {
3348 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3349 void *pvObj = pGVMM->aHandles[i].pvObj;
3350 if ( VALID_PTR(pvObj)
3351 && VALID_PTR(pOtherGVM)
3352 && pOtherGVM->u32Magic == GVM_MAGIC
3353 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3354 {
3355# define MAYBE_RESET_FIELD(field) \
3356 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3357 MAYBE_RESET_FIELD(cHaltCalls);
3358 MAYBE_RESET_FIELD(cHaltBlocking);
3359 MAYBE_RESET_FIELD(cHaltTimeouts);
3360 MAYBE_RESET_FIELD(cHaltNotBlocking);
3361 MAYBE_RESET_FIELD(cHaltWakeUps);
3362 MAYBE_RESET_FIELD(cWakeUpCalls);
3363 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3364 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3365 MAYBE_RESET_FIELD(cPokeCalls);
3366 MAYBE_RESET_FIELD(cPokeNotBusy);
3367 MAYBE_RESET_FIELD(cPollCalls);
3368 MAYBE_RESET_FIELD(cPollHalts);
3369 MAYBE_RESET_FIELD(cPollWakeUps);
3370# undef MAYBE_RESET_FIELD
3371 }
3372 }
3373 }
3374
3375 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3376
3377 return VINF_SUCCESS;
3378}
3379
3380
3381/**
3382 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3383 *
3384 * @returns see GVMMR0ResetStatistics.
3385 * @param pGVM The global (ring-0) VM structure. Optional.
3386 * @param pVM The cross context VM structure. Optional.
3387 * @param pReq Pointer to the request packet.
3388 * @param pSession The current session.
3389 */
3390GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PVM pVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3391{
3392 /*
3393 * Validate input and pass it on.
3394 */
3395 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3396 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3397 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3398
3399 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM, pVM);
3400}
3401