VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 72186

Last change on this file since 72186 was 71222, checked in by vboxsync, 7 years ago

NEM/win,VMM,PGM: Ported NEM runloop to ring-0. bugref:9044

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 105.9 KB
Line 
1/* $Id: GVMMR0.cpp 71222 2018-03-05 22:07:48Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2017 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered, this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-cpu
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * The TMCalcHostTimerFrequency() part of things takes the max
42 * TMTimerSetFrequencyHint() value and adjusts by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
47
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/gvm.h>
57#include <VBox/vmm/vm.h>
58#include <VBox/vmm/vmcpuset.h>
59#include <VBox/vmm/vmm.h>
60#ifdef VBOX_WITH_NEM_R0
61# include <VBox/vmm/nem.h>
62#endif
63#include <VBox/param.h>
64#include <VBox/err.h>
65
66#include <iprt/asm.h>
67#include <iprt/asm-amd64-x86.h>
68#include <iprt/critsect.h>
69#include <iprt/mem.h>
70#include <iprt/semaphore.h>
71#include <iprt/time.h>
72#include <VBox/log.h>
73#include <iprt/thread.h>
74#include <iprt/process.h>
75#include <iprt/param.h>
76#include <iprt/string.h>
77#include <iprt/assert.h>
78#include <iprt/mem.h>
79#include <iprt/memobj.h>
80#include <iprt/mp.h>
81#include <iprt/cpuset.h>
82#include <iprt/spinlock.h>
83#include <iprt/timer.h>
84
85#include "dtrace/VBoxVMM.h"
86
87
88/*********************************************************************************************************************************
89* Defined Constants And Macros *
90*********************************************************************************************************************************/
#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
/** Defined when the periodic preemption timer (PPT) code is to be compiled in. */
# define GVMM_SCHED_WITH_PPT
#endif


/** @def GVMM_CHECK_SMAP_SETUP
 * Declares the local @c fKernelFeatures constant which the two check macros
 * below test.  Must appear in the function before either check is used. */
/** @def GVMM_CHECK_SMAP_CHECK
 * When the kernel reports SMAP, verifies that EFLAGS.AC is set.  If it is
 * clear, a message is printed via SUPR0Printf and @a a_BadExpr is executed. */
/** @def GVMM_CHECK_SMAP_CHECK2
 * Same check as GVMM_CHECK_SMAP_CHECK, but a clear AC flag is reported through
 * SUPR0BadContext (using the session of @a a_pVM when it is non-NULL) before
 * @a a_BadExpr is executed. */
#if defined(VBOX_STRICT) || 1 /* the '|| 1' keeps the checks enabled in all builds */
# define GVMM_CHECK_SMAP_SETUP()    uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) \
    do { \
        if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
        { \
            RTCCUINTREG fEflCheck = ASMGetFlags(); \
            if (RT_UNLIKELY(!(fEflCheck & X86_EFL_AC))) \
            { \
                SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
                a_BadExpr; \
            } \
        } \
    } while (0)
# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) \
    do { \
        if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
        { \
            RTCCUINTREG fEflCheck = ASMGetFlags(); \
            if (RT_UNLIKELY(!(fEflCheck & X86_EFL_AC))) \
            { \
                SUPR0BadContext((a_pVM) ? (a_pVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
                a_BadExpr; \
            } \
        } \
    } while (0)
#else
# define GVMM_CHECK_SMAP_SETUP()                    uint32_t const fKernelFeatures = 0
# define GVMM_CHECK_SMAP_CHECK(a_BadExpr)           NOREF(fKernelFeatures)
# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr)   NOREF(fKernelFeatures)
#endif
141
142
143
144/*********************************************************************************************************************************
145* Structures and Typedefs *
146*********************************************************************************************************************************/
147
148/**
149 * Global VM handle.
150 */
151typedef struct GVMHANDLE
152{
153 /** The index of the next handle in the list (free or used). (0 is nil.) */
154 uint16_t volatile iNext;
155 /** Our own index / handle value. */
156 uint16_t iSelf;
157 /** The process ID of the handle owner.
158 * This is used for access checks. */
159 RTPROCESS ProcId;
160 /** The pointer to the ring-0 only (aka global) VM structure. */
161 PGVM pGVM;
162 /** The ring-0 mapping of the shared VM instance data. */
163 PVM pVM;
164 /** The virtual machine object. */
165 void *pvObj;
166 /** The session this VM is associated with. */
167 PSUPDRVSESSION pSession;
168 /** The ring-0 handle of the EMT0 thread.
169 * This is used for ownership checks as well as looking up a VM handle by thread
170 * at times like assertions. */
171 RTNATIVETHREAD hEMT0;
172} GVMHANDLE;
173/** Pointer to a global VM handle. */
174typedef GVMHANDLE *PGVMHANDLE;
175
176/** Number of GVM handles (including the NIL handle). */
177#if HC_ARCH_BITS == 64
178# define GVMM_MAX_HANDLES 8192
179#else
180# define GVMM_MAX_HANDLES 128
181#endif
182
183/**
184 * Per host CPU GVMM data.
185 */
186typedef struct GVMMHOSTCPU
187{
188 /** Magic number (GVMMHOSTCPU_MAGIC). */
189 uint32_t volatile u32Magic;
190 /** The CPU ID. */
191 RTCPUID idCpu;
192 /** The CPU set index. */
193 uint32_t idxCpuSet;
194
195#ifdef GVMM_SCHED_WITH_PPT
196 /** Periodic preemption timer data. */
197 struct
198 {
199 /** The handle to the periodic preemption timer. */
200 PRTTIMER pTimer;
201 /** Spinlock protecting the data below. */
202 RTSPINLOCK hSpinlock;
203 /** The smalles Hz that we need to care about. (static) */
204 uint32_t uMinHz;
205 /** The number of ticks between each historization. */
206 uint32_t cTicksHistoriziationInterval;
207 /** The current historization tick (counting up to
208 * cTicksHistoriziationInterval and then resetting). */
209 uint32_t iTickHistorization;
210 /** The current timer interval. This is set to 0 when inactive. */
211 uint32_t cNsInterval;
212 /** The current timer frequency. This is set to 0 when inactive. */
213 uint32_t uTimerHz;
214 /** The current max frequency reported by the EMTs.
215 * This gets historicize and reset by the timer callback. This is
216 * read without holding the spinlock, so needs atomic updating. */
217 uint32_t volatile uDesiredHz;
218 /** Whether the timer was started or not. */
219 bool volatile fStarted;
220 /** Set if we're starting timer. */
221 bool volatile fStarting;
222 /** The index of the next history entry (mod it). */
223 uint32_t iHzHistory;
224 /** Historicized uDesiredHz values. The array wraps around, new entries
225 * are added at iHzHistory. This is updated approximately every
226 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
227 uint32_t aHzHistory[8];
228 /** Statistics counter for recording the number of interval changes. */
229 uint32_t cChanges;
230 /** Statistics counter for recording the number of timer starts. */
231 uint32_t cStarts;
232 } Ppt;
233#endif /* GVMM_SCHED_WITH_PPT */
234
235} GVMMHOSTCPU;
236/** Pointer to the per host CPU GVMM data. */
237typedef GVMMHOSTCPU *PGVMMHOSTCPU;
238/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
239#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
240/** The interval on history entry should cover (approximately) give in
241 * nanoseconds. */
242#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
243
244
245/**
246 * The GVMM instance data.
247 */
248typedef struct GVMM
249{
250 /** Eyecatcher / magic. */
251 uint32_t u32Magic;
252 /** The index of the head of the free handle chain. (0 is nil.) */
253 uint16_t volatile iFreeHead;
254 /** The index of the head of the active handle chain. (0 is nil.) */
255 uint16_t volatile iUsedHead;
256 /** The number of VMs. */
257 uint16_t volatile cVMs;
258 /** Alignment padding. */
259 uint16_t u16Reserved;
260 /** The number of EMTs. */
261 uint32_t volatile cEMTs;
262 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
263 uint32_t volatile cHaltedEMTs;
264 /** Mini lock for restricting early wake-ups to one thread. */
265 bool volatile fDoingEarlyWakeUps;
266 bool afPadding[3]; /**< explicit alignment padding. */
267 /** When the next halted or sleeping EMT will wake up.
268 * This is set to 0 when it needs recalculating and to UINT64_MAX when
269 * there are no halted or sleeping EMTs in the GVMM. */
270 uint64_t uNsNextEmtWakeup;
271 /** The lock used to serialize VM creation, destruction and associated events that
272 * isn't performance critical. Owners may acquire the list lock. */
273 RTCRITSECT CreateDestroyLock;
274 /** The lock used to serialize used list updates and accesses.
275 * This indirectly includes scheduling since the scheduler will have to walk the
276 * used list to examin running VMs. Owners may not acquire any other locks. */
277 RTCRITSECTRW UsedLock;
278 /** The handle array.
279 * The size of this array defines the maximum number of currently running VMs.
280 * The first entry is unused as it represents the NIL handle. */
281 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
282
283 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
284 * The number of EMTs that means we no longer consider ourselves alone on a
285 * CPU/Core.
286 */
287 uint32_t cEMTsMeansCompany;
288 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
289 * The minimum sleep time for when we're alone, in nano seconds.
290 */
291 uint32_t nsMinSleepAlone;
292 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
293 * The minimum sleep time for when we've got company, in nano seconds.
294 */
295 uint32_t nsMinSleepCompany;
296 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
297 * The limit for the first round of early wake-ups, given in nano seconds.
298 */
299 uint32_t nsEarlyWakeUp1;
300 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
301 * The limit for the second round of early wake-ups, given in nano seconds.
302 */
303 uint32_t nsEarlyWakeUp2;
304
305 /** Set if we're doing early wake-ups.
306 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
307 bool volatile fDoEarlyWakeUps;
308
309 /** The number of entries in the host CPU array (aHostCpus). */
310 uint32_t cHostCpus;
311 /** Per host CPU data (variable length). */
312 GVMMHOSTCPU aHostCpus[1];
313} GVMM;
314AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
315AssertCompileMemberAlignment(GVMM, UsedLock, 8);
316AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
317/** Pointer to the GVMM instance data. */
318typedef GVMM *PGVMM;
319
320/** The GVMM::u32Magic value (Charlie Haden). */
321#define GVMM_MAGIC UINT32_C(0x19370806)
322
323
324
325/*********************************************************************************************************************************
326* Global Variables *
327*********************************************************************************************************************************/
328/** Pointer to the GVMM instance data.
329 * (Just my general dislike for global variables.) */
330static PGVMM g_pGVMM = NULL;
331
332/** Macro for obtaining and validating the g_pGVMM pointer.
333 * On failure it will return from the invoking function with the specified return value.
334 *
335 * @param pGVMM The name of the pGVMM variable.
336 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
337 * status codes.
338 */
339#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
340 do { \
341 (pGVMM) = g_pGVMM;\
342 AssertPtrReturn((pGVMM), (rc)); \
343 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
344 } while (0)
345
346/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
347 * On failure it will return from the invoking function.
348 *
349 * @param pGVMM The name of the pGVMM variable.
350 */
351#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
352 do { \
353 (pGVMM) = g_pGVMM;\
354 AssertPtrReturnVoid((pGVMM)); \
355 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
356 } while (0)
357
358
359/*********************************************************************************************************************************
360* Internal Functions *
361*********************************************************************************************************************************/
362static void gvmmR0InitPerVMData(PGVM pGVM);
363static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
364static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock);
365static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM);
366
367#ifdef GVMM_SCHED_WITH_PPT
368static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
369#endif
370
371
372/**
373 * Initializes the GVMM.
374 *
375 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
376 *
377 * @returns VBox status code.
378 */
379GVMMR0DECL(int) GVMMR0Init(void)
380{
381 LogFlow(("GVMMR0Init:\n"));
382
383 /*
384 * Allocate and initialize the instance data.
385 */
386 uint32_t cHostCpus = RTMpGetArraySize();
387 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
388
389 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF(GVMM, aHostCpus[cHostCpus]));
390 if (!pGVMM)
391 return VERR_NO_MEMORY;
392 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
393 "GVMM-CreateDestroyLock");
394 if (RT_SUCCESS(rc))
395 {
396 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
397 if (RT_SUCCESS(rc))
398 {
399 pGVMM->u32Magic = GVMM_MAGIC;
400 pGVMM->iUsedHead = 0;
401 pGVMM->iFreeHead = 1;
402
403 /* the nil handle */
404 pGVMM->aHandles[0].iSelf = 0;
405 pGVMM->aHandles[0].iNext = 0;
406
407 /* the tail */
408 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
409 pGVMM->aHandles[i].iSelf = i;
410 pGVMM->aHandles[i].iNext = 0; /* nil */
411
412 /* the rest */
413 while (i-- > 1)
414 {
415 pGVMM->aHandles[i].iSelf = i;
416 pGVMM->aHandles[i].iNext = i + 1;
417 }
418
419 /* The default configuration values. */
420 uint32_t cNsResolution = RTSemEventMultiGetResolution();
421 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted to relative to the cpu count or something... */
422 if (cNsResolution >= 5*RT_NS_100US)
423 {
424 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
425 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
426 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
427 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
428 }
429 else if (cNsResolution > RT_NS_100US)
430 {
431 pGVMM->nsMinSleepAlone = cNsResolution / 2;
432 pGVMM->nsMinSleepCompany = cNsResolution / 4;
433 pGVMM->nsEarlyWakeUp1 = 0;
434 pGVMM->nsEarlyWakeUp2 = 0;
435 }
436 else
437 {
438 pGVMM->nsMinSleepAlone = 2000;
439 pGVMM->nsMinSleepCompany = 2000;
440 pGVMM->nsEarlyWakeUp1 = 0;
441 pGVMM->nsEarlyWakeUp2 = 0;
442 }
443 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
444
445 /* The host CPU data. */
446 pGVMM->cHostCpus = cHostCpus;
447 uint32_t iCpu = cHostCpus;
448 RTCPUSET PossibleSet;
449 RTMpGetSet(&PossibleSet);
450 while (iCpu-- > 0)
451 {
452 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
453#ifdef GVMM_SCHED_WITH_PPT
454 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
455 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
456 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
457 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
458 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
459 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
460 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
461 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
462 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
463 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
464 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
465 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
466#endif
467
468 if (RTCpuSetIsMember(&PossibleSet, iCpu))
469 {
470 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
471 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
472
473#ifdef GVMM_SCHED_WITH_PPT
474 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
475 50*1000*1000 /* whatever */,
476 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
477 gvmmR0SchedPeriodicPreemptionTimerCallback,
478 &pGVMM->aHostCpus[iCpu]);
479 if (RT_SUCCESS(rc))
480 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
481 if (RT_FAILURE(rc))
482 {
483 while (iCpu < cHostCpus)
484 {
485 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
486 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
487 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
488 iCpu++;
489 }
490 break;
491 }
492#endif
493 }
494 else
495 {
496 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
497 pGVMM->aHostCpus[iCpu].u32Magic = 0;
498 }
499 }
500 if (RT_SUCCESS(rc))
501 {
502 g_pGVMM = pGVMM;
503 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
504 return VINF_SUCCESS;
505 }
506
507 /* bail out. */
508 RTCritSectRwDelete(&pGVMM->UsedLock);
509 }
510 RTCritSectDelete(&pGVMM->CreateDestroyLock);
511 }
512
513 RTMemFree(pGVMM);
514 return rc;
515}
516
517
518/**
519 * Terminates the GVM.
520 *
521 * This is called while owning the loader semaphore (see supdrvLdrFree()).
522 * And unless something is wrong, there should be absolutely no VMs
523 * registered at this point.
524 */
525GVMMR0DECL(void) GVMMR0Term(void)
526{
527 LogFlow(("GVMMR0Term:\n"));
528
529 PGVMM pGVMM = g_pGVMM;
530 g_pGVMM = NULL;
531 if (RT_UNLIKELY(!VALID_PTR(pGVMM)))
532 {
533 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
534 return;
535 }
536
537 /*
538 * First of all, stop all active timers.
539 */
540 uint32_t cActiveTimers = 0;
541 uint32_t iCpu = pGVMM->cHostCpus;
542 while (iCpu-- > 0)
543 {
544 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
545#ifdef GVMM_SCHED_WITH_PPT
546 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
547 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
548 cActiveTimers++;
549#endif
550 }
551 if (cActiveTimers)
552 RTThreadSleep(1); /* fudge */
553
554 /*
555 * Invalidate the and free resources.
556 */
557 pGVMM->u32Magic = ~GVMM_MAGIC;
558 RTCritSectRwDelete(&pGVMM->UsedLock);
559 RTCritSectDelete(&pGVMM->CreateDestroyLock);
560
561 pGVMM->iFreeHead = 0;
562 if (pGVMM->iUsedHead)
563 {
564 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
565 pGVMM->iUsedHead = 0;
566 }
567
568#ifdef GVMM_SCHED_WITH_PPT
569 iCpu = pGVMM->cHostCpus;
570 while (iCpu-- > 0)
571 {
572 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
573 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
574 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
575 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
576 }
577#endif
578
579 RTMemFree(pGVMM);
580}
581
582
583/**
584 * A quick hack for setting global config values.
585 *
586 * @returns VBox status code.
587 *
588 * @param pSession The session handle. Used for authentication.
589 * @param pszName The variable name.
590 * @param u64Value The new value.
591 */
592GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
593{
594 /*
595 * Validate input.
596 */
597 PGVMM pGVMM;
598 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
599 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
600 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
601
602 /*
603 * String switch time!
604 */
605 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
606 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
607 int rc = VINF_SUCCESS;
608 pszName += sizeof("/GVMM/") - 1;
609 if (!strcmp(pszName, "cEMTsMeansCompany"))
610 {
611 if (u64Value <= UINT32_MAX)
612 pGVMM->cEMTsMeansCompany = u64Value;
613 else
614 rc = VERR_OUT_OF_RANGE;
615 }
616 else if (!strcmp(pszName, "MinSleepAlone"))
617 {
618 if (u64Value <= RT_NS_100MS)
619 pGVMM->nsMinSleepAlone = u64Value;
620 else
621 rc = VERR_OUT_OF_RANGE;
622 }
623 else if (!strcmp(pszName, "MinSleepCompany"))
624 {
625 if (u64Value <= RT_NS_100MS)
626 pGVMM->nsMinSleepCompany = u64Value;
627 else
628 rc = VERR_OUT_OF_RANGE;
629 }
630 else if (!strcmp(pszName, "EarlyWakeUp1"))
631 {
632 if (u64Value <= RT_NS_100MS)
633 {
634 pGVMM->nsEarlyWakeUp1 = u64Value;
635 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
636 }
637 else
638 rc = VERR_OUT_OF_RANGE;
639 }
640 else if (!strcmp(pszName, "EarlyWakeUp2"))
641 {
642 if (u64Value <= RT_NS_100MS)
643 {
644 pGVMM->nsEarlyWakeUp2 = u64Value;
645 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
646 }
647 else
648 rc = VERR_OUT_OF_RANGE;
649 }
650 else
651 rc = VERR_CFGM_VALUE_NOT_FOUND;
652 return rc;
653}
654
655
656/**
657 * A quick hack for getting global config values.
658 *
659 * @returns VBox status code.
660 *
661 * @param pSession The session handle. Used for authentication.
662 * @param pszName The variable name.
663 * @param pu64Value Where to return the value.
664 */
665GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
666{
667 /*
668 * Validate input.
669 */
670 PGVMM pGVMM;
671 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
672 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
673 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
674 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
675
676 /*
677 * String switch time!
678 */
679 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
680 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
681 int rc = VINF_SUCCESS;
682 pszName += sizeof("/GVMM/") - 1;
683 if (!strcmp(pszName, "cEMTsMeansCompany"))
684 *pu64Value = pGVMM->cEMTsMeansCompany;
685 else if (!strcmp(pszName, "MinSleepAlone"))
686 *pu64Value = pGVMM->nsMinSleepAlone;
687 else if (!strcmp(pszName, "MinSleepCompany"))
688 *pu64Value = pGVMM->nsMinSleepCompany;
689 else if (!strcmp(pszName, "EarlyWakeUp1"))
690 *pu64Value = pGVMM->nsEarlyWakeUp1;
691 else if (!strcmp(pszName, "EarlyWakeUp2"))
692 *pu64Value = pGVMM->nsEarlyWakeUp2;
693 else
694 rc = VERR_CFGM_VALUE_NOT_FOUND;
695 return rc;
696}
697
698
/**
 * Enters the 'used' lock (GVMM::UsedLock) in shared mode.
 *
 * This prevents destruction of the VM while we're in ring-0.
 *
 * @returns IPRT status code, see RTCritSectRwEnterShared.
 * @param   a_pGVMM     The GVMM instance data.
 * @sa      GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
 */
#define GVMMR0_USED_SHARED_LOCK(a_pGVMM)        RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)

/**
 * Leaves the 'used' lock when owning it in shared mode.
 *
 * @returns IPRT status code, see RTCritSectRwLeaveShared.
 * @param   a_pGVMM     The GVMM instance data.
 * @sa      GVMMR0_USED_SHARED_LOCK
 */
#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM)      RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)

/**
 * Enters the 'used' lock (GVMM::UsedLock) in exclusive mode.
 *
 * Only use this when making changes to the used list.
 *
 * @returns IPRT status code, see RTCritSectRwEnterExcl.
 * @param   a_pGVMM     The GVMM instance data.
 * @sa      GVMMR0_USED_EXCLUSIVE_UNLOCK
 */
#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM)     RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)

/**
 * Leaves the 'used' lock when owning it in exclusive mode.
 *
 * @returns IPRT status code, see RTCritSectRwLeaveExcl.
 * @param   a_pGVMM     The GVMM instance data.
 * @sa      GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
 */
#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM)   RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
738
739
740/**
741 * Try acquire the 'create & destroy' lock.
742 *
743 * @returns IPRT status code, see RTSemFastMutexRequest.
744 * @param pGVMM The GVMM instance data.
745 */
746DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
747{
748 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
749 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
750 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
751 return rc;
752}
753
754
755/**
756 * Release the 'create & destroy' lock.
757 *
758 * @returns IPRT status code, see RTSemFastMutexRequest.
759 * @param pGVMM The GVMM instance data.
760 */
761DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
762{
763 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
764 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
765 AssertRC(rc);
766 return rc;
767}
768
769
770/**
771 * Request wrapper for the GVMMR0CreateVM API.
772 *
773 * @returns VBox status code.
774 * @param pReq The request buffer.
775 * @param pSession The session handle. The VM will be associated with this.
776 */
777GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
778{
779 /*
780 * Validate the request.
781 */
782 if (!VALID_PTR(pReq))
783 return VERR_INVALID_POINTER;
784 if (pReq->Hdr.cbReq != sizeof(*pReq))
785 return VERR_INVALID_PARAMETER;
786 if (pReq->pSession != pSession)
787 return VERR_INVALID_POINTER;
788
789 /*
790 * Execute it.
791 */
792 PVM pVM;
793 pReq->pVMR0 = NULL;
794 pReq->pVMR3 = NIL_RTR3PTR;
795 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pVM);
796 if (RT_SUCCESS(rc))
797 {
798 pReq->pVMR0 = pVM;
799 pReq->pVMR3 = pVM->pVMR3;
800 }
801 return rc;
802}
803
804
805/**
806 * Allocates the VM structure and registers it with GVM.
807 *
808 * The caller will become the VM owner and there by the EMT.
809 *
810 * @returns VBox status code.
811 * @param pSession The support driver session.
812 * @param cCpus Number of virtual CPUs for the new VM.
813 * @param ppVM Where to store the pointer to the VM structure.
814 *
815 * @thread EMT.
816 */
817GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM)
818{
819 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
820 PGVMM pGVMM;
821 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
822
823 AssertPtrReturn(ppVM, VERR_INVALID_POINTER);
824 *ppVM = NULL;
825
826 if ( cCpus == 0
827 || cCpus > VMM_MAX_CPU_COUNT)
828 return VERR_INVALID_PARAMETER;
829
830 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
831 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
832 RTPROCESS ProcId = RTProcSelf();
833 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
834
835 /*
836 * The whole allocation process is protected by the lock.
837 */
838 int rc = gvmmR0CreateDestroyLock(pGVMM);
839 AssertRCReturn(rc, rc);
840
841 /*
842 * Only one VM per session.
843 */
844 if (SUPR0GetSessionVM(pSession) != NULL)
845 {
846 gvmmR0CreateDestroyUnlock(pGVMM);
847 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
848 return VERR_ALREADY_EXISTS;
849 }
850
851 /*
852 * Allocate a handle first so we don't waste resources unnecessarily.
853 */
854 uint16_t iHandle = pGVMM->iFreeHead;
855 if (iHandle)
856 {
857 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
858
859 /* consistency checks, a bit paranoid as always. */
860 if ( !pHandle->pVM
861 && !pHandle->pGVM
862 && !pHandle->pvObj
863 && pHandle->iSelf == iHandle)
864 {
865 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
866 if (pHandle->pvObj)
867 {
868 /*
869 * Move the handle from the free to used list and perform permission checks.
870 */
871 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
872 AssertRC(rc);
873
874 pGVMM->iFreeHead = pHandle->iNext;
875 pHandle->iNext = pGVMM->iUsedHead;
876 pGVMM->iUsedHead = iHandle;
877 pGVMM->cVMs++;
878
879 pHandle->pVM = NULL;
880 pHandle->pGVM = NULL;
881 pHandle->pSession = pSession;
882 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
883 pHandle->ProcId = NIL_RTPROCESS;
884
885 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
886
887 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
888 if (RT_SUCCESS(rc))
889 {
890 /*
891 * Allocate the global VM structure (GVM) and initialize it.
892 */
893 PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF(GVM, aCpus[cCpus]));
894 if (pGVM)
895 {
896 pGVM->u32Magic = GVM_MAGIC;
897 pGVM->hSelf = iHandle;
898 pGVM->pVM = NULL;
899 pGVM->cCpus = cCpus;
900 pGVM->pSession = pSession;
901
902 gvmmR0InitPerVMData(pGVM);
903 GMMR0InitPerVMData(pGVM);
904
905 /*
906 * Allocate the shared VM structure and associated page array.
907 */
908 const uint32_t cbVM = RT_UOFFSETOF(VM, aCpus[cCpus]);
909 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
910 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
911 if (RT_SUCCESS(rc))
912 {
913 PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM);
914 memset(pVM, 0, cPages << PAGE_SHIFT);
915 pVM->enmVMState = VMSTATE_CREATING;
916 pVM->pVMR0 = pVM;
917 pVM->pSession = pSession;
918 pVM->hSelf = iHandle;
919 pVM->cbSelf = cbVM;
920 pVM->cCpus = cCpus;
921 pVM->uCpuExecutionCap = 100; /* default is no cap. */
922 pVM->offVMCPU = RT_UOFFSETOF(VM, aCpus);
923 AssertCompileMemberAlignment(VM, cpum, 64);
924 AssertCompileMemberAlignment(VM, tm, 64);
925 AssertCompileMemberAlignment(VM, aCpus, PAGE_SIZE);
926
927 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
928 if (RT_SUCCESS(rc))
929 {
930 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
931 for (uint32_t iPage = 0; iPage < cPages; iPage++)
932 {
933 paPages[iPage].uReserved = 0;
934 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
935 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
936 }
937
938 /*
939 * Map them into ring-3.
940 */
941 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
942 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
943 if (RT_SUCCESS(rc))
944 {
945 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
946 pVM->pVMR3 = pVMR3;
947 AssertPtr((void *)pVMR3);
948
949 /* Initialize all the VM pointers. */
950 for (uint32_t i = 0; i < cCpus; i++)
951 {
952 pVM->aCpus[i].pVMR0 = pVM;
953 pVM->aCpus[i].pVMR3 = pVMR3;
954 pVM->aCpus[i].idHostCpu = NIL_RTCPUID;
955 pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
956 }
957
958 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
959 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
960 NIL_RTR0PROCESS);
961 if (RT_SUCCESS(rc))
962 {
963 pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
964 AssertPtr((void *)pVM->paVMPagesR3);
965
966 /* complete the handle - take the UsedLock sem just to be careful. */
967 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
968 AssertRC(rc);
969
970 pHandle->pVM = pVM;
971 pHandle->pGVM = pGVM;
972 pHandle->hEMT0 = hEMT0;
973 pHandle->ProcId = ProcId;
974 pGVM->pVM = pVM;
975 pGVM->pVMR3 = pVMR3;
976 pGVM->aCpus[0].hEMT = hEMT0;
977 pVM->aCpus[0].hNativeThreadR0 = hEMT0;
978 pGVMM->cEMTs += cCpus;
979
980 for (uint32_t i = 0; i < cCpus; i++)
981 {
982 pGVM->aCpus[i].pVCpu = &pVM->aCpus[i];
983 pGVM->aCpus[i].pVM = pVM;
984 }
985
986 /* Associate it with the session and create the context hook for EMT0. */
987 rc = SUPR0SetSessionVM(pSession, pGVM, pVM);
988 if (RT_SUCCESS(rc))
989 {
990 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[0]);
991 if (RT_SUCCESS(rc))
992 {
993 /*
994 * Done!
995 */
996 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pVM, ProcId, (void *)hEMT0, cCpus);
997
998 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
999 gvmmR0CreateDestroyUnlock(pGVMM);
1000
1001 CPUMR0RegisterVCpuThread(&pVM->aCpus[0]);
1002
1003 *ppVM = pVM;
1004 Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVMR3, pGVM, iHandle));
1005 return VINF_SUCCESS;
1006 }
1007
1008 SUPR0SetSessionVM(pSession, NULL, NULL);
1009 }
1010 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1011 }
1012
1013 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1014 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1015 }
1016 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
1017 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1018 }
1019 RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
1020 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1021 }
1022 }
1023 }
1024 /* else: The user wasn't permitted to create this VM. */
1025
1026 /*
1027 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1028 * object reference here. A little extra mess because of non-recursive lock.
1029 */
1030 void *pvObj = pHandle->pvObj;
1031 pHandle->pvObj = NULL;
1032 gvmmR0CreateDestroyUnlock(pGVMM);
1033
1034 SUPR0ObjRelease(pvObj, pSession);
1035
1036 SUPR0Printf("GVMMR0CreateVM: failed, rc=%d\n", rc);
1037 return rc;
1038 }
1039
1040 rc = VERR_NO_MEMORY;
1041 }
1042 else
1043 rc = VERR_GVMM_IPE_1;
1044 }
1045 else
1046 rc = VERR_GVM_TOO_MANY_VMS;
1047
1048 gvmmR0CreateDestroyUnlock(pGVMM);
1049 return rc;
1050}
1051
1052
1053/**
1054 * Initializes the per VM data belonging to GVMM.
1055 *
1056 * @param pGVM Pointer to the global VM structure.
1057 */
1058static void gvmmR0InitPerVMData(PGVM pGVM)
1059{
1060 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1061 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1062 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1063 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1064 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1065 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1066 pGVM->gvmm.s.fDoneVMMR0Init = false;
1067 pGVM->gvmm.s.fDoneVMMR0Term = false;
1068
1069 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1070 {
1071 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1072 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1073 pGVM->aCpus[i].pGVM = pGVM;
1074 pGVM->aCpus[i].pVCpu = NULL;
1075 pGVM->aCpus[i].pVM = NULL;
1076 }
1077}
1078
1079
1080/**
1081 * Does the VM initialization.
1082 *
1083 * @returns VBox status code.
1084 * @param pGVM The global (ring-0) VM structure.
1085 */
1086GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1087{
1088 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1089
1090 int rc = VERR_INTERNAL_ERROR_3;
1091 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1092 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1093 {
1094 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1095 {
1096 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1097 if (RT_FAILURE(rc))
1098 {
1099 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1100 break;
1101 }
1102 }
1103 }
1104 else
1105 rc = VERR_WRONG_ORDER;
1106
1107 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1108 return rc;
1109}
1110
1111
1112/**
1113 * Indicates that we're done with the ring-0 initialization
1114 * of the VM.
1115 *
1116 * @param pGVM The global (ring-0) VM structure.
1117 * @thread EMT(0)
1118 */
1119GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1120{
1121 /* Set the indicator. */
1122 pGVM->gvmm.s.fDoneVMMR0Init = true;
1123}
1124
1125
1126/**
1127 * Indicates that we're doing the ring-0 termination of the VM.
1128 *
1129 * @returns true if termination hasn't been done already, false if it has.
1130 * @param pGVM Pointer to the global VM structure. Optional.
1131 * @thread EMT(0) or session cleanup thread.
1132 */
1133GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1134{
1135 /* Validate the VM structure, state and handle. */
1136 AssertPtrReturn(pGVM, false);
1137
1138 /* Set the indicator. */
1139 if (pGVM->gvmm.s.fDoneVMMR0Term)
1140 return false;
1141 pGVM->gvmm.s.fDoneVMMR0Term = true;
1142 return true;
1143}
1144
1145
1146/**
1147 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1148 *
1149 * This is call from the vmR3DestroyFinalBit and from a error path in VMR3Create,
1150 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1151 * would've been nice if the caller was actually the EMT thread or that we somehow
1152 * could've associated the calling thread with the VM up front.
1153 *
1154 * @returns VBox status code.
1155 * @param pGVM The global (ring-0) VM structure.
1156 * @param pVM The cross context VM structure.
1157 *
1158 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1159 */
1160GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM, PVM pVM)
1161{
1162 LogFlow(("GVMMR0DestroyVM: pGVM=%p pVM=%p\n", pGVM, pVM));
1163 PGVMM pGVMM;
1164 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1165
1166 /*
1167 * Validate the VM structure, state and caller.
1168 */
1169 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1170 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1171 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1172 AssertReturn(pGVM->pVM == pVM, VERR_INVALID_POINTER);
1173 AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState),
1174 VERR_WRONG_ORDER);
1175
1176 uint32_t hGVM = pGVM->hSelf;
1177 ASMCompilerBarrier();
1178 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1179 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1180
1181 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1182 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1183
1184 RTPROCESS ProcId = RTProcSelf();
1185 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1186 AssertReturn( ( pHandle->hEMT0 == hSelf
1187 && pHandle->ProcId == ProcId)
1188 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1189
1190 /*
1191 * Lookup the handle and destroy the object.
1192 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1193 * object, we take some precautions against racing callers just in case...
1194 */
1195 int rc = gvmmR0CreateDestroyLock(pGVMM);
1196 AssertRC(rc);
1197
1198 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1199 if ( pHandle->pVM == pVM
1200 && ( ( pHandle->hEMT0 == hSelf
1201 && pHandle->ProcId == ProcId)
1202 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1203 && VALID_PTR(pHandle->pvObj)
1204 && VALID_PTR(pHandle->pSession)
1205 && VALID_PTR(pHandle->pGVM)
1206 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1207 {
1208 /* Check that other EMTs have deregistered. */
1209 uint32_t cNotDeregistered = 0;
1210 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1211 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */
1212 if (cNotDeregistered == 0)
1213 {
1214 /* Grab the object pointer. */
1215 void *pvObj = pHandle->pvObj;
1216 pHandle->pvObj = NULL;
1217 gvmmR0CreateDestroyUnlock(pGVMM);
1218
1219 SUPR0ObjRelease(pvObj, pHandle->pSession);
1220 }
1221 else
1222 {
1223 gvmmR0CreateDestroyUnlock(pGVMM);
1224 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1225 }
1226 }
1227 else
1228 {
1229 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n",
1230 pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf);
1231 gvmmR0CreateDestroyUnlock(pGVMM);
1232 rc = VERR_GVMM_IPE_2;
1233 }
1234
1235 return rc;
1236}
1237
1238
1239/**
1240 * Performs VM cleanup task as part of object destruction.
1241 *
1242 * @param pGVM The GVM pointer.
1243 */
1244static void gvmmR0CleanupVM(PGVM pGVM)
1245{
1246 if ( pGVM->gvmm.s.fDoneVMMR0Init
1247 && !pGVM->gvmm.s.fDoneVMMR0Term)
1248 {
1249 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1250 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM)
1251 {
1252 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1253 VMMR0TermVM(pGVM, pGVM->pVM, NIL_VMCPUID);
1254 }
1255 else
1256 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM));
1257 }
1258
1259 GMMR0CleanupVM(pGVM);
1260#ifdef VBOX_WITH_NEM_R0
1261 NEMR0CleanupVM(pGVM);
1262#endif
1263
1264 AssertCompile((uintptr_t)NIL_RTTHREADCTXHOOK == 0); /* Depends on zero initialized memory working for NIL at the moment. */
1265 for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
1266 {
1267 /** @todo Can we busy wait here for all thread-context hooks to be
1268 * deregistered before releasing (destroying) it? Only until we find a
1269 * solution for not deregistering hooks everytime we're leaving HMR0
1270 * context. */
1271 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->pVM->aCpus[idCpu]);
1272 }
1273}
1274
1275
/**
 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
 *
 * Unlinks the handle from the used list, tears down the GVM it refers to (if
 * any) and finally puts the handle on the free list.
 *
 * pvUser1 is the GVMM instance pointer (it is cast to PGVMM below).
 * pvUser2 is the handle pointer.
 *
 * Locking: the create/destroy lock is held from after the input validation
 * until the function returns.  The used-list lock is held exclusively while
 * the lists and handle fields are modified, but is dropped around the GVM
 * cleanup and re-taken before the handle is freed.
 */
static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
{
    LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));

    NOREF(pvObj);

    /*
     * Some quick, paranoid, input validation.
     *
     * The handle index is derived from the handle's position in the array and
     * must be non-zero, in range, and match the index stored in the handle.
     */
    PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
    AssertPtr(pHandle);
    PGVMM pGVMM = (PGVMM)pvUser1;
    Assert(pGVMM == g_pGVMM);
    const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
    if (    !iHandle
        ||  iHandle >= RT_ELEMENTS(pGVMM->aHandles)
        ||  iHandle != pHandle->iSelf)
    {
        SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
        return;
    }

    /* Create/destroy lock first, then the used-list lock; every exit below releases them in reverse order. */
    int rc = gvmmR0CreateDestroyLock(pGVMM);
    AssertRC(rc);
    rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
    AssertRC(rc);

    /*
     * This is a tad slow but a doubly linked list is too much hassle.
     */
    if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
    {
        SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
        GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
        gvmmR0CreateDestroyUnlock(pGVMM);
        return;
    }

    if (pGVMM->iUsedHead == iHandle)
        pGVMM->iUsedHead = pHandle->iNext; /* Head of the used list: just advance the head. */
    else
    {
        /* Walk the singly linked used list looking for our predecessor. */
        uint16_t iPrev = pGVMM->iUsedHead;
        int      c     = RT_ELEMENTS(pGVMM->aHandles) + 2; /* Iteration cap, guards against a cyclic list. */
        while (iPrev)
        {
            if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
            {
                SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
                GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
                gvmmR0CreateDestroyUnlock(pGVMM);
                return;
            }
            if (RT_UNLIKELY(c-- <= 0))
            {
                /* Cap exhausted: report as 'not found' below. */
                iPrev = 0;
                break;
            }

            if (pGVMM->aHandles[iPrev].iNext == iHandle)
                break;
            iPrev = pGVMM->aHandles[iPrev].iNext;
        }
        if (!iPrev)
        {
            /* Reached the end of the list (index 0) or gave up; the handle isn't on the used list. */
            SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf);
            GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
            gvmmR0CreateDestroyUnlock(pGVMM);
            return;
        }

        /* Unlink by letting the predecessor skip over us. */
        Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
        pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
    }
    pHandle->iNext = 0;
    pGVMM->cVMs--;

    /*
     * Do the global cleanup round.
     */
    PGVM pGVM = pHandle->pGVM;
    if (    VALID_PTR(pGVM)
        &&  pGVM->u32Magic == GVM_MAGIC)
    {
        pGVMM->cEMTs -= pGVM->cCpus;

        /* Break the session's association with this VM. */
        if (pGVM->pSession)
            SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);

        /* The used-list lock is not held across the cleanup calls below; the create/destroy lock still is. */
        GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);

        gvmmR0CleanupVM(pGVM);

        /*
         * Do the GVMM cleanup - must be done last.
         */
        /* The VM and VM pages mappings/allocations.  The two mapping objects are freed before the two memory objects. */
        if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
        {
            rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
            pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
        }

        if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
        {
            rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
            pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
        }

        if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
        {
            rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
            pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
        }

        if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ)
        {
            rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc);
            pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
        }

        /* The per-VCPU halt semaphores (created by GVMMR0InitVM). */
        for (VMCPUID i = 0; i < pGVM->cCpus; i++)
        {
            if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
            {
                rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
                pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
            }
        }

        /* the GVM structure itself.  The magic is altered first so it no longer equals GVM_MAGIC. */
        pGVM->u32Magic |= UINT32_C(0x80000000);
        RTMemFree(pGVM);

        /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
        rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
        AssertRC(rc);
    }
    /* else: GVMMR0CreateVM cleanup. */

    /*
     * Free the handle: push it onto the free list and reset all its fields.
     */
    pHandle->iNext = pGVMM->iFreeHead;
    pGVMM->iFreeHead = iHandle;
    ASMAtomicWriteNullPtr(&pHandle->pGVM);
    ASMAtomicWriteNullPtr(&pHandle->pVM);
    ASMAtomicWriteNullPtr(&pHandle->pvObj);
    ASMAtomicWriteNullPtr(&pHandle->pSession);
    ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
    ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);

    GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
    gvmmR0CreateDestroyUnlock(pGVMM);
    LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
}
1438
1439
1440/**
1441 * Registers the calling thread as the EMT of a Virtual CPU.
1442 *
1443 * Note that VCPU 0 is automatically registered during VM creation.
1444 *
1445 * @returns VBox status code
1446 * @param pGVM The global (ring-0) VM structure.
1447 * @param pVM The cross context VM structure.
1448 * @param idCpu VCPU id to register the current thread as.
1449 */
1450GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1451{
1452 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1453
1454 /*
1455 * Validate the VM structure, state and handle.
1456 */
1457 PGVMM pGVMM;
1458 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /* fTakeUsedLock */); /** @todo take lock here. */
1459 if (RT_SUCCESS(rc))
1460 {
1461 if (idCpu < pGVM->cCpus)
1462 {
1463 /* Check that the EMT isn't already assigned to a thread. */
1464 if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
1465 {
1466 Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1467
1468 /* A thread may only be one EMT. */
1469 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1470 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1471 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1472 if (RT_SUCCESS(rc))
1473 {
1474 /*
1475 * Do the assignment, then try setup the hook. Undo if that fails.
1476 */
1477 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1478
1479 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[idCpu]);
1480 if (RT_SUCCESS(rc))
1481 CPUMR0RegisterVCpuThread(&pVM->aCpus[idCpu]);
1482 else
1483 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1484 }
1485 }
1486 else
1487 rc = VERR_ACCESS_DENIED;
1488 }
1489 else
1490 rc = VERR_INVALID_CPU_ID;
1491 }
1492 return rc;
1493}
1494
1495
1496/**
1497 * Deregisters the calling thread as the EMT of a Virtual CPU.
1498 *
1499 * Note that VCPU 0 shall call GVMMR0DestroyVM intead of this API.
1500 *
1501 * @returns VBox status code
1502 * @param pGVM The global (ring-0) VM structure.
1503 * @param pVM The cross context VM structure.
1504 * @param idCpu VCPU id to register the current thread as.
1505 */
1506GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1507{
1508 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1509
1510 /*
1511 * Validate the VM structure, state and handle.
1512 */
1513 PGVMM pGVMM;
1514 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
1515 if (RT_SUCCESS(rc))
1516 {
1517 /*
1518 * Take the destruction lock and recheck the handle state to
1519 * prevent racing GVMMR0DestroyVM.
1520 */
1521 gvmmR0CreateDestroyLock(pGVMM);
1522 uint32_t hSelf = pGVM->hSelf;
1523 ASMCompilerBarrier();
1524 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1525 && pGVMM->aHandles[hSelf].pvObj != NULL
1526 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1527 {
1528 /*
1529 * Do per-EMT cleanups.
1530 */
1531 VMMR0ThreadCtxHookDestroyForEmt(&pVM->aCpus[idCpu]);
1532
1533 /*
1534 * Invalidate hEMT. We don't use NIL here as that would allow
1535 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1536 */
1537 AssertCompile(~(RTNATIVETHREAD)1 != NIL_RTNATIVETHREAD);
1538 pGVM->aCpus[idCpu].hEMT = ~(RTNATIVETHREAD)1;
1539 pVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1540 }
1541
1542 gvmmR0CreateDestroyUnlock(pGVMM);
1543 }
1544 return rc;
1545}
1546
1547
1548/**
1549 * Lookup a GVM structure by its handle.
1550 *
1551 * @returns The GVM pointer on success, NULL on failure.
1552 * @param hGVM The global VM handle. Asserts on bad handle.
1553 */
1554GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1555{
1556 PGVMM pGVMM;
1557 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1558
1559 /*
1560 * Validate.
1561 */
1562 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1563 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1564
1565 /*
1566 * Look it up.
1567 */
1568 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1569 AssertPtrReturn(pHandle->pVM, NULL);
1570 AssertPtrReturn(pHandle->pvObj, NULL);
1571 PGVM pGVM = pHandle->pGVM;
1572 AssertPtrReturn(pGVM, NULL);
1573 AssertReturn(pGVM->pVM == pHandle->pVM, NULL);
1574
1575 return pHandle->pGVM;
1576}
1577
1578
1579/**
1580 * Lookup a GVM structure by the shared VM structure.
1581 *
1582 * The calling thread must be in the same process as the VM. All current lookups
1583 * are by threads inside the same process, so this will not be an issue.
1584 *
1585 * @returns VBox status code.
1586 * @param pVM The cross context VM structure.
1587 * @param ppGVM Where to store the GVM pointer.
1588 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1589 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1590 * shared mode when requested.
1591 *
1592 * Be very careful if not taking the lock as it's
1593 * possible that the VM will disappear then!
1594 *
1595 * @remark This will not assert on an invalid pVM but try return silently.
1596 */
1597static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1598{
1599 RTPROCESS ProcId = RTProcSelf();
1600 PGVMM pGVMM;
1601 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1602
1603 /*
1604 * Validate.
1605 */
1606 if (RT_UNLIKELY( !VALID_PTR(pVM)
1607 || ((uintptr_t)pVM & PAGE_OFFSET_MASK)))
1608 return VERR_INVALID_POINTER;
1609 if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING
1610 || pVM->enmVMState >= VMSTATE_TERMINATED))
1611 return VERR_INVALID_POINTER;
1612
1613 uint16_t hGVM = pVM->hSelf;
1614 ASMCompilerBarrier();
1615 if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE
1616 || hGVM >= RT_ELEMENTS(pGVMM->aHandles)))
1617 return VERR_INVALID_HANDLE;
1618
1619 /*
1620 * Look it up.
1621 */
1622 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1623 PGVM pGVM;
1624 if (fTakeUsedLock)
1625 {
1626 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1627 AssertRCReturn(rc, rc);
1628
1629 pGVM = pHandle->pGVM;
1630 if (RT_UNLIKELY( pHandle->pVM != pVM
1631 || pHandle->ProcId != ProcId
1632 || !VALID_PTR(pHandle->pvObj)
1633 || !VALID_PTR(pGVM)
1634 || pGVM->pVM != pVM))
1635 {
1636 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1637 return VERR_INVALID_HANDLE;
1638 }
1639 }
1640 else
1641 {
1642 if (RT_UNLIKELY(pHandle->pVM != pVM))
1643 return VERR_INVALID_HANDLE;
1644 if (RT_UNLIKELY(pHandle->ProcId != ProcId))
1645 return VERR_INVALID_HANDLE;
1646 if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj)))
1647 return VERR_INVALID_HANDLE;
1648
1649 pGVM = pHandle->pGVM;
1650 if (RT_UNLIKELY(!VALID_PTR(pGVM)))
1651 return VERR_INVALID_HANDLE;
1652 if (RT_UNLIKELY(pGVM->pVM != pVM))
1653 return VERR_INVALID_HANDLE;
1654 }
1655
1656 *ppGVM = pGVM;
1657 *ppGVMM = pGVMM;
1658 return VINF_SUCCESS;
1659}
1660
1661
1662/**
1663 * Fast look up a GVM structure by the cross context VM structure.
1664 *
1665 * This is mainly used a glue function, so performance is .
1666 *
1667 * @returns GVM on success, NULL on failure.
1668 * @param pVM The cross context VM structure. ASSUMES to be
1669 * reasonably valid, so we can do fewer checks than in
1670 * gvmmR0ByVM.
1671 *
1672 * @note Do not use this on pVM structures from userland!
1673 */
1674GVMMR0DECL(PGVM) GVMMR0FastGetGVMByVM(PVM pVM)
1675{
1676 AssertPtr(pVM);
1677 Assert(!((uintptr_t)pVM & PAGE_OFFSET_MASK));
1678
1679 PGVMM pGVMM;
1680 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1681
1682 /*
1683 * Validate.
1684 */
1685 uint16_t hGVM = pVM->hSelf;
1686 ASMCompilerBarrier();
1687 AssertReturn(hGVM != NIL_GVM_HANDLE && hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1688
1689 /*
1690 * Look it up and check pVM against the value in the handle and GVM structures.
1691 */
1692 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1693 AssertReturn(pHandle->pVM == pVM, NULL);
1694
1695 PGVM pGVM = pHandle->pGVM;
1696 AssertPtrReturn(pGVM, NULL);
1697 AssertReturn(pGVM->pVM == pVM, NULL);
1698
1699 return pGVM;
1700}
1701
1702
1703/**
1704 * Check that the given GVM and VM structures match up.
1705 *
1706 * The calling thread must be in the same process as the VM. All current lookups
1707 * are by threads inside the same process, so this will not be an issue.
1708 *
1709 * @returns VBox status code.
1710 * @param pGVM The global (ring-0) VM structure.
1711 * @param pVM The cross context VM structure.
1712 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1713 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1714 * shared mode when requested.
1715 *
1716 * Be very careful if not taking the lock as it's
1717 * possible that the VM will disappear then!
1718 *
1719 * @remark This will not assert on an invalid pVM but try return silently.
1720 */
1721static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1722{
1723 /*
1724 * Check the pointers.
1725 */
1726 int rc;
1727 if (RT_LIKELY(RT_VALID_PTR(pGVM)))
1728 {
1729 if (RT_LIKELY( RT_VALID_PTR(pVM)
1730 && ((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0))
1731 {
1732 if (RT_LIKELY(pGVM->pVM == pVM))
1733 {
1734 /*
1735 * Get the pGVMM instance and check the VM handle.
1736 */
1737 PGVMM pGVMM;
1738 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1739
1740 uint16_t hGVM = pGVM->hSelf;
1741 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1742 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1743 {
1744 RTPROCESS const pidSelf = RTProcSelf();
1745 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1746 if (fTakeUsedLock)
1747 {
1748 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1749 AssertRCReturn(rc, rc);
1750 }
1751
1752 if (RT_LIKELY( pHandle->pGVM == pGVM
1753 && pHandle->pVM == pVM
1754 && pHandle->ProcId == pidSelf
1755 && RT_VALID_PTR(pHandle->pvObj)))
1756 {
1757 /*
1758 * Some more VM data consistency checks.
1759 */
1760 if (RT_LIKELY( pVM->cCpus == pGVM->cCpus
1761 && pVM->hSelf == hGVM
1762 && pVM->enmVMState >= VMSTATE_CREATING
1763 && pVM->enmVMState <= VMSTATE_TERMINATED
1764 && pVM->pVMR0 == pVM))
1765 {
1766 *ppGVMM = pGVMM;
1767 return VINF_SUCCESS;
1768 }
1769 }
1770
1771 if (fTakeUsedLock)
1772 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1773 }
1774 }
1775 rc = VERR_INVALID_VM_HANDLE;
1776 }
1777 else
1778 rc = VERR_INVALID_POINTER;
1779 }
1780 else
1781 rc = VERR_INVALID_POINTER;
1782 return rc;
1783}
1784
1785
1786/**
1787 * Check that the given GVM and VM structures match up.
1788 *
1789 * The calling thread must be in the same process as the VM. All current lookups
1790 * are by threads inside the same process, so this will not be an issue.
1791 *
1792 * @returns VBox status code.
1793 * @param pGVM The global (ring-0) VM structure.
1794 * @param pVM The cross context VM structure.
1795 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
1796 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1797 * @thread EMT
1798 *
1799 * @remarks This will assert in all failure paths.
1800 */
1801static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM)
1802{
1803 /*
1804 * Check the pointers.
1805 */
1806 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1807
1808 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1809 AssertReturn(((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
1810 AssertReturn(pGVM->pVM == pVM, VERR_INVALID_VM_HANDLE);
1811
1812
1813 /*
1814 * Get the pGVMM instance and check the VM handle.
1815 */
1816 PGVMM pGVMM;
1817 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1818
1819 uint16_t hGVM = pGVM->hSelf;
1820 ASMCompilerBarrier();
1821 AssertReturn( hGVM != NIL_GVM_HANDLE
1822 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1823
1824 RTPROCESS const pidSelf = RTProcSelf();
1825 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1826 AssertReturn( pHandle->pGVM == pGVM
1827 && pHandle->pVM == pVM
1828 && pHandle->ProcId == pidSelf
1829 && RT_VALID_PTR(pHandle->pvObj),
1830 VERR_INVALID_HANDLE);
1831
1832 /*
1833 * Check the EMT claim.
1834 */
1835 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
1836 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1837 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1838
1839 /*
1840 * Some more VM data consistency checks.
1841 */
1842 AssertReturn(pVM->cCpus == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
1843 AssertReturn(pVM->hSelf == hGVM, VERR_INCONSISTENT_VM_HANDLE);
1844 AssertReturn(pVM->pVMR0 == pVM, VERR_INCONSISTENT_VM_HANDLE);
1845 AssertReturn( pVM->enmVMState >= VMSTATE_CREATING
1846 && pVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
1847
1848 *ppGVMM = pGVMM;
1849 return VINF_SUCCESS;
1850}
1851
1852
1853/**
1854 * Validates a GVM/VM pair.
1855 *
1856 * @returns VBox status code.
1857 * @param pGVM The global (ring-0) VM structure.
1858 * @param pVM The cross context VM structure.
1859 */
1860GVMMR0DECL(int) GVMMR0ValidateGVMandVM(PGVM pGVM, PVM pVM)
1861{
1862 PGVMM pGVMM;
1863 return gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /*fTakeUsedLock*/);
1864}
1865
1866
1867
1868/**
1869 * Validates a GVM/VM/EMT combo.
1870 *
1871 * @returns VBox status code.
1872 * @param pGVM The global (ring-0) VM structure.
1873 * @param pVM The cross context VM structure.
1874 * @param idCpu The Virtual CPU ID of the calling EMT.
1875 * @thread EMT(idCpu)
1876 */
1877GVMMR0DECL(int) GVMMR0ValidateGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1878{
1879 PGVMM pGVMM;
1880 return gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
1881}
1882
1883
1884/**
1885 * Looks up the VM belonging to the specified EMT thread.
1886 *
1887 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1888 * unnecessary kernel panics when the EMT thread hits an assertion. The
1889 * call may or not be an EMT thread.
1890 *
1891 * @returns Pointer to the VM on success, NULL on failure.
1892 * @param hEMT The native thread handle of the EMT.
1893 * NIL_RTNATIVETHREAD means the current thread
1894 */
1895GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1896{
1897 /*
1898 * No Assertions here as we're usually called in a AssertMsgN or
1899 * RTAssert* context.
1900 */
1901 PGVMM pGVMM = g_pGVMM;
1902 if ( !VALID_PTR(pGVMM)
1903 || pGVMM->u32Magic != GVMM_MAGIC)
1904 return NULL;
1905
1906 if (hEMT == NIL_RTNATIVETHREAD)
1907 hEMT = RTThreadNativeSelf();
1908 RTPROCESS ProcId = RTProcSelf();
1909
1910 /*
1911 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1912 */
1913 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1914 {
1915 if ( pGVMM->aHandles[i].iSelf == i
1916 && pGVMM->aHandles[i].ProcId == ProcId
1917 && VALID_PTR(pGVMM->aHandles[i].pvObj)
1918 && VALID_PTR(pGVMM->aHandles[i].pVM)
1919 && VALID_PTR(pGVMM->aHandles[i].pGVM))
1920 {
1921 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1922 return pGVMM->aHandles[i].pVM;
1923
1924 /* This is fearly safe with the current process per VM approach. */
1925 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1926 VMCPUID const cCpus = pGVM->cCpus;
1927 if ( cCpus < 1
1928 || cCpus > VMM_MAX_CPU_COUNT)
1929 continue;
1930 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1931 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1932 return pGVMM->aHandles[i].pVM;
1933 }
1934 }
1935 return NULL;
1936}
1937
1938
1939/**
 * This will wake up expired and soon-to-be expired VMs.
1941 *
1942 * @returns Number of VMs that has been woken up.
1943 * @param pGVMM Pointer to the GVMM instance data.
1944 * @param u64Now The current time.
1945 */
1946static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
1947{
1948 /*
1949 * Skip this if we've got disabled because of high resolution wakeups or by
1950 * the user.
1951 */
1952 if (!pGVMM->fDoEarlyWakeUps)
1953 return 0;
1954
1955/** @todo Rewrite this algorithm. See performance defect XYZ. */
1956
1957 /*
1958 * A cheap optimization to stop wasting so much time here on big setups.
1959 */
1960 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
1961 if ( pGVMM->cHaltedEMTs == 0
1962 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
1963 return 0;
1964
1965 /*
1966 * Only one thread doing this at a time.
1967 */
1968 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
1969 return 0;
1970
1971 /*
1972 * The first pass will wake up VMs which have actually expired
1973 * and look for VMs that should be woken up in the 2nd and 3rd passes.
1974 */
1975 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
1976 uint64_t u64Min = UINT64_MAX;
1977 unsigned cWoken = 0;
1978 unsigned cHalted = 0;
1979 unsigned cTodo2nd = 0;
1980 unsigned cTodo3rd = 0;
1981 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1982 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1983 i = pGVMM->aHandles[i].iNext)
1984 {
1985 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1986 if ( VALID_PTR(pCurGVM)
1987 && pCurGVM->u32Magic == GVM_MAGIC)
1988 {
1989 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1990 {
1991 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1992 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1993 if (u64)
1994 {
1995 if (u64 <= u64Now)
1996 {
1997 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1998 {
1999 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2000 AssertRC(rc);
2001 cWoken++;
2002 }
2003 }
2004 else
2005 {
2006 cHalted++;
2007 if (u64 <= uNsEarlyWakeUp1)
2008 cTodo2nd++;
2009 else if (u64 <= uNsEarlyWakeUp2)
2010 cTodo3rd++;
2011 else if (u64 < u64Min)
2012 u64 = u64Min;
2013 }
2014 }
2015 }
2016 }
2017 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2018 }
2019
2020 if (cTodo2nd)
2021 {
2022 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2023 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2024 i = pGVMM->aHandles[i].iNext)
2025 {
2026 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2027 if ( VALID_PTR(pCurGVM)
2028 && pCurGVM->u32Magic == GVM_MAGIC)
2029 {
2030 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2031 {
2032 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2033 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2034 if ( u64
2035 && u64 <= uNsEarlyWakeUp1)
2036 {
2037 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2038 {
2039 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2040 AssertRC(rc);
2041 cWoken++;
2042 }
2043 }
2044 }
2045 }
2046 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2047 }
2048 }
2049
2050 if (cTodo3rd)
2051 {
2052 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2053 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2054 i = pGVMM->aHandles[i].iNext)
2055 {
2056 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2057 if ( VALID_PTR(pCurGVM)
2058 && pCurGVM->u32Magic == GVM_MAGIC)
2059 {
2060 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2061 {
2062 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2063 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2064 if ( u64
2065 && u64 <= uNsEarlyWakeUp2)
2066 {
2067 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2068 {
2069 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2070 AssertRC(rc);
2071 cWoken++;
2072 }
2073 }
2074 }
2075 }
2076 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2077 }
2078 }
2079
2080 /*
2081 * Set the minimum value.
2082 */
2083 pGVMM->uNsNextEmtWakeup = u64Min;
2084
2085 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2086 return cWoken;
2087}
2088
2089
2090/**
2091 * Halt the EMT thread.
2092 *
2093 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2094 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2095 * @param pGVM The global (ring-0) VM structure.
2096 * @param pVM The cross context VM structure.
2097 * @param idCpu The Virtual CPU ID of the calling EMT.
2098 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2099 * @thread EMT(idCpu).
2100 */
2101GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2102{
2103 LogFlow(("GVMMR0SchedHalt: pGVM=%p pVM=%p idCpu=%#x u64ExpireGipTime=%#RX64\n", pGVM, pVM, idCpu, u64ExpireGipTime));
2104 GVMM_CHECK_SMAP_SETUP();
2105 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2106
2107 /*
2108 * Validate the VM structure, state and handle.
2109 */
2110 PGVMM pGVMM;
2111 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2112 if (RT_FAILURE(rc))
2113 return rc;
2114 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2115 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2116
2117 PGVMCPU pCurGVCpu = &pGVM->aCpus[idCpu];
2118 Assert(!pCurGVCpu->gvmm.s.u64HaltExpire);
2119
2120 /*
2121 * If we're doing early wake-ups, we must take the UsedList lock before we
2122 * start querying the current time.
2123 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2124 */
2125 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2126 if (fDoEarlyWakeUps)
2127 {
2128 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2129 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2130 }
2131
2132 pCurGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
2133
2134 /* GIP hack: We might are frequently sleeping for short intervals where the
2135 difference between GIP and system time matters on systems with high resolution
2136 system time. So, convert the input from GIP to System time in that case. */
2137 Assert(ASMGetFlags() & X86_EFL_IF);
2138 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2139 const uint64_t u64NowGip = RTTimeNanoTS();
2140 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2141
2142 if (fDoEarlyWakeUps)
2143 {
2144 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2145 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2146 }
2147
2148 /*
2149 * Go to sleep if we must...
2150 * Cap the sleep time to 1 second to be on the safe side.
2151 */
2152 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2153 if ( u64NowGip < u64ExpireGipTime
2154 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2155 ? pGVMM->nsMinSleepCompany
2156 : pGVMM->nsMinSleepAlone))
2157 {
2158 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2159 if (cNsInterval > RT_NS_1SEC)
2160 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2161 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2162 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2163 if (fDoEarlyWakeUps)
2164 {
2165 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2166 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2167 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2168 }
2169 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2170
2171 rc = RTSemEventMultiWaitEx(pCurGVCpu->gvmm.s.HaltEventMulti,
2172 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2173 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2174 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2175
2176 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0);
2177 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2178
2179 /* Reset the semaphore to try prevent a few false wake-ups. */
2180 if (rc == VINF_SUCCESS)
2181 {
2182 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
2183 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2184 }
2185 else if (rc == VERR_TIMEOUT)
2186 {
2187 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2188 rc = VINF_SUCCESS;
2189 }
2190 }
2191 else
2192 {
2193 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2194 if (fDoEarlyWakeUps)
2195 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2196 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2197 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
2198 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2199 }
2200
2201 return rc;
2202}
2203
2204
2205/**
2206 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2207 * the a sleeping EMT.
2208 *
2209 * @retval VINF_SUCCESS if successfully woken up.
2210 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2211 *
2212 * @param pGVM The global (ring-0) VM structure.
2213 * @param pGVCpu The global (ring-0) VCPU structure.
2214 */
2215DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2216{
2217 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2218
2219 /*
2220 * Signal the semaphore regardless of whether it's current blocked on it.
2221 *
2222 * The reason for this is that there is absolutely no way we can be 100%
2223 * certain that it isn't *about* go to go to sleep on it and just got
2224 * delayed a bit en route. So, we will always signal the semaphore when
2225 * the it is flagged as halted in the VMM.
2226 */
2227/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2228 int rc;
2229 if (pGVCpu->gvmm.s.u64HaltExpire)
2230 {
2231 rc = VINF_SUCCESS;
2232 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2233 }
2234 else
2235 {
2236 rc = VINF_GVM_NOT_BLOCKED;
2237 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2238 }
2239
2240 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2241 AssertRC(rc2);
2242
2243 return rc;
2244}
2245
2246
2247/**
2248 * Wakes up the halted EMT thread so it can service a pending request.
2249 *
2250 * @returns VBox status code.
2251 * @retval VINF_SUCCESS if successfully woken up.
2252 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2253 *
2254 * @param pGVM The global (ring-0) VM structure.
2255 * @param pVM The cross context VM structure.
2256 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2257 * @param fTakeUsedLock Take the used lock or not
2258 * @thread Any but EMT(idCpu).
2259 */
2260GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
2261{
2262 GVMM_CHECK_SMAP_SETUP();
2263 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2264
2265 /*
2266 * Validate input and take the UsedLock.
2267 */
2268 PGVMM pGVMM;
2269 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
2270 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2271 if (RT_SUCCESS(rc))
2272 {
2273 if (idCpu < pGVM->cCpus)
2274 {
2275 /*
2276 * Do the actual job.
2277 */
2278 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2279 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2280
2281 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2282 {
2283 /*
2284 * While we're here, do a round of scheduling.
2285 */
2286 Assert(ASMGetFlags() & X86_EFL_IF);
2287 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2288 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2289 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2290 }
2291 }
2292 else
2293 rc = VERR_INVALID_CPU_ID;
2294
2295 if (fTakeUsedLock)
2296 {
2297 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2298 AssertRC(rc2);
2299 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2300 }
2301 }
2302
2303 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2304 return rc;
2305}
2306
2307
2308/**
2309 * Wakes up the halted EMT thread so it can service a pending request.
2310 *
2311 * @returns VBox status code.
2312 * @retval VINF_SUCCESS if successfully woken up.
2313 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2314 *
2315 * @param pGVM The global (ring-0) VM structure.
2316 * @param pVM The cross context VM structure.
2317 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2318 * @thread Any but EMT(idCpu).
2319 */
2320GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2321{
2322 return GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
2323}
2324
2325
2326/**
2327 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2328 * parameter and no used locking.
2329 *
2330 * @returns VBox status code.
2331 * @retval VINF_SUCCESS if successfully woken up.
2332 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2333 *
2334 * @param pVM The cross context VM structure.
2335 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2336 * @thread Any but EMT(idCpu).
2337 * @deprecated Don't use in new code if possible! Use the GVM variant.
2338 */
2339GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PVM pVM, VMCPUID idCpu)
2340{
2341 GVMM_CHECK_SMAP_SETUP();
2342 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2343 PGVM pGVM;
2344 PGVMM pGVMM;
2345 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
2346 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2347 if (RT_SUCCESS(rc))
2348 rc = GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, false /*fTakeUsedLock*/);
2349 return rc;
2350}
2351
2352
2353/**
2354 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2355 * the Virtual CPU if it's still busy executing guest code.
2356 *
2357 * @returns VBox status code.
2358 * @retval VINF_SUCCESS if poked successfully.
2359 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2360 *
2361 * @param pGVM The global (ring-0) VM structure.
2362 * @param pVCpu The cross context virtual CPU structure.
2363 */
2364DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPU pVCpu)
2365{
2366 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2367
2368 RTCPUID idHostCpu = pVCpu->idHostCpu;
2369 if ( idHostCpu == NIL_RTCPUID
2370 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2371 {
2372 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2373 return VINF_GVM_NOT_BUSY_IN_GC;
2374 }
2375
2376 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2377 RTMpPokeCpu(idHostCpu);
2378 return VINF_SUCCESS;
2379}
2380
2381
2382/**
2383 * Pokes an EMT if it's still busy running guest code.
2384 *
2385 * @returns VBox status code.
2386 * @retval VINF_SUCCESS if poked successfully.
2387 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2388 *
2389 * @param pGVM The global (ring-0) VM structure.
2390 * @param pVM The cross context VM structure.
2391 * @param idCpu The ID of the virtual CPU to poke.
2392 * @param fTakeUsedLock Take the used lock or not
2393 */
2394GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
2395{
2396 /*
2397 * Validate input and take the UsedLock.
2398 */
2399 PGVMM pGVMM;
2400 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
2401 if (RT_SUCCESS(rc))
2402 {
2403 if (idCpu < pGVM->cCpus)
2404 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2405 else
2406 rc = VERR_INVALID_CPU_ID;
2407
2408 if (fTakeUsedLock)
2409 {
2410 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2411 AssertRC(rc2);
2412 }
2413 }
2414
2415 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2416 return rc;
2417}
2418
2419
2420/**
2421 * Pokes an EMT if it's still busy running guest code.
2422 *
2423 * @returns VBox status code.
2424 * @retval VINF_SUCCESS if poked successfully.
2425 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2426 *
2427 * @param pGVM The global (ring-0) VM structure.
2428 * @param pVM The cross context VM structure.
2429 * @param idCpu The ID of the virtual CPU to poke.
2430 */
2431GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2432{
2433 return GVMMR0SchedPokeEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
2434}
2435
2436
2437/**
2438 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2439 * used locking.
2440 *
2441 * @returns VBox status code.
2442 * @retval VINF_SUCCESS if poked successfully.
2443 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2444 *
2445 * @param pVM The cross context VM structure.
2446 * @param idCpu The ID of the virtual CPU to poke.
2447 *
2448 * @deprecated Don't use in new code if possible! Use the GVM variant.
2449 */
2450GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PVM pVM, VMCPUID idCpu)
2451{
2452 PGVM pGVM;
2453 PGVMM pGVMM;
2454 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
2455 if (RT_SUCCESS(rc))
2456 {
2457 if (idCpu < pGVM->cCpus)
2458 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2459 else
2460 rc = VERR_INVALID_CPU_ID;
2461 }
2462 return rc;
2463}
2464
2465
2466/**
2467 * Wakes up a set of halted EMT threads so they can service pending request.
2468 *
2469 * @returns VBox status code, no informational stuff.
2470 *
2471 * @param pGVM The global (ring-0) VM structure.
2472 * @param pVM The cross context VM structure.
2473 * @param pSleepSet The set of sleepers to wake up.
2474 * @param pPokeSet The set of CPUs to poke.
2475 */
2476GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PVM pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2477{
2478 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2479 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2480 GVMM_CHECK_SMAP_SETUP();
2481 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2482 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2483
2484 /*
2485 * Validate input and take the UsedLock.
2486 */
2487 PGVMM pGVMM;
2488 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /* fTakeUsedLock */);
2489 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2490 if (RT_SUCCESS(rc))
2491 {
2492 rc = VINF_SUCCESS;
2493 VMCPUID idCpu = pGVM->cCpus;
2494 while (idCpu-- > 0)
2495 {
2496 /* Don't try poke or wake up ourselves. */
2497 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2498 continue;
2499
2500 /* just ignore errors for now. */
2501 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2502 {
2503 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2504 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2505 }
2506 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2507 {
2508 gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2509 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2510 }
2511 }
2512
2513 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2514 AssertRC(rc2);
2515 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2516 }
2517
2518 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2519 return rc;
2520}
2521
2522
2523/**
2524 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2525 *
2526 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2527 * @param pGVM The global (ring-0) VM structure.
2528 * @param pVM The cross context VM structure.
2529 * @param pReq Pointer to the request packet.
2530 */
2531GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PVM pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2532{
2533 /*
2534 * Validate input and pass it on.
2535 */
2536 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2537 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2538
2539 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, pVM, &pReq->SleepSet, &pReq->PokeSet);
2540}
2541
2542
2543
2544/**
2545 * Poll the schedule to see if someone else should get a chance to run.
2546 *
2547 * This is a bit hackish and will not work too well if the machine is
2548 * under heavy load from non-VM processes.
2549 *
2550 * @returns VINF_SUCCESS if not yielded.
2551 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2552 * @param pGVM The global (ring-0) VM structure.
2553 * @param pVM The cross context VM structure.
2554 * @param idCpu The Virtual CPU ID of the calling EMT.
2555 * @param fYield Whether to yield or not.
2556 * This is for when we're spinning in the halt loop.
2557 * @thread EMT(idCpu).
2558 */
2559GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fYield)
2560{
2561 /*
2562 * Validate input.
2563 */
2564 PGVMM pGVMM;
2565 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2566 if (RT_SUCCESS(rc))
2567 {
2568 /*
2569 * We currently only implement helping doing wakeups (fYield = false), so don't
2570 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2571 */
2572 if (!fYield && pGVMM->fDoEarlyWakeUps)
2573 {
2574 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2575 pGVM->gvmm.s.StatsSched.cPollCalls++;
2576
2577 Assert(ASMGetFlags() & X86_EFL_IF);
2578 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2579
2580 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2581
2582 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2583 }
2584 /*
2585 * Not quite sure what we could do here...
2586 */
2587 else if (fYield)
2588 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2589 else
2590 rc = VINF_SUCCESS;
2591 }
2592
2593 LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
2594 return rc;
2595}
2596
2597
2598#ifdef GVMM_SCHED_WITH_PPT
2599/**
2600 * Timer callback for the periodic preemption timer.
2601 *
2602 * @param pTimer The timer handle.
2603 * @param pvUser Pointer to the per cpu structure.
2604 * @param iTick The current tick.
2605 */
2606static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2607{
2608 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2609 NOREF(pTimer); NOREF(iTick);
2610
2611 /*
2612 * Termination check
2613 */
2614 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2615 return;
2616
2617 /*
2618 * Do the house keeping.
2619 */
2620 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2621
2622 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
2623 {
2624 /*
2625 * Historicize the max frequency.
2626 */
2627 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
2628 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
2629 pCpu->Ppt.iTickHistorization = 0;
2630 pCpu->Ppt.uDesiredHz = 0;
2631
2632 /*
2633 * Check if the current timer frequency.
2634 */
2635 uint32_t uHistMaxHz = 0;
2636 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
2637 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
2638 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
2639 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
2640 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2641 else if (uHistMaxHz)
2642 {
2643 /*
2644 * Reprogram it.
2645 */
2646 pCpu->Ppt.cChanges++;
2647 pCpu->Ppt.iTickHistorization = 0;
2648 pCpu->Ppt.uTimerHz = uHistMaxHz;
2649 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
2650 pCpu->Ppt.cNsInterval = cNsInterval;
2651 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2652 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2653 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2654 / cNsInterval;
2655 else
2656 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2657 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2658
2659 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
2660 RTTimerChangeInterval(pTimer, cNsInterval);
2661 }
2662 else
2663 {
2664 /*
2665 * Stop it.
2666 */
2667 pCpu->Ppt.fStarted = false;
2668 pCpu->Ppt.uTimerHz = 0;
2669 pCpu->Ppt.cNsInterval = 0;
2670 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2671
2672 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
2673 RTTimerStop(pTimer);
2674 }
2675 }
2676 else
2677 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2678}
2679#endif /* GVMM_SCHED_WITH_PPT */
2680
2681
2682/**
2683 * Updates the periodic preemption timer for the calling CPU.
2684 *
2685 * The caller must have disabled preemption!
2686 * The caller must check that the host can do high resolution timers.
2687 *
2688 * @param pVM The cross context VM structure.
2689 * @param idHostCpu The current host CPU id.
2690 * @param uHz The desired frequency.
2691 */
2692GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz)
2693{
2694 NOREF(pVM);
2695#ifdef GVMM_SCHED_WITH_PPT
2696 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2697 Assert(RTTimerCanDoHighResolution());
2698
2699 /*
2700 * Resolve the per CPU data.
2701 */
2702 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2703 PGVMM pGVMM = g_pGVMM;
2704 if ( !VALID_PTR(pGVMM)
2705 || pGVMM->u32Magic != GVMM_MAGIC)
2706 return;
2707 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2708 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2709 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2710 && pCpu->idCpu == idHostCpu,
2711 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2712
2713 /*
2714 * Check whether we need to do anything about the timer.
2715 * We have to be a little bit careful since we might be race the timer
2716 * callback here.
2717 */
2718 if (uHz > 16384)
2719 uHz = 16384; /** @todo add a query method for this! */
2720 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2721 && uHz >= pCpu->Ppt.uMinHz
2722 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2723 {
2724 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2725
2726 pCpu->Ppt.uDesiredHz = uHz;
2727 uint32_t cNsInterval = 0;
2728 if (!pCpu->Ppt.fStarted)
2729 {
2730 pCpu->Ppt.cStarts++;
2731 pCpu->Ppt.fStarted = true;
2732 pCpu->Ppt.fStarting = true;
2733 pCpu->Ppt.iTickHistorization = 0;
2734 pCpu->Ppt.uTimerHz = uHz;
2735 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2736 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2737 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2738 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2739 / cNsInterval;
2740 else
2741 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2742 }
2743
2744 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2745
2746 if (cNsInterval)
2747 {
2748 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2749 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2750 AssertRC(rc);
2751
2752 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2753 if (RT_FAILURE(rc))
2754 pCpu->Ppt.fStarted = false;
2755 pCpu->Ppt.fStarting = false;
2756 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2757 }
2758 }
2759#else /* !GVMM_SCHED_WITH_PPT */
2760 NOREF(idHostCpu); NOREF(uHz);
2761#endif /* !GVMM_SCHED_WITH_PPT */
2762}
2763
2764
2765/**
2766 * Retrieves the GVMM statistics visible to the caller.
2767 *
2768 * @returns VBox status code.
2769 *
2770 * @param pStats Where to put the statistics.
2771 * @param pSession The current session.
2772 * @param pGVM The GVM to obtain statistics for. Optional.
2773 * @param pVM The VM structure corresponding to @a pGVM.
2774 */
2775GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
2776{
2777 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
2778
2779 /*
2780 * Validate input.
2781 */
2782 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2783 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2784 pStats->cVMs = 0; /* (crash before taking the sem...) */
2785
2786 /*
2787 * Take the lock and get the VM statistics.
2788 */
2789 PGVMM pGVMM;
2790 if (pGVM)
2791 {
2792 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
2793 if (RT_FAILURE(rc))
2794 return rc;
2795 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
2796 }
2797 else
2798 {
2799 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2800 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
2801
2802 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2803 AssertRCReturn(rc, rc);
2804 }
2805
2806 /*
2807 * Enumerate the VMs and add the ones visible to the statistics.
2808 */
2809 pStats->cVMs = 0;
2810 pStats->cEMTs = 0;
2811 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
2812
2813 for (unsigned i = pGVMM->iUsedHead;
2814 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2815 i = pGVMM->aHandles[i].iNext)
2816 {
2817 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
2818 void *pvObj = pGVMM->aHandles[i].pvObj;
2819 if ( VALID_PTR(pvObj)
2820 && VALID_PTR(pOtherGVM)
2821 && pOtherGVM->u32Magic == GVM_MAGIC
2822 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2823 {
2824 pStats->cVMs++;
2825 pStats->cEMTs += pOtherGVM->cCpus;
2826
2827 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
2828 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
2829 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
2830 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
2831 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
2832
2833 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
2834 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
2835 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
2836
2837 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
2838 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
2839
2840 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
2841 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
2842 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
2843 }
2844 }
2845
2846 /*
2847 * Copy out the per host CPU statistics.
2848 */
2849 uint32_t iDstCpu = 0;
2850 uint32_t cSrcCpus = pGVMM->cHostCpus;
2851 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
2852 {
2853 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
2854 {
2855 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
2856 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
2857#ifdef GVMM_SCHED_WITH_PPT
2858 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
2859 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
2860 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
2861 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
2862#else
2863 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
2864 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
2865 pStats->aHostCpus[iDstCpu].cChanges = 0;
2866 pStats->aHostCpus[iDstCpu].cStarts = 0;
2867#endif
2868 iDstCpu++;
2869 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
2870 break;
2871 }
2872 }
2873 pStats->cHostCpus = iDstCpu;
2874
2875 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2876
2877 return VINF_SUCCESS;
2878}
2879
2880
2881/**
2882 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2883 *
2884 * @returns see GVMMR0QueryStatistics.
2885 * @param pGVM The global (ring-0) VM structure. Optional.
2886 * @param pVM The cross context VM structure. Optional.
2887 * @param pReq Pointer to the request packet.
2888 * @param pSession The current session.
2889 */
2890GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PVM pVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
2891{
2892 /*
2893 * Validate input and pass it on.
2894 */
2895 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2896 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2897 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
2898
2899 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM, pVM);
2900}
2901
2902
2903/**
2904 * Resets the specified GVMM statistics.
2905 *
2906 * @returns VBox status code.
2907 *
2908 * @param pStats Which statistics to reset, that is, non-zero fields indicates which to reset.
2909 * @param pSession The current session.
2910 * @param pGVM The GVM to reset statistics for. Optional.
2911 * @param pVM The VM structure corresponding to @a pGVM.
2912 */
2913GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
2914{
2915 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
2916
2917 /*
2918 * Validate input.
2919 */
2920 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2921 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2922
2923 /*
2924 * Take the lock and get the VM statistics.
2925 */
2926 PGVMM pGVMM;
2927 if (pGVM)
2928 {
2929 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
2930 if (RT_FAILURE(rc))
2931 return rc;
2932# define MAYBE_RESET_FIELD(field) \
2933 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2934 MAYBE_RESET_FIELD(cHaltCalls);
2935 MAYBE_RESET_FIELD(cHaltBlocking);
2936 MAYBE_RESET_FIELD(cHaltTimeouts);
2937 MAYBE_RESET_FIELD(cHaltNotBlocking);
2938 MAYBE_RESET_FIELD(cHaltWakeUps);
2939 MAYBE_RESET_FIELD(cWakeUpCalls);
2940 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2941 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2942 MAYBE_RESET_FIELD(cPokeCalls);
2943 MAYBE_RESET_FIELD(cPokeNotBusy);
2944 MAYBE_RESET_FIELD(cPollCalls);
2945 MAYBE_RESET_FIELD(cPollHalts);
2946 MAYBE_RESET_FIELD(cPollWakeUps);
2947# undef MAYBE_RESET_FIELD
2948 }
2949 else
2950 {
2951 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2952
2953 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2954 AssertRCReturn(rc, rc);
2955 }
2956
2957 /*
2958 * Enumerate the VMs and add the ones visible to the statistics.
2959 */
2960 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
2961 {
2962 for (unsigned i = pGVMM->iUsedHead;
2963 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2964 i = pGVMM->aHandles[i].iNext)
2965 {
2966 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
2967 void *pvObj = pGVMM->aHandles[i].pvObj;
2968 if ( VALID_PTR(pvObj)
2969 && VALID_PTR(pOtherGVM)
2970 && pOtherGVM->u32Magic == GVM_MAGIC
2971 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2972 {
2973# define MAYBE_RESET_FIELD(field) \
2974 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2975 MAYBE_RESET_FIELD(cHaltCalls);
2976 MAYBE_RESET_FIELD(cHaltBlocking);
2977 MAYBE_RESET_FIELD(cHaltTimeouts);
2978 MAYBE_RESET_FIELD(cHaltNotBlocking);
2979 MAYBE_RESET_FIELD(cHaltWakeUps);
2980 MAYBE_RESET_FIELD(cWakeUpCalls);
2981 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2982 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2983 MAYBE_RESET_FIELD(cPokeCalls);
2984 MAYBE_RESET_FIELD(cPokeNotBusy);
2985 MAYBE_RESET_FIELD(cPollCalls);
2986 MAYBE_RESET_FIELD(cPollHalts);
2987 MAYBE_RESET_FIELD(cPollWakeUps);
2988# undef MAYBE_RESET_FIELD
2989 }
2990 }
2991 }
2992
2993 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2994
2995 return VINF_SUCCESS;
2996}
2997
2998
2999/**
3000 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3001 *
3002 * @returns see GVMMR0ResetStatistics.
3003 * @param pGVM The global (ring-0) VM structure. Optional.
3004 * @param pVM The cross context VM structure. Optional.
3005 * @param pReq Pointer to the request packet.
3006 * @param pSession The current session.
3007 */
3008GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PVM pVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3009{
3010 /*
3011 * Validate input and pass it on.
3012 */
3013 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3014 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3015 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3016
3017 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM, pVM);
3018}
3019
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette