VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@68009

Last change on this file since 68009 was 68009, checked in by vboxsync, 7 years ago

VMMR0,PDMR0: Adding GVM parameter and validation thereof to the generic ring-0 device & driver calls.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 106.9 KB
1/* $Id: GVMMR0.cpp 68009 2017-07-17 17:17:59Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2016 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered; this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-cpu
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * The TMCalcHostTimerFrequency() part of things takes the max
42 * TMTimerSetFrequencyHint() value and adjusts by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
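/*
 * Illustrative note, added to this listing and not part of the original source:
 * the ~160 ms history window quoted above follows from the PPT constants
 * defined further down in this file, i.e. 8 history slots of 20000000 ns each.
 *
 * @code
 *     // 8 slots * 20000000 ns/slot = 160000000 ns = ~160 ms
 *     uint64_t const cNsHistoryWindow = RT_ELEMENTS(((PGVMMHOSTCPU)0)->Ppt.aHzHistory)
 *                                     * (uint64_t)GVMMHOSTCPU_PPT_HIST_INTERVAL_NS;
 * @endcode
 */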
47
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/gvm.h>
57#include <VBox/vmm/vm.h>
58#include <VBox/vmm/vmcpuset.h>
59#include <VBox/vmm/vmm.h>
60#include <VBox/param.h>
61#include <VBox/err.h>
62
63#include <iprt/asm.h>
64#include <iprt/asm-amd64-x86.h>
65#include <iprt/critsect.h>
66#include <iprt/mem.h>
67#include <iprt/semaphore.h>
68#include <iprt/time.h>
69#include <VBox/log.h>
70#include <iprt/thread.h>
71#include <iprt/process.h>
72#include <iprt/param.h>
73#include <iprt/string.h>
74#include <iprt/assert.h>
75#include <iprt/mem.h>
76#include <iprt/memobj.h>
77#include <iprt/mp.h>
78#include <iprt/cpuset.h>
79#include <iprt/spinlock.h>
80#include <iprt/timer.h>
81
82#include "dtrace/VBoxVMM.h"
83
84
85/*********************************************************************************************************************************
86* Defined Constants And Macros *
87*********************************************************************************************************************************/
88#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
89/** Define this to enable the periodic preemption timer. */
90# define GVMM_SCHED_WITH_PPT
91#endif
92
93
94/** @def GVMM_CHECK_SMAP_SETUP
95 * SMAP check setup. */
96/** @def GVMM_CHECK_SMAP_CHECK
97 * Checks that the AC flag is set if SMAP is enabled. If AC is not set,
98 * it will be logged and @a a_BadExpr is executed. */
99/** @def GVMM_CHECK_SMAP_CHECK2
100 * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will
101 * be logged, written to the VM's assertion text buffer, and @a a_BadExpr is
102 * executed. */
103#if defined(VBOX_STRICT) || 1
104# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
105# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) \
106 do { \
107 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
108 { \
109 RTCCUINTREG fEflCheck = ASMGetFlags(); \
110 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
111 { /* likely */ } \
112 else \
113 { \
114 SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
115 a_BadExpr; \
116 } \
117 } \
118 } while (0)
119# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) \
120 do { \
121 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
122 { \
123 RTCCUINTREG fEflCheck = ASMGetFlags(); \
124 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
125 { /* likely */ } \
126 else \
127 { \
128 SUPR0BadContext((a_pVM) ? (a_pVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
129 a_BadExpr; \
130 } \
131 } \
132 } while (0)
133#else
134# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0
135# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures)
136# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) NOREF(fKernelFeatures)
137#endif
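/*
 * Illustrative sketch, added to this listing and not part of the original
 * source, of how the SMAP macros above are meant to be used by a ring-0 entry
 * point.  The function name gvmmR0ExampleEntry is hypothetical; RT_NOTHING is
 * the IPRT "expand to nothing" macro.
 *
 * @code
 *     static int gvmmR0ExampleEntry(PVM pVM)
 *     {
 *         GVMM_CHECK_SMAP_SETUP();                  // reads the kernel feature flags once
 *         GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);  // verify EFLAGS.AC on entry
 *         int rc = VINF_SUCCESS;
 *         // ... work that might clear EFLAGS.AC behind our back ...
 *         GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);  // verify again before returning
 *         return rc;
 *     }
 * @endcode
 */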
138
139
140
141/*********************************************************************************************************************************
142* Structures and Typedefs *
143*********************************************************************************************************************************/
144
145/**
146 * Global VM handle.
147 */
148typedef struct GVMHANDLE
149{
150 /** The index of the next handle in the list (free or used). (0 is nil.) */
151 uint16_t volatile iNext;
152 /** Our own index / handle value. */
153 uint16_t iSelf;
154 /** The process ID of the handle owner.
155 * This is used for access checks. */
156 RTPROCESS ProcId;
157 /** The pointer to the ring-0 only (aka global) VM structure. */
158 PGVM pGVM;
159 /** The ring-0 mapping of the shared VM instance data. */
160 PVM pVM;
161 /** The virtual machine object. */
162 void *pvObj;
163 /** The session this VM is associated with. */
164 PSUPDRVSESSION pSession;
165 /** The ring-0 handle of the EMT0 thread.
166 * This is used for ownership checks as well as looking up a VM handle by thread
167 * at times like assertions. */
168 RTNATIVETHREAD hEMT0;
169} GVMHANDLE;
170/** Pointer to a global VM handle. */
171typedef GVMHANDLE *PGVMHANDLE;
172
173/** Number of GVM handles (including the NIL handle). */
174#if HC_ARCH_BITS == 64
175# define GVMM_MAX_HANDLES 8192
176#else
177# define GVMM_MAX_HANDLES 128
178#endif
179
180/**
181 * Per host CPU GVMM data.
182 */
183typedef struct GVMMHOSTCPU
184{
185 /** Magic number (GVMMHOSTCPU_MAGIC). */
186 uint32_t volatile u32Magic;
187 /** The CPU ID. */
188 RTCPUID idCpu;
189 /** The CPU set index. */
190 uint32_t idxCpuSet;
191
192#ifdef GVMM_SCHED_WITH_PPT
193 /** Periodic preemption timer data. */
194 struct
195 {
196 /** The handle to the periodic preemption timer. */
197 PRTTIMER pTimer;
198 /** Spinlock protecting the data below. */
199 RTSPINLOCK hSpinlock;
201 /** The smallest Hz that we need to care about. (static) */
201 uint32_t uMinHz;
202 /** The number of ticks between each historization. */
203 uint32_t cTicksHistoriziationInterval;
204 /** The current historization tick (counting up to
205 * cTicksHistoriziationInterval and then resetting). */
206 uint32_t iTickHistorization;
207 /** The current timer interval. This is set to 0 when inactive. */
208 uint32_t cNsInterval;
209 /** The current timer frequency. This is set to 0 when inactive. */
210 uint32_t uTimerHz;
211 /** The current max frequency reported by the EMTs.
212 * This gets historicized and reset by the timer callback. This is
213 * read without holding the spinlock, so needs atomic updating. */
214 uint32_t volatile uDesiredHz;
215 /** Whether the timer was started or not. */
216 bool volatile fStarted;
218 /** Set if we're starting the timer. */
218 bool volatile fStarting;
219 /** The index of the next history entry (mod it). */
220 uint32_t iHzHistory;
221 /** Historicized uDesiredHz values. The array wraps around, new entries
222 * are added at iHzHistory. This is updated approximately every
223 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
224 uint32_t aHzHistory[8];
225 /** Statistics counter for recording the number of interval changes. */
226 uint32_t cChanges;
227 /** Statistics counter for recording the number of timer starts. */
228 uint32_t cStarts;
229 } Ppt;
230#endif /* GVMM_SCHED_WITH_PPT */
231
232} GVMMHOSTCPU;
233/** Pointer to the per host CPU GVMM data. */
234typedef GVMMHOSTCPU *PGVMMHOSTCPU;
235/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
236#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
237 /** The interval one history entry should cover (approximately), given in
238 * nanoseconds. */
239#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
240
241
242/**
243 * The GVMM instance data.
244 */
245typedef struct GVMM
246{
247 /** Eyecatcher / magic. */
248 uint32_t u32Magic;
249 /** The index of the head of the free handle chain. (0 is nil.) */
250 uint16_t volatile iFreeHead;
251 /** The index of the head of the active handle chain. (0 is nil.) */
252 uint16_t volatile iUsedHead;
253 /** The number of VMs. */
254 uint16_t volatile cVMs;
255 /** Alignment padding. */
256 uint16_t u16Reserved;
257 /** The number of EMTs. */
258 uint32_t volatile cEMTs;
259 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
260 uint32_t volatile cHaltedEMTs;
261 /** Mini lock for restricting early wake-ups to one thread. */
262 bool volatile fDoingEarlyWakeUps;
263 bool afPadding[3]; /**< explicit alignment padding. */
264 /** When the next halted or sleeping EMT will wake up.
265 * This is set to 0 when it needs recalculating and to UINT64_MAX when
266 * there are no halted or sleeping EMTs in the GVMM. */
267 uint64_t uNsNextEmtWakeup;
268 /** The lock used to serialize VM creation, destruction and associated events that
269 * aren't performance critical. Owners may acquire the list lock. */
270 RTCRITSECT CreateDestroyLock;
271 /** The lock used to serialize used list updates and accesses.
272 * This indirectly includes scheduling since the scheduler will have to walk the
273 * used list to examine running VMs. Owners may not acquire any other locks. */
274 RTCRITSECTRW UsedLock;
275 /** The handle array.
276 * The size of this array defines the maximum number of currently running VMs.
277 * The first entry is unused as it represents the NIL handle. */
278 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
279
280 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
281 * The number of EMTs that means we no longer consider ourselves alone on a
282 * CPU/Core.
283 */
284 uint32_t cEMTsMeansCompany;
285 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
286 * The minimum sleep time for when we're alone, in nanoseconds.
287 */
288 uint32_t nsMinSleepAlone;
289 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
290 * The minimum sleep time for when we've got company, in nanoseconds.
291 */
292 uint32_t nsMinSleepCompany;
293 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
294 * The limit for the first round of early wake-ups, given in nanoseconds.
295 */
296 uint32_t nsEarlyWakeUp1;
297 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
298 * The limit for the second round of early wake-ups, given in nanoseconds.
299 */
300 uint32_t nsEarlyWakeUp2;
301
302 /** Set if we're doing early wake-ups.
303 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
304 bool volatile fDoEarlyWakeUps;
305
306 /** The number of entries in the host CPU array (aHostCpus). */
307 uint32_t cHostCpus;
308 /** Per host CPU data (variable length). */
309 GVMMHOSTCPU aHostCpus[1];
310} GVMM;
311AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
312AssertCompileMemberAlignment(GVMM, UsedLock, 8);
313AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
314/** Pointer to the GVMM instance data. */
315typedef GVMM *PGVMM;
316
317/** The GVMM::u32Magic value (Charlie Haden). */
318#define GVMM_MAGIC UINT32_C(0x19370806)
319
320
321
322/*********************************************************************************************************************************
323* Global Variables *
324*********************************************************************************************************************************/
325/** Pointer to the GVMM instance data.
326 * (Just my general dislike for global variables.) */
327static PGVMM g_pGVMM = NULL;
328
329/** Macro for obtaining and validating the g_pGVMM pointer.
330 * On failure it will return from the invoking function with the specified return value.
331 *
332 * @param pGVMM The name of the pGVMM variable.
333 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
334 * status codes.
335 */
336#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
337 do { \
338 (pGVMM) = g_pGVMM;\
339 AssertPtrReturn((pGVMM), (rc)); \
340 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
341 } while (0)
342
343/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
344 * On failure it will return from the invoking function.
345 *
346 * @param pGVMM The name of the pGVMM variable.
347 */
348#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
349 do { \
350 (pGVMM) = g_pGVMM;\
351 AssertPtrReturnVoid((pGVMM)); \
352 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
353 } while (0)
354
355
356/*********************************************************************************************************************************
357* Internal Functions *
358*********************************************************************************************************************************/
359static void gvmmR0InitPerVMData(PGVM pGVM);
360static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
361static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
362static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM);
363static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock);
364static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM);
365
366#ifdef GVMM_SCHED_WITH_PPT
367static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
368#endif
369
370
371/**
372 * Initializes the GVMM.
373 *
374 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
375 *
376 * @returns VBox status code.
377 */
378GVMMR0DECL(int) GVMMR0Init(void)
379{
380 LogFlow(("GVMMR0Init:\n"));
381
382 /*
383 * Allocate and initialize the instance data.
384 */
385 uint32_t cHostCpus = RTMpGetArraySize();
386 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
387
388 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF(GVMM, aHostCpus[cHostCpus]));
389 if (!pGVMM)
390 return VERR_NO_MEMORY;
391 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
392 "GVMM-CreateDestroyLock");
393 if (RT_SUCCESS(rc))
394 {
395 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
396 if (RT_SUCCESS(rc))
397 {
398 pGVMM->u32Magic = GVMM_MAGIC;
399 pGVMM->iUsedHead = 0;
400 pGVMM->iFreeHead = 1;
401
402 /* the nil handle */
403 pGVMM->aHandles[0].iSelf = 0;
404 pGVMM->aHandles[0].iNext = 0;
405
406 /* the tail */
407 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
408 pGVMM->aHandles[i].iSelf = i;
409 pGVMM->aHandles[i].iNext = 0; /* nil */
410
411 /* the rest */
412 while (i-- > 1)
413 {
414 pGVMM->aHandles[i].iSelf = i;
415 pGVMM->aHandles[i].iNext = i + 1;
416 }
417
418 /* The default configuration values. */
419 uint32_t cNsResolution = RTSemEventMultiGetResolution();
420 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
421 if (cNsResolution >= 5*RT_NS_100US)
422 {
423 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
424 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
425 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
426 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
427 }
428 else if (cNsResolution > RT_NS_100US)
429 {
430 pGVMM->nsMinSleepAlone = cNsResolution / 2;
431 pGVMM->nsMinSleepCompany = cNsResolution / 4;
432 pGVMM->nsEarlyWakeUp1 = 0;
433 pGVMM->nsEarlyWakeUp2 = 0;
434 }
435 else
436 {
437 pGVMM->nsMinSleepAlone = 2000;
438 pGVMM->nsMinSleepCompany = 2000;
439 pGVMM->nsEarlyWakeUp1 = 0;
440 pGVMM->nsEarlyWakeUp2 = 0;
441 }
442 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
443
444 /* The host CPU data. */
445 pGVMM->cHostCpus = cHostCpus;
446 uint32_t iCpu = cHostCpus;
447 RTCPUSET PossibleSet;
448 RTMpGetSet(&PossibleSet);
449 while (iCpu-- > 0)
450 {
451 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
452#ifdef GVMM_SCHED_WITH_PPT
453 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
454 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
455 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
456 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
457 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
458 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
459 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
460 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
461 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
462 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
463 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
464 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
465#endif
466
467 if (RTCpuSetIsMember(&PossibleSet, iCpu))
468 {
469 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
470 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
471
472#ifdef GVMM_SCHED_WITH_PPT
473 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
474 50*1000*1000 /* whatever */,
475 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
476 gvmmR0SchedPeriodicPreemptionTimerCallback,
477 &pGVMM->aHostCpus[iCpu]);
478 if (RT_SUCCESS(rc))
479 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
480 if (RT_FAILURE(rc))
481 {
482 while (iCpu < cHostCpus)
483 {
484 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
485 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
486 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
487 iCpu++;
488 }
489 break;
490 }
491#endif
492 }
493 else
494 {
495 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
496 pGVMM->aHostCpus[iCpu].u32Magic = 0;
497 }
498 }
499 if (RT_SUCCESS(rc))
500 {
501 g_pGVMM = pGVMM;
502 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
503 return VINF_SUCCESS;
504 }
505
506 /* bail out. */
507 RTCritSectRwDelete(&pGVMM->UsedLock);
508 }
509 RTCritSectDelete(&pGVMM->CreateDestroyLock);
510 }
511
512 RTMemFree(pGVMM);
513 return rc;
514}
515
516
517/**
518 * Terminates the GVMM.
519 *
520 * This is called while owning the loader semaphore (see supdrvLdrFree()).
521 * And unless something is wrong, there should be absolutely no VMs
522 * registered at this point.
523 */
524GVMMR0DECL(void) GVMMR0Term(void)
525{
526 LogFlow(("GVMMR0Term:\n"));
527
528 PGVMM pGVMM = g_pGVMM;
529 g_pGVMM = NULL;
530 if (RT_UNLIKELY(!VALID_PTR(pGVMM)))
531 {
532 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
533 return;
534 }
535
536 /*
537 * First of all, stop all active timers.
538 */
539 uint32_t cActiveTimers = 0;
540 uint32_t iCpu = pGVMM->cHostCpus;
541 while (iCpu-- > 0)
542 {
543 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
544#ifdef GVMM_SCHED_WITH_PPT
545 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
546 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
547 cActiveTimers++;
548#endif
549 }
550 if (cActiveTimers)
551 RTThreadSleep(1); /* fudge */
552
553 /*
554 * Invalidate the instance data and free the resources.
555 */
556 pGVMM->u32Magic = ~GVMM_MAGIC;
557 RTCritSectRwDelete(&pGVMM->UsedLock);
558 RTCritSectDelete(&pGVMM->CreateDestroyLock);
559
560 pGVMM->iFreeHead = 0;
561 if (pGVMM->iUsedHead)
562 {
563 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
564 pGVMM->iUsedHead = 0;
565 }
566
567#ifdef GVMM_SCHED_WITH_PPT
568 iCpu = pGVMM->cHostCpus;
569 while (iCpu-- > 0)
570 {
571 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
572 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
573 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
574 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
575 }
576#endif
577
578 RTMemFree(pGVMM);
579}
580
581
582/**
583 * A quick hack for setting global config values.
584 *
585 * @returns VBox status code.
586 *
587 * @param pSession The session handle. Used for authentication.
588 * @param pszName The variable name.
589 * @param u64Value The new value.
590 */
591GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
592{
593 /*
594 * Validate input.
595 */
596 PGVMM pGVMM;
597 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
598 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
599 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
600
601 /*
602 * String switch time!
603 */
604 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
605 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
606 int rc = VINF_SUCCESS;
607 pszName += sizeof("/GVMM/") - 1;
608 if (!strcmp(pszName, "cEMTsMeansCompany"))
609 {
610 if (u64Value <= UINT32_MAX)
611 pGVMM->cEMTsMeansCompany = u64Value;
612 else
613 rc = VERR_OUT_OF_RANGE;
614 }
615 else if (!strcmp(pszName, "MinSleepAlone"))
616 {
617 if (u64Value <= RT_NS_100MS)
618 pGVMM->nsMinSleepAlone = u64Value;
619 else
620 rc = VERR_OUT_OF_RANGE;
621 }
622 else if (!strcmp(pszName, "MinSleepCompany"))
623 {
624 if (u64Value <= RT_NS_100MS)
625 pGVMM->nsMinSleepCompany = u64Value;
626 else
627 rc = VERR_OUT_OF_RANGE;
628 }
629 else if (!strcmp(pszName, "EarlyWakeUp1"))
630 {
631 if (u64Value <= RT_NS_100MS)
632 {
633 pGVMM->nsEarlyWakeUp1 = u64Value;
634 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
635 }
636 else
637 rc = VERR_OUT_OF_RANGE;
638 }
639 else if (!strcmp(pszName, "EarlyWakeUp2"))
640 {
641 if (u64Value <= RT_NS_100MS)
642 {
643 pGVMM->nsEarlyWakeUp2 = u64Value;
644 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
645 }
646 else
647 rc = VERR_OUT_OF_RANGE;
648 }
649 else
650 rc = VERR_CFGM_VALUE_NOT_FOUND;
651 return rc;
652}
653
654
655/**
656 * A quick hack for getting global config values.
657 *
658 * @returns VBox status code.
659 *
660 * @param pSession The session handle. Used for authentication.
661 * @param pszName The variable name.
662 * @param pu64Value Where to return the value.
663 */
664GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
665{
666 /*
667 * Validate input.
668 */
669 PGVMM pGVMM;
670 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
671 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
672 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
673 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
674
675 /*
676 * String switch time!
677 */
678 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
679 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
680 int rc = VINF_SUCCESS;
681 pszName += sizeof("/GVMM/") - 1;
682 if (!strcmp(pszName, "cEMTsMeansCompany"))
683 *pu64Value = pGVMM->cEMTsMeansCompany;
684 else if (!strcmp(pszName, "MinSleepAlone"))
685 *pu64Value = pGVMM->nsMinSleepAlone;
686 else if (!strcmp(pszName, "MinSleepCompany"))
687 *pu64Value = pGVMM->nsMinSleepCompany;
688 else if (!strcmp(pszName, "EarlyWakeUp1"))
689 *pu64Value = pGVMM->nsEarlyWakeUp1;
690 else if (!strcmp(pszName, "EarlyWakeUp2"))
691 *pu64Value = pGVMM->nsEarlyWakeUp2;
692 else
693 rc = VERR_CFGM_VALUE_NOT_FOUND;
694 return rc;
695}
696
697
698/**
699 * Acquire the 'used' lock in shared mode.
700 *
701 * This prevents destruction of the VM while we're in ring-0.
702 *
703 * @returns IPRT status code, see RTSemFastMutexRequest.
704 * @param a_pGVMM The GVMM instance data.
705 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
706 */
707#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
708
709/**
710 * Release the 'used' lock when owning it in shared mode.
711 *
712 * @returns IPRT status code, see RTSemFastMutexRequest.
713 * @param a_pGVMM The GVMM instance data.
714 * @sa GVMMR0_USED_SHARED_LOCK
715 */
716#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
717
718/**
719 * Acquire the 'used' lock in exclusive mode.
720 *
721 * Only use this function when making changes to the used list.
722 *
723 * @returns IPRT status code, see RTSemFastMutexRequest.
724 * @param a_pGVMM The GVMM instance data.
725 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
726 */
727#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
728
729/**
730 * Release the 'used' lock when owning it in exclusive mode.
731 *
732 * @returns IPRT status code, see RTSemFastMutexRelease.
733 * @param a_pGVMM The GVMM instance data.
734 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
735 */
736#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
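/*
 * Illustrative sketch, added to this listing and not part of the original
 * source: the typical pattern for reading the used list under the shared
 * 'used' lock.  The helper name gvmmR0ExampleWalkUsedList is hypothetical.
 *
 * @code
 *     static void gvmmR0ExampleWalkUsedList(PGVMM pGVMM)
 *     {
 *         int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
 *         AssertRC(rc);
 *         for (uint16_t i = pGVMM->iUsedHead; i != 0; i = pGVMM->aHandles[i].iNext)
 *         {
 *             // Inspect pGVMM->aHandles[i] here.  Per the UsedLock rules above,
 *             // no other locks may be acquired while holding it.
 *         }
 *         GVMMR0_USED_SHARED_UNLOCK(pGVMM);
 *     }
 * @endcode
 */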
737
738
739/**
740 * Try acquire the 'create & destroy' lock.
741 *
742 * @returns IPRT status code, see RTSemFastMutexRequest.
743 * @param pGVMM The GVMM instance data.
744 */
745DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
746{
747 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
748 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
749 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
750 return rc;
751}
752
753
754/**
755 * Release the 'create & destroy' lock.
756 *
757 * @returns IPRT status code, see RTSemFastMutexRequest.
758 * @param pGVMM The GVMM instance data.
759 */
760DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
761{
762 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
763 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
764 AssertRC(rc);
765 return rc;
766}
767
768
769/**
770 * Request wrapper for the GVMMR0CreateVM API.
771 *
772 * @returns VBox status code.
773 * @param pReq The request buffer.
774 * @param pSession The session handle. The VM will be associated with this.
775 */
776GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
777{
778 /*
779 * Validate the request.
780 */
781 if (!VALID_PTR(pReq))
782 return VERR_INVALID_POINTER;
783 if (pReq->Hdr.cbReq != sizeof(*pReq))
784 return VERR_INVALID_PARAMETER;
785 if (pReq->pSession != pSession)
786 return VERR_INVALID_POINTER;
787
788 /*
789 * Execute it.
790 */
791 PVM pVM;
792 pReq->pVMR0 = NULL;
793 pReq->pVMR3 = NIL_RTR3PTR;
794 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pVM);
795 if (RT_SUCCESS(rc))
796 {
797 pReq->pVMR0 = pVM;
798 pReq->pVMR3 = pVM->pVMR3;
799 }
800 return rc;
801}
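/*
 * Illustrative sketch, added to this listing and not part of the original
 * source, of roughly how the ring-3 side fills in and submits the request
 * handled by GVMMR0CreateVMReq above.  The actual code lives in ring-3
 * (VM.cpp) and may differ in detail.
 *
 * @code
 *     GVMMCREATEVMREQ CreateVMReq;
 *     CreateVMReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
 *     CreateVMReq.Hdr.cbReq    = sizeof(CreateVMReq);
 *     CreateVMReq.pSession     = pSession;
 *     CreateVMReq.pVMR0        = NIL_RTR0PTR;
 *     CreateVMReq.pVMR3        = NULL;
 *     CreateVMReq.cCpus        = cCpus;
 *     int rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_GVMM_CREATE_VM,
 *                               0 /*u64Arg*/, &CreateVMReq.Hdr);
 *     // On success, CreateVMReq.pVMR3 is the ring-3 mapping of the new VM structure.
 * @endcode
 */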
802
803
804/**
805 * Allocates the VM structure and registers it with GVM.
806 *
807 * The caller will become the VM owner and thereby the EMT.
808 *
809 * @returns VBox status code.
810 * @param pSession The support driver session.
811 * @param cCpus Number of virtual CPUs for the new VM.
812 * @param ppVM Where to store the pointer to the VM structure.
813 *
814 * @thread EMT.
815 */
816GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM)
817{
818 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
819 PGVMM pGVMM;
820 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
821
822 AssertPtrReturn(ppVM, VERR_INVALID_POINTER);
823 *ppVM = NULL;
824
825 if ( cCpus == 0
826 || cCpus > VMM_MAX_CPU_COUNT)
827 return VERR_INVALID_PARAMETER;
828
829 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
830 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
831 RTPROCESS ProcId = RTProcSelf();
832 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
833
834 /*
835 * The whole allocation process is protected by the lock.
836 */
837 int rc = gvmmR0CreateDestroyLock(pGVMM);
838 AssertRCReturn(rc, rc);
839
840 /*
841 * Only one VM per session.
842 */
843 if (SUPR0GetSessionVM(pSession) != NULL)
844 {
845 gvmmR0CreateDestroyUnlock(pGVMM);
846 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
847 return VERR_ALREADY_EXISTS;
848 }
849
850 /*
851 * Allocate a handle first so we don't waste resources unnecessarily.
852 */
853 uint16_t iHandle = pGVMM->iFreeHead;
854 if (iHandle)
855 {
856 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
857
858 /* consistency checks, a bit paranoid as always. */
859 if ( !pHandle->pVM
860 && !pHandle->pGVM
861 && !pHandle->pvObj
862 && pHandle->iSelf == iHandle)
863 {
864 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
865 if (pHandle->pvObj)
866 {
867 /*
868 * Move the handle from the free to used list and perform permission checks.
869 */
870 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
871 AssertRC(rc);
872
873 pGVMM->iFreeHead = pHandle->iNext;
874 pHandle->iNext = pGVMM->iUsedHead;
875 pGVMM->iUsedHead = iHandle;
876 pGVMM->cVMs++;
877
878 pHandle->pVM = NULL;
879 pHandle->pGVM = NULL;
880 pHandle->pSession = pSession;
881 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
882 pHandle->ProcId = NIL_RTPROCESS;
883
884 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
885
886 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
887 if (RT_SUCCESS(rc))
888 {
889 /*
890 * Allocate the global VM structure (GVM) and initialize it.
891 */
892 PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF(GVM, aCpus[cCpus]));
893 if (pGVM)
894 {
895 pGVM->u32Magic = GVM_MAGIC;
896 pGVM->hSelf = iHandle;
897 pGVM->pVM = NULL;
898 pGVM->cCpus = cCpus;
899 pGVM->pSession = pSession;
900
901 gvmmR0InitPerVMData(pGVM);
902 GMMR0InitPerVMData(pGVM);
903
904 /*
905 * Allocate the shared VM structure and associated page array.
906 */
907 const uint32_t cbVM = RT_UOFFSETOF(VM, aCpus[cCpus]);
908 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
909 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
910 if (RT_SUCCESS(rc))
911 {
912 PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM);
913 memset(pVM, 0, cPages << PAGE_SHIFT);
914 pVM->enmVMState = VMSTATE_CREATING;
915 pVM->pVMR0 = pVM;
916 pVM->pSession = pSession;
917 pVM->hSelf = iHandle;
918 pVM->cbSelf = cbVM;
919 pVM->cCpus = cCpus;
920 pVM->uCpuExecutionCap = 100; /* default is no cap. */
921 pVM->offVMCPU = RT_UOFFSETOF(VM, aCpus);
922 AssertCompileMemberAlignment(VM, cpum, 64);
923 AssertCompileMemberAlignment(VM, tm, 64);
924 AssertCompileMemberAlignment(VM, aCpus, PAGE_SIZE);
925
926 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
927 if (RT_SUCCESS(rc))
928 {
929 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
930 for (uint32_t iPage = 0; iPage < cPages; iPage++)
931 {
932 paPages[iPage].uReserved = 0;
933 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
934 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
935 }
936
937 /*
938 * Map them into ring-3.
939 */
940 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
941 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
942 if (RT_SUCCESS(rc))
943 {
944 pVM->pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
945 AssertPtr((void *)pVM->pVMR3);
946
947 /* Initialize all the VM pointers. */
948 for (uint32_t i = 0; i < cCpus; i++)
949 {
950 pVM->aCpus[i].pVMR0 = pVM;
951 pVM->aCpus[i].pVMR3 = pVM->pVMR3;
952 pVM->aCpus[i].idHostCpu = NIL_RTCPUID;
953 pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
954 }
955
956 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
957 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
958 NIL_RTR0PROCESS);
959 if (RT_SUCCESS(rc))
960 {
961 pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
962 AssertPtr((void *)pVM->paVMPagesR3);
963
964 /* complete the handle - take the UsedLock sem just to be careful. */
965 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
966 AssertRC(rc);
967
968 pHandle->pVM = pVM;
969 pHandle->pGVM = pGVM;
970 pHandle->hEMT0 = hEMT0;
971 pHandle->ProcId = ProcId;
972 pGVM->pVM = pVM;
973 pGVM->aCpus[0].hEMT = hEMT0;
974 pVM->aCpus[0].hNativeThreadR0 = hEMT0;
975 pGVMM->cEMTs += cCpus;
976
977 /* Associate it with the session and create the context hook for EMT0. */
978 rc = SUPR0SetSessionVM(pSession, pGVM, pVM);
979 if (RT_SUCCESS(rc))
980 {
981 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[0]);
982 if (RT_SUCCESS(rc))
983 {
984 /*
985 * Done!
986 */
987 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pVM, ProcId, (void *)hEMT0, cCpus);
988
989 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
990 gvmmR0CreateDestroyUnlock(pGVMM);
991
992 CPUMR0RegisterVCpuThread(&pVM->aCpus[0]);
993
994 *ppVM = pVM;
995 Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVM->pVMR3, pGVM, iHandle));
996 return VINF_SUCCESS;
997 }
998
999 SUPR0SetSessionVM(pSession, NULL, NULL);
1000 }
1001 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1002 }
1003
1004 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1005 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1006 }
1007 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
1008 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1009 }
1010 RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
1011 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1012 }
1013 }
1014 }
1015 /* else: The user wasn't permitted to create this VM. */
1016
1017 /*
1018 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1019 * object reference here. A little extra mess because of non-recursive lock.
1020 */
1021 void *pvObj = pHandle->pvObj;
1022 pHandle->pvObj = NULL;
1023 gvmmR0CreateDestroyUnlock(pGVMM);
1024
1025 SUPR0ObjRelease(pvObj, pSession);
1026
1027 SUPR0Printf("GVMMR0CreateVM: failed, rc=%d\n", rc);
1028 return rc;
1029 }
1030
1031 rc = VERR_NO_MEMORY;
1032 }
1033 else
1034 rc = VERR_GVMM_IPE_1;
1035 }
1036 else
1037 rc = VERR_GVM_TOO_MANY_VMS;
1038
1039 gvmmR0CreateDestroyUnlock(pGVMM);
1040 return rc;
1041}
1042
1043
1044/**
1045 * Initializes the per VM data belonging to GVMM.
1046 *
1047 * @param pGVM Pointer to the global VM structure.
1048 */
1049static void gvmmR0InitPerVMData(PGVM pGVM)
1050{
1051 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1052 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1053 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1054 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1055 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1056 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1057 pGVM->gvmm.s.fDoneVMMR0Init = false;
1058 pGVM->gvmm.s.fDoneVMMR0Term = false;
1059
1060 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1061 {
1062 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1063 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1064 }
1065}
1066
1067
1068/**
1069 * Does the VM initialization.
1070 *
1071 * @returns VBox status code.
1072 * @param pVM The cross context VM structure.
1073 */
1074GVMMR0DECL(int) GVMMR0InitVM(PVM pVM)
1075{
1076 LogFlow(("GVMMR0InitVM: pVM=%p\n", pVM));
1077
1078 /*
1079 * Validate the VM structure, state and handle.
1080 */
1081 PGVM pGVM;
1082 PGVMM pGVMM;
1083 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1084 if (RT_SUCCESS(rc))
1085 {
1086 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1087 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1088 {
1089 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1090 {
1091 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1092 if (RT_FAILURE(rc))
1093 {
1094 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1095 break;
1096 }
1097 }
1098 }
1099 else
1100 rc = VERR_WRONG_ORDER;
1101 }
1102
1103 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1104 return rc;
1105}
1106
1107
1108/**
1109 * Indicates that we're done with the ring-0 initialization
1110 * of the VM.
1111 *
1112 * @param pVM The cross context VM structure.
1113 * @thread EMT(0)
1114 */
1115GVMMR0DECL(void) GVMMR0DoneInitVM(PVM pVM)
1116{
1117 /* Validate the VM structure, state and handle. */
1118 PGVM pGVM;
1119 PGVMM pGVMM;
1120 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1121 AssertRCReturnVoid(rc);
1122
1123 /* Set the indicator. */
1124 pGVM->gvmm.s.fDoneVMMR0Init = true;
1125}
1126
1127
1128/**
1129 * Indicates that we're doing the ring-0 termination of the VM.
1130 *
1131 * @returns true if termination hasn't been done already, false if it has.
1132 * @param pVM The cross context VM structure.
1133 * @param pGVM Pointer to the global VM structure. Optional.
1134 * @thread EMT(0)
1135 */
1136GVMMR0DECL(bool) GVMMR0DoingTermVM(PVM pVM, PGVM pGVM)
1137{
1138 /* Validate the VM structure, state and handle. */
1139 AssertPtrNullReturn(pGVM, false);
1140 AssertReturn(!pGVM || pGVM->u32Magic == GVM_MAGIC, false);
1141 if (!pGVM)
1142 {
1143 PGVMM pGVMM;
1144 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1145 AssertRCReturn(rc, false);
1146 }
1147
1148 /* Set the indicator. */
1149 if (pGVM->gvmm.s.fDoneVMMR0Term)
1150 return false;
1151 pGVM->gvmm.s.fDoneVMMR0Term = true;
1152 return true;
1153}
1154
1155
1156/**
1157 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1158 *
1159 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1160 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1161 * would've been nice if the caller had actually been the EMT thread, or if we somehow
1162 * could've associated the calling thread with the VM up front.
1163 *
1164 * @returns VBox status code.
1165 * @param pGVM The global (ring-0) VM structure.
1166 * @param pVM The cross context VM structure.
1167 *
1168 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1169 */
1170GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM, PVM pVM)
1171{
1172 LogFlow(("GVMMR0DestroyVM: pGVM=%p pVM=%p\n", pGVM, pVM));
1173 PGVMM pGVMM;
1174 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1175
1176 /*
1177 * Validate the VM structure, state and caller.
1178 */
1179 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1180 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1181 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1182 AssertReturn(pGVM->pVM == pVM, VERR_INVALID_POINTER);
1183 AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState),
1184 VERR_WRONG_ORDER);
1185
1186 uint32_t hGVM = pGVM->hSelf;
1187 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1188 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1189
1190 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1191 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1192
1193 RTPROCESS ProcId = RTProcSelf();
1194 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1195 AssertReturn( ( pHandle->hEMT0 == hSelf
1196 && pHandle->ProcId == ProcId)
1197 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1198
1199 /*
1200 * Lookup the handle and destroy the object.
1201 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1202 * object, we take some precautions against racing callers just in case...
1203 */
1204 int rc = gvmmR0CreateDestroyLock(pGVMM);
1205 AssertRC(rc);
1206
1207 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1208 if ( pHandle->pVM == pVM
1209 && ( ( pHandle->hEMT0 == hSelf
1210 && pHandle->ProcId == ProcId)
1211 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1212 && VALID_PTR(pHandle->pvObj)
1213 && VALID_PTR(pHandle->pSession)
1214 && VALID_PTR(pHandle->pGVM)
1215 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1216 {
1217 /* Check that other EMTs have deregistered. */
1218 uint32_t cNotDeregistered = 0;
1219 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1220 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */
1221 if (cNotDeregistered == 0)
1222 {
1223 /* Grab the object pointer. */
1224 void *pvObj = pHandle->pvObj;
1225 pHandle->pvObj = NULL;
1226 gvmmR0CreateDestroyUnlock(pGVMM);
1227
1228 SUPR0ObjRelease(pvObj, pHandle->pSession);
1229 }
1230 else
1231 {
1232 gvmmR0CreateDestroyUnlock(pGVMM);
1233 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1234 }
1235 }
1236 else
1237 {
1238 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n",
1239 pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf);
1240 gvmmR0CreateDestroyUnlock(pGVMM);
1241 rc = VERR_GVMM_IPE_2;
1242 }
1243
1244 return rc;
1245}
1246
1247
1248/**
1249 * Performs VM cleanup task as part of object destruction.
1250 *
1251 * @param pGVM The GVM pointer.
1252 */
1253static void gvmmR0CleanupVM(PGVM pGVM)
1254{
1255 if ( pGVM->gvmm.s.fDoneVMMR0Init
1256 && !pGVM->gvmm.s.fDoneVMMR0Term)
1257 {
1258 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1259 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM)
1260 {
1261 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1262 VMMR0TermVM(pGVM->pVM, pGVM);
1263 }
1264 else
1265 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM));
1266 }
1267
1268 GMMR0CleanupVM(pGVM);
1269
1270 AssertCompile((uintptr_t)NIL_RTTHREADCTXHOOK == 0); /* Depends on zero initialized memory working for NIL at the moment. */
1271 for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
1272 {
1273 /** @todo Can we busy wait here for all thread-context hooks to be
1274 * deregistered before releasing (destroying) it? Only until we find a
1275 * solution for not deregistering hooks every time we're leaving HMR0
1276 * context. */
1277 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->pVM->aCpus[idCpu]);
1278 }
1279}
1280
1281
1282/**
1283 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1284 *
1285 * pvUser1 is the GVM instance pointer.
1286 * pvUser2 is the handle pointer.
1287 */
1288static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1289{
1290 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1291
1292 NOREF(pvObj);
1293
1294 /*
1295 * Some quick, paranoid, input validation.
1296 */
1297 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1298 AssertPtr(pHandle);
1299 PGVMM pGVMM = (PGVMM)pvUser1;
1300 Assert(pGVMM == g_pGVMM);
1301 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1302 if ( !iHandle
1303 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1304 || iHandle != pHandle->iSelf)
1305 {
1306 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1307 return;
1308 }
1309
1310 int rc = gvmmR0CreateDestroyLock(pGVMM);
1311 AssertRC(rc);
1312 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1313 AssertRC(rc);
1314
1315 /*
1316 * This is a tad slow but a doubly linked list is too much hassle.
1317 */
1318 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1319 {
1320 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1321 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1322 gvmmR0CreateDestroyUnlock(pGVMM);
1323 return;
1324 }
1325
1326 if (pGVMM->iUsedHead == iHandle)
1327 pGVMM->iUsedHead = pHandle->iNext;
1328 else
1329 {
1330 uint16_t iPrev = pGVMM->iUsedHead;
1331 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1332 while (iPrev)
1333 {
1334 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1335 {
1336 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1337 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1338 gvmmR0CreateDestroyUnlock(pGVMM);
1339 return;
1340 }
1341 if (RT_UNLIKELY(c-- <= 0))
1342 {
1343 iPrev = 0;
1344 break;
1345 }
1346
1347 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1348 break;
1349 iPrev = pGVMM->aHandles[iPrev].iNext;
1350 }
1351 if (!iPrev)
1352 {
1353 SUPR0Printf("GVM: can't find the previous handle of %d!\n", pHandle->iSelf);
1354 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1355 gvmmR0CreateDestroyUnlock(pGVMM);
1356 return;
1357 }
1358
1359 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1360 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1361 }
1362 pHandle->iNext = 0;
1363 pGVMM->cVMs--;
1364
1365 /*
1366 * Do the global cleanup round.
1367 */
1368 PGVM pGVM = pHandle->pGVM;
1369 if ( VALID_PTR(pGVM)
1370 && pGVM->u32Magic == GVM_MAGIC)
1371 {
1372 pGVMM->cEMTs -= pGVM->cCpus;
1373
1374 if (pGVM->pSession)
1375 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1376
1377 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1378
1379 gvmmR0CleanupVM(pGVM);
1380
1381 /*
1382 * Do the GVMM cleanup - must be done last.
1383 */
1384 /* The VM and VM pages mappings/allocations. */
1385 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1386 {
1387 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1388 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1389 }
1390
1391 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1392 {
1393 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1394 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1395 }
1396
1397 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1398 {
1399 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1400 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1401 }
1402
1403 if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ)
1404 {
1405 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc);
1406 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1407 }
1408
1409 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1410 {
1411 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1412 {
1413 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1414 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1415 }
1416 }
1417
1418 /* the GVM structure itself. */
1419 pGVM->u32Magic |= UINT32_C(0x80000000);
1420 RTMemFree(pGVM);
1421
1422 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1423 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1424 AssertRC(rc);
1425 }
1426 /* else: GVMMR0CreateVM cleanup. */
1427
1428 /*
1429 * Free the handle.
1430 */
1431 pHandle->iNext = pGVMM->iFreeHead;
1432 pGVMM->iFreeHead = iHandle;
1433 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1434 ASMAtomicWriteNullPtr(&pHandle->pVM);
1435 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1436 ASMAtomicWriteNullPtr(&pHandle->pSession);
1437 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1438 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1439
1440 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1441 gvmmR0CreateDestroyUnlock(pGVMM);
1442 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1443}
1444
1445
1446/**
1447 * Registers the calling thread as the EMT of a Virtual CPU.
1448 *
1449 * Note that VCPU 0 is automatically registered during VM creation.
1450 *
1451 * @returns VBox status code
1452 * @param pGVM The global (ring-0) VM structure.
1453 * @param pVM The cross context VM structure.
1454 * @param idCpu VCPU id to register the current thread as.
1455 */
1456GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1457{
1458 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1459
1460 /*
1461 * Validate the VM structure, state and handle.
1462 */
1463 PGVMM pGVMM;
1464 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /* fTakeUsedLock */); /** @todo take lock here. */
1465 if (RT_SUCCESS(rc))
1466 {
1467 if (idCpu < pGVM->cCpus)
1468 {
1469 /* Check that the EMT isn't already assigned to a thread. */
1470 if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
1471 {
1472 Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1473
1474 /* A thread may only be one EMT. */
1475 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1476 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1477 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1478 if (RT_SUCCESS(rc))
1479 {
1480 /*
1481 * Do the assignment, then try setup the hook. Undo if that fails.
1482 */
1483 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1484
1485 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[idCpu]);
1486 if (RT_SUCCESS(rc))
1487 CPUMR0RegisterVCpuThread(&pVM->aCpus[idCpu]);
1488 else
1489 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1490 }
1491 }
1492 else
1493 rc = VERR_ACCESS_DENIED;
1494 }
1495 else
1496 rc = VERR_INVALID_CPU_ID;
1497 }
1498 return rc;
1499}
1500
1501
1502/**
1503 * Deregisters the calling thread as the EMT of a Virtual CPU.
1504 *
1505 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1506 *
1507 * @returns VBox status code
1508 * @param pGVM The global (ring-0) VM structure.
1509 * @param pVM The cross context VM structure.
1510 * @param idCpu VCPU id to deregister the current thread from.
1511 */
1512GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1513{
1514 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1515
1516 /*
1517 * Validate the VM structure, state and handle.
1518 */
1519 PGVMM pGVMM;
1520 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
1521 if (RT_SUCCESS(rc))
1522 {
1523 /*
1524 * Take the destruction lock and recheck the handle state to
1525 * prevent racing GVMMR0DestroyVM.
1526 */
1527 gvmmR0CreateDestroyLock(pGVMM);
1528 uint32_t hSelf = pGVM->hSelf;
1529 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1530 && pGVMM->aHandles[hSelf].pvObj != NULL
1531 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1532 {
1533 /*
1534 * Do per-EMT cleanups.
1535 */
1536 VMMR0ThreadCtxHookDestroyForEmt(&pVM->aCpus[idCpu]);
1537
1538 /*
1539 * Invalidate hEMT. We don't use NIL here as that would allow
1540 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1541 */
1542 AssertCompile(~(RTNATIVETHREAD)1 != NIL_RTNATIVETHREAD);
1543 pGVM->aCpus[idCpu].hEMT = ~(RTNATIVETHREAD)1;
1544 pVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1545 }
1546
1547 gvmmR0CreateDestroyUnlock(pGVMM);
1548 }
1549 return rc;
1550}
1551
1552
1553/**
1554 * Lookup a GVM structure by its handle.
1555 *
1556 * @returns The GVM pointer on success, NULL on failure.
1557 * @param hGVM The global VM handle. Asserts on bad handle.
1558 */
1559GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1560{
1561 PGVMM pGVMM;
1562 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1563
1564 /*
1565 * Validate.
1566 */
1567 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1568 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1569
1570 /*
1571 * Look it up.
1572 */
1573 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1574 AssertPtrReturn(pHandle->pVM, NULL);
1575 AssertPtrReturn(pHandle->pvObj, NULL);
1576 PGVM pGVM = pHandle->pGVM;
1577 AssertPtrReturn(pGVM, NULL);
1578 AssertReturn(pGVM->pVM == pHandle->pVM, NULL);
1579
1580 return pHandle->pGVM;
1581}
1582
1583
1584/**
1585 * Lookup a GVM structure by the shared VM structure.
1586 *
1587 * The calling thread must be in the same process as the VM. All current lookups
1588 * are by threads inside the same process, so this will not be an issue.
1589 *
1590 * @returns VBox status code.
1591 * @param pVM The cross context VM structure.
1592 * @param ppGVM Where to store the GVM pointer.
1593 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1594 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1595 * shared mode when requested.
1596 *
1597 * Be very careful if not taking the lock as it's
1598 * possible that the VM will disappear then!
1599 *
1600 * @remark This will not assert on an invalid pVM but try to return silently.
1601 */
1602static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1603{
1604 RTPROCESS ProcId = RTProcSelf();
1605 PGVMM pGVMM;
1606 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1607
1608 /*
1609 * Validate.
1610 */
1611 if (RT_UNLIKELY( !VALID_PTR(pVM)
1612 || ((uintptr_t)pVM & PAGE_OFFSET_MASK)))
1613 return VERR_INVALID_POINTER;
1614 if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING
1615 || pVM->enmVMState >= VMSTATE_TERMINATED))
1616 return VERR_INVALID_POINTER;
1617
1618 uint16_t hGVM = pVM->hSelf;
1619 if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE
1620 || hGVM >= RT_ELEMENTS(pGVMM->aHandles)))
1621 return VERR_INVALID_HANDLE;
1622
1623 /*
1624 * Look it up.
1625 */
1626 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1627 PGVM pGVM;
1628 if (fTakeUsedLock)
1629 {
1630 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1631 AssertRCReturn(rc, rc);
1632
1633 pGVM = pHandle->pGVM;
1634 if (RT_UNLIKELY( pHandle->pVM != pVM
1635 || pHandle->ProcId != ProcId
1636 || !VALID_PTR(pHandle->pvObj)
1637 || !VALID_PTR(pGVM)
1638 || pGVM->pVM != pVM))
1639 {
1640 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1641 return VERR_INVALID_HANDLE;
1642 }
1643 }
1644 else
1645 {
1646 if (RT_UNLIKELY(pHandle->pVM != pVM))
1647 return VERR_INVALID_HANDLE;
1648 if (RT_UNLIKELY(pHandle->ProcId != ProcId))
1649 return VERR_INVALID_HANDLE;
1650 if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj)))
1651 return VERR_INVALID_HANDLE;
1652
1653 pGVM = pHandle->pGVM;
1654 if (RT_UNLIKELY(!VALID_PTR(pGVM)))
1655 return VERR_INVALID_HANDLE;
1656 if (RT_UNLIKELY(pGVM->pVM != pVM))
1657 return VERR_INVALID_HANDLE;
1658 }
1659
1660 *ppGVM = pGVM;
1661 *ppGVMM = pGVMM;
1662 return VINF_SUCCESS;
1663}
1664
1665
1666/**
1667 * Check that the given GVM and VM structures match up.
1668 *
1669 * The calling thread must be in the same process as the VM. All current lookups
1670 * are by threads inside the same process, so this will not be an issue.
1671 *
1672 * @returns VBox status code.
1673 * @param pGVM The global (ring-0) VM structure.
1674 * @param pVM The cross context VM structure.
1675 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1676 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1677 * shared mode when requested.
1678 *
1679 * Be very careful if not taking the lock as it's
1680 * possible that the VM will disappear then!
1681 *
1682 * @remark This will not assert on an invalid pVM but try to return silently.
1683 */
1684static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1685{
1686 /*
1687 * Check the pointers.
1688 */
1689 int rc;
1690 if (RT_LIKELY(RT_VALID_PTR(pGVM)))
1691 {
1692 if (RT_LIKELY( RT_VALID_PTR(pVM)
1693 && ((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0))
1694 {
1695 if (RT_LIKELY(pGVM->pVM == pVM))
1696 {
1697 /*
1698 * Get the pGVMM instance and check the VM handle.
1699 */
1700 PGVMM pGVMM;
1701 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1702
1703 uint16_t hGVM = pGVM->hSelf;
1704 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1705 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1706 {
1707 RTPROCESS const pidSelf = RTProcSelf();
1708 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1709 if (fTakeUsedLock)
1710 {
1711 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1712 AssertRCReturn(rc, rc);
1713 }
1714
1715 if (RT_LIKELY( pHandle->pGVM == pGVM
1716 && pHandle->pVM == pVM
1717 && pHandle->ProcId == pidSelf
1718 && RT_VALID_PTR(pHandle->pvObj)))
1719 {
1720 /*
1721 * Some more VM data consistency checks.
1722 */
1723 if (RT_LIKELY( pVM->cCpus == pGVM->cCpus
1724 && pVM->hSelf == hGVM
1725 && pVM->enmVMState >= VMSTATE_CREATING
1726 && pVM->enmVMState <= VMSTATE_TERMINATED
1727 && pVM->pVMR0 == pVM))
1728 {
1729 *ppGVMM = pGVMM;
1730 return VINF_SUCCESS;
1731 }
1732 }
1733
1734 if (fTakeUsedLock)
1735 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1736 }
1737 }
1738 rc = VERR_INVALID_VM_HANDLE;
1739 }
1740 else
1741 rc = VERR_INVALID_POINTER;
1742 }
1743 else
1744 rc = VERR_INVALID_POINTER;
1745 return rc;
1746}
1747
1748
1749/**
1750 * Lookup a GVM structure by the shared VM structure.
1751 *
1752 * @returns VBox status code.
1753 * @param pVM The cross context VM structure.
1754 * @param ppGVM Where to store the GVM pointer.
1755 *
1756 * @remark This will not take the 'used'-lock because it doesn't do
1757 * nesting and this function will be used from under the lock.
1758 * Update: This is no longer true. Consider taking the lock in shared
1759 * mode!
1760 */
1761GVMMR0DECL(int) GVMMR0ByVM(PVM pVM, PGVM *ppGVM)
1762{
1763 PGVMM pGVMM;
1764 return gvmmR0ByVM(pVM, ppGVM, &pGVMM, false /* fTakeUsedLock */);
1765}
1766
1767
1768/**
1769 * Lookup a GVM structure by the shared VM structure and ensuring that the
1770 * caller is an EMT thread.
1771 *
1772 * @returns VBox status code.
1773 * @param pVM The cross context VM structure.
1774 * @param idCpu The Virtual CPU ID of the calling EMT.
1775 * @param ppGVM Where to store the GVM pointer.
1776 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1777 * @thread EMT
1778 *
1779 * @remarks This will assert in all failure paths.
1780 */
1781static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM)
1782{
1783 PGVMM pGVMM;
1784 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1785
1786 /*
1787 * Validate.
1788 */
1789 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1790 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1791
1792 uint16_t hGVM = pVM->hSelf;
1793 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_HANDLE);
1794 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_HANDLE);
1795
1796 /*
1797 * Look it up.
1798 */
1799 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1800 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1801 RTPROCESS ProcId = RTProcSelf();
1802 AssertReturn(pHandle->ProcId == ProcId, VERR_NOT_OWNER);
1803 AssertPtrReturn(pHandle->pvObj, VERR_NOT_OWNER);
1804
1805 PGVM pGVM = pHandle->pGVM;
1806 AssertPtrReturn(pGVM, VERR_NOT_OWNER);
1807 AssertReturn(pGVM->pVM == pVM, VERR_NOT_OWNER);
1808 RTNATIVETHREAD hAllegedEMT = RTThreadNativeSelf();
1809 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1810 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1811
1812 *ppGVM = pGVM;
1813 *ppGVMM = pGVMM;
1814 return VINF_SUCCESS;
1815}
1816
1817
1818/**
1819 * Check that the given GVM and VM structures match up.
1820 *
1821 * The calling thread must be in the same process as the VM. All current lookups
1822 * are by threads inside the same process, so this will not be an issue.
1823 *
1824 * @returns VBox status code.
1825 * @param pGVM The global (ring-0) VM structure.
1826 * @param pVM The cross context VM structure.
1827 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
1828 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1829 * @thread EMT
1830 *
1831 * @remarks This will assert in all failure paths.
1832 */
1833static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM)
1834{
1835 /*
1836 * Check the pointers.
1837 */
1838 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1839
1840 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1841 AssertReturn(((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
1842 AssertReturn(pGVM->pVM == pVM, VERR_INVALID_VM_HANDLE);
1843
1844
1845 /*
1846 * Get the pGVMM instance and check the VM handle.
1847 */
1848 PGVMM pGVMM;
1849 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1850
1851 uint16_t hGVM = pGVM->hSelf;
1852 AssertReturn( hGVM != NIL_GVM_HANDLE
1853 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1854
1855 RTPROCESS const pidSelf = RTProcSelf();
1856 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1857 AssertReturn( pHandle->pGVM == pGVM
1858 && pHandle->pVM == pVM
1859 && pHandle->ProcId == pidSelf
1860 && RT_VALID_PTR(pHandle->pvObj),
1861 VERR_INVALID_HANDLE);
1862
1863 /*
1864 * Check the EMT claim.
1865 */
1866 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
1867 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1868 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1869
1870 /*
1871 * Some more VM data consistency checks.
1872 */
1873 AssertReturn(pVM->cCpus == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
1874 AssertReturn(pVM->hSelf == hGVM, VERR_INCONSISTENT_VM_HANDLE);
1875 AssertReturn(pVM->pVMR0 == pVM, VERR_INCONSISTENT_VM_HANDLE);
1876 AssertReturn( pVM->enmVMState >= VMSTATE_CREATING
1877 && pVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
1878
1879 *ppGVMM = pGVMM;
1880 return VINF_SUCCESS;
1881}
1882
1883
1884/**
1885 * Validates a GVM/VM pair.
1886 *
1887 * @returns VBox status code.
1888 * @param pGVM The global (ring-0) VM structure.
1889 * @param pVM The cross context VM structure.
1890 */
1891GVMMR0DECL(int) GVMMR0ValidateGVMandVM(PGVM pGVM, PVM pVM)
1892{
1893 PGVMM pGVMM;
1894 return gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /*fTakeUsedLock*/);
1895}
1896
1897
1898
1899/**
1900 * Validates a GVM/VM/EMT combo.
1901 *
1902 * @returns VBox status code.
1903 * @param pGVM The global (ring-0) VM structure.
1904 * @param pVM The cross context VM structure.
1905 * @param idCpu The Virtual CPU ID of the calling EMT.
1906 * @thread EMT(idCpu)
1907 */
1908GVMMR0DECL(int) GVMMR0ValidateGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1909{
1910 PGVMM pGVMM;
1911 return gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
1912}
1913
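/*
 * Editor's illustrative sketch, not part of the original file: how a ring-0
 * entry point might use the validators above before touching any VM data.
 * The function name gvmmR0SampleOperation is hypothetical; the real callers
 * live in VMMR0.cpp.
 */
#if 0 /* example only */
static int gvmmR0SampleOperation(PGVM pGVM, PVM pVM, VMCPUID idCpu)
{
    /* Cross-check the GVM/VM pair and the caller's EMT claim first. */
    int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
    if (RT_FAILURE(rc))
        return rc;

    /* It is now safe to dereference pGVM/pVM on behalf of EMT(idCpu). */
    return VINF_SUCCESS;
}
#endif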
1914
1915/**
1916 * Looks up the VM belonging to the specified EMT thread.
1917 *
1918 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1919 * unnecessary kernel panics when the EMT thread hits an assertion. The
1920 * caller may or may not be an EMT thread.
1921 *
1922 * @returns Pointer to the VM on success, NULL on failure.
1923 * @param hEMT The native thread handle of the EMT.
1924 * NIL_RTNATIVETHREAD means the current thread
1925 */
1926GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1927{
1928 /*
1929 * No Assertions here as we're usually called in a AssertMsgN or
1930 * RTAssert* context.
1931 */
1932 PGVMM pGVMM = g_pGVMM;
1933 if ( !VALID_PTR(pGVMM)
1934 || pGVMM->u32Magic != GVMM_MAGIC)
1935 return NULL;
1936
1937 if (hEMT == NIL_RTNATIVETHREAD)
1938 hEMT = RTThreadNativeSelf();
1939 RTPROCESS ProcId = RTProcSelf();
1940
1941 /*
1942 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1943 */
1944 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1945 {
1946 if ( pGVMM->aHandles[i].iSelf == i
1947 && pGVMM->aHandles[i].ProcId == ProcId
1948 && VALID_PTR(pGVMM->aHandles[i].pvObj)
1949 && VALID_PTR(pGVMM->aHandles[i].pVM)
1950 && VALID_PTR(pGVMM->aHandles[i].pGVM))
1951 {
1952 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1953 return pGVMM->aHandles[i].pVM;
1954
1955            /* This is fairly safe with the current process-per-VM approach. */
1956 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1957 VMCPUID const cCpus = pGVM->cCpus;
1958 if ( cCpus < 1
1959 || cCpus > VMM_MAX_CPU_COUNT)
1960 continue;
1961 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1962 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1963 return pGVMM->aHandles[i].pVM;
1964 }
1965 }
1966 return NULL;
1967}
1968
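/*
 * Editor's illustrative sketch, not part of the original file: assertion or
 * logging code can resolve the current thread's VM without taking any locks,
 * e.g. to decorate a panic message.  gvmmR0SampleAssertHelper and pszMsg are
 * hypothetical.
 */
#if 0 /* example only */
static void gvmmR0SampleAssertHelper(const char *pszMsg)
{
    PVM pVM = GVMMR0GetVMByEMT(NIL_RTNATIVETHREAD /* current thread */);
    if (pVM)
        SUPR0Printf("%s: pVM=%p enmVMState=%d\n", pszMsg, pVM, (int)pVM->enmVMState);
    else
        SUPR0Printf("%s: no VM registered for this thread\n", pszMsg);
}
#endif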
1969
1970/**
1971 * This will wake up expired and soon-to-be-expired VMs.
1972 *
1973 * @returns Number of VMs that have been woken up.
1974 * @param pGVMM Pointer to the GVMM instance data.
1975 * @param u64Now The current time.
1976 */
1977static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
1978{
1979 /*
1980 * Skip this if early wake-ups have been disabled, either because of high
1981 * resolution wakeups or by the user.
1982 */
1983 if (!pGVMM->fDoEarlyWakeUps)
1984 return 0;
1985
1986/** @todo Rewrite this algorithm. See performance defect XYZ. */
1987
1988 /*
1989 * A cheap optimization to stop wasting so much time here on big setups.
1990 */
1991 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
1992 if ( pGVMM->cHaltedEMTs == 0
1993 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
1994 return 0;
1995
1996 /*
1997 * Only one thread doing this at a time.
1998 */
1999 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
2000 return 0;
2001
2002 /*
2003 * The first pass will wake up VMs which have actually expired
2004 * and look for VMs that should be woken up in the 2nd and 3rd passes.
2005 */
2006 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
2007 uint64_t u64Min = UINT64_MAX;
2008 unsigned cWoken = 0;
2009 unsigned cHalted = 0;
2010 unsigned cTodo2nd = 0;
2011 unsigned cTodo3rd = 0;
2012 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2013 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2014 i = pGVMM->aHandles[i].iNext)
2015 {
2016 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2017 if ( VALID_PTR(pCurGVM)
2018 && pCurGVM->u32Magic == GVM_MAGIC)
2019 {
2020 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2021 {
2022 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2023 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2024 if (u64)
2025 {
2026 if (u64 <= u64Now)
2027 {
2028 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2029 {
2030 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2031 AssertRC(rc);
2032 cWoken++;
2033 }
2034 }
2035 else
2036 {
2037 cHalted++;
2038 if (u64 <= uNsEarlyWakeUp1)
2039 cTodo2nd++;
2040 else if (u64 <= uNsEarlyWakeUp2)
2041 cTodo3rd++;
2042 else if (u64 < u64Min)
2043                        u64Min = u64; /* Track the earliest remaining expiry for uNsNextEmtWakeup. */
2044 }
2045 }
2046 }
2047 }
2048 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2049 }
2050
2051 if (cTodo2nd)
2052 {
2053 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2054 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2055 i = pGVMM->aHandles[i].iNext)
2056 {
2057 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2058 if ( VALID_PTR(pCurGVM)
2059 && pCurGVM->u32Magic == GVM_MAGIC)
2060 {
2061 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2062 {
2063 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2064 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2065 if ( u64
2066 && u64 <= uNsEarlyWakeUp1)
2067 {
2068 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2069 {
2070 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2071 AssertRC(rc);
2072 cWoken++;
2073 }
2074 }
2075 }
2076 }
2077 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2078 }
2079 }
2080
2081 if (cTodo3rd)
2082 {
2083 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2084 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2085 i = pGVMM->aHandles[i].iNext)
2086 {
2087 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2088 if ( VALID_PTR(pCurGVM)
2089 && pCurGVM->u32Magic == GVM_MAGIC)
2090 {
2091 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2092 {
2093 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2094 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2095 if ( u64
2096 && u64 <= uNsEarlyWakeUp2)
2097 {
2098 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2099 {
2100 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2101 AssertRC(rc);
2102 cWoken++;
2103 }
2104 }
2105 }
2106 }
2107 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2108 }
2109 }
2110
2111 /*
2112 * Set the minimum value.
2113 */
2114 pGVMM->uNsNextEmtWakeup = u64Min;
2115
2116 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2117 return cWoken;
2118}
2119
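/*
 * Editor's note, not part of the original file: a worked example of the three
 * passes above, using hypothetical tuning values nsEarlyWakeUp1 = 25 000 ns,
 * nsEarlyWakeUp2 = 50 000 ns and u64Now = 1 000 000 000 ns:
 *
 *   u64HaltExpire <= 1 000 000 000  -> woken in the 1st pass (already expired)
 *   u64HaltExpire <= 1 000 025 000  -> woken in the 2nd pass (early wake-up 1)
 *   u64HaltExpire <= 1 000 050 000  -> woken in the 3rd pass (early wake-up 2)
 *   anything later                  -> left sleeping; the earliest such expiry
 *                                      ends up in uNsNextEmtWakeup.
 */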
2120
2121/**
2122 * Halt the EMT thread.
2123 *
2124 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2125 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2126 * @param pGVM The global (ring-0) VM structure.
2127 * @param pVM The cross context VM structure.
2128 * @param idCpu The Virtual CPU ID of the calling EMT.
2129 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2130 * @thread EMT(idCpu).
2131 */
2132GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2133{
2134 LogFlow(("GVMMR0SchedHalt: pGVM=%p pVM=%p idCpu=%#x u64ExpireGipTime=%#RX64\n", pGVM, pVM, idCpu, u64ExpireGipTime));
2135 GVMM_CHECK_SMAP_SETUP();
2136 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2137
2138 /*
2139 * Validate the VM structure, state and handle.
2140 */
2141 PGVMM pGVMM;
2142 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2143 if (RT_FAILURE(rc))
2144 return rc;
2145 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2146 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2147
2148 PGVMCPU pCurGVCpu = &pGVM->aCpus[idCpu];
2149 Assert(!pCurGVCpu->gvmm.s.u64HaltExpire);
2150
2151 /*
2152 * If we're doing early wake-ups, we must take the UsedList lock before we
2153 * start querying the current time.
2154 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2155 */
2156 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2157 if (fDoEarlyWakeUps)
2158 {
2159 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2160 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2161 }
2162
2163 pCurGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
2164
2165    /* GIP hack: We may frequently be sleeping for short intervals where the
2166 difference between GIP and system time matters on systems with high resolution
2167 system time. So, convert the input from GIP to System time in that case. */
2168 Assert(ASMGetFlags() & X86_EFL_IF);
2169 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2170 const uint64_t u64NowGip = RTTimeNanoTS();
2171 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2172
2173 if (fDoEarlyWakeUps)
2174 {
2175 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2176 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2177 }
2178
2179 /*
2180 * Go to sleep if we must...
2181 * Cap the sleep time to 1 second to be on the safe side.
2182 */
2183 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2184 if ( u64NowGip < u64ExpireGipTime
2185 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2186 ? pGVMM->nsMinSleepCompany
2187 : pGVMM->nsMinSleepAlone))
2188 {
2189 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2190 if (cNsInterval > RT_NS_1SEC)
2191 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2192 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2193 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2194 if (fDoEarlyWakeUps)
2195 {
2196 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2197 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2198 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2199 }
2200 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2201
2202 rc = RTSemEventMultiWaitEx(pCurGVCpu->gvmm.s.HaltEventMulti,
2203 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2204 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2205 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2206
2207 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0);
2208 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2209
2210        /* Reset the semaphore to try to prevent a few false wake-ups. */
2211 if (rc == VINF_SUCCESS)
2212 {
2213 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
2214 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2215 }
2216 else if (rc == VERR_TIMEOUT)
2217 {
2218 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2219 rc = VINF_SUCCESS;
2220 }
2221 }
2222 else
2223 {
2224 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2225 if (fDoEarlyWakeUps)
2226 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2227 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2228 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
2229 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2230 }
2231
2232 return rc;
2233}
2234
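/*
 * Editor's illustrative sketch, not part of the original file: how an EMT
 * might block on the halt API above.  The 1 ms deadline and the function name
 * are hypothetical; real EMTs reach this code via the VMMR0 dispatcher.
 */
#if 0 /* example only */
static int gvmmR0SampleEmtIdle(PGVM pGVM, PVM pVM, VMCPUID idCpu)
{
    /* Sleep for at most 1 ms of GIP time, or until another thread wakes us up. */
    uint64_t const u64ExpireGipTime = RTTimeNanoTS() + RT_NS_1MS;
    int rc = GVMMR0SchedHalt(pGVM, pVM, idCpu, u64ExpireGipTime);
    /* VINF_SUCCESS: timeout or woken up; VERR_INTERRUPTED: a signal is pending. */
    return rc;
}
#endif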
2235
2236/**
2237 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2238 * a sleeping EMT.
2239 *
2240 * @retval VINF_SUCCESS if successfully woken up.
2241 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2242 *
2243 * @param pGVM The global (ring-0) VM structure.
2244 * @param pGVCpu The global (ring-0) VCPU structure.
2245 */
2246DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2247{
2248 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2249
2250 /*
2251 * Signal the semaphore regardless of whether it's currently blocked on it.
2252 *
2253 * The reason for this is that there is absolutely no way we can be 100%
2254 * certain that it isn't *about* to go to sleep on it and just got
2255 * delayed a bit en route. So, we will always signal the semaphore when
2256 * it is flagged as halted in the VMM.
2257 */
2258/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2259 int rc;
2260 if (pGVCpu->gvmm.s.u64HaltExpire)
2261 {
2262 rc = VINF_SUCCESS;
2263 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2264 }
2265 else
2266 {
2267 rc = VINF_GVM_NOT_BLOCKED;
2268 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2269 }
2270
2271 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2272 AssertRC(rc2);
2273
2274 return rc;
2275}
2276
2277
2278/**
2279 * Wakes up the halted EMT thread so it can service a pending request.
2280 *
2281 * @returns VBox status code.
2282 * @retval VINF_SUCCESS if successfully woken up.
2283 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2284 *
2285 * @param pGVM The global (ring-0) VM structure.
2286 * @param pVM The cross context VM structure.
2287 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2288 * @param fTakeUsedLock Take the used lock or not
2289 * @thread Any but EMT(idCpu).
2290 */
2291GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
2292{
2293 GVMM_CHECK_SMAP_SETUP();
2294 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2295
2296 /*
2297 * Validate input and take the UsedLock.
2298 */
2299 PGVMM pGVMM;
2300 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
2301 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2302 if (RT_SUCCESS(rc))
2303 {
2304 if (idCpu < pGVM->cCpus)
2305 {
2306 /*
2307 * Do the actual job.
2308 */
2309 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2310 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2311
2312 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2313 {
2314 /*
2315 * While we're here, do a round of scheduling.
2316 */
2317 Assert(ASMGetFlags() & X86_EFL_IF);
2318 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2319 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2320 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2321 }
2322 }
2323 else
2324 rc = VERR_INVALID_CPU_ID;
2325
2326 if (fTakeUsedLock)
2327 {
2328 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2329 AssertRC(rc2);
2330 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2331 }
2332 }
2333
2334 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2335 return rc;
2336}
2337
2338
2339/**
2340 * Wakes up the halted EMT thread so it can service a pending request.
2341 *
2342 * @returns VBox status code.
2343 * @retval VINF_SUCCESS if successfully woken up.
2344 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2345 *
2346 * @param pGVM The global (ring-0) VM structure.
2347 * @param pVM The cross context VM structure.
2348 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2349 * @thread Any but EMT(idCpu).
2350 */
2351GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2352{
2353 return GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
2354}
2355
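/*
 * Editor's illustrative sketch, not part of the original file: waking a
 * specific EMT from another ring-0 thread after queueing work for it.  The
 * request queueing itself is omitted and the function name is hypothetical.
 */
#if 0 /* example only */
static void gvmmR0SampleKickEmt(PGVM pGVM, PVM pVM, VMCPUID idCpu)
{
    /* ...queue a request for EMT(idCpu) here... */
    int rc = GVMMR0SchedWakeUp(pGVM, pVM, idCpu); /* takes the used lock */
    AssertMsg(rc == VINF_SUCCESS || rc == VINF_GVM_NOT_BLOCKED, ("%Rrc\n", rc));
    NOREF(rc);
}
#endif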
2356
2357/**
2358 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2359 * parameter and no used locking.
2360 *
2361 * @returns VBox status code.
2362 * @retval VINF_SUCCESS if successfully woken up.
2363 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2364 *
2365 * @param pVM The cross context VM structure.
2366 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2367 * @thread Any but EMT(idCpu).
2368 * @deprecated Don't use in new code if possible! Use the GVM variant.
2369 */
2370GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PVM pVM, VMCPUID idCpu)
2371{
2372 GVMM_CHECK_SMAP_SETUP();
2373 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2374 PGVM pGVM;
2375 PGVMM pGVMM;
2376 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
2377 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2378 if (RT_SUCCESS(rc))
2379 rc = GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, false /*fTakeUsedLock*/);
2380 return rc;
2381}
2382
2383
2384/**
2385 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2386 * the Virtual CPU if it's still busy executing guest code.
2387 *
2388 * @returns VBox status code.
2389 * @retval VINF_SUCCESS if poked successfully.
2390 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2391 *
2392 * @param pGVM The global (ring-0) VM structure.
2393 * @param pVCpu The cross context virtual CPU structure.
2394 */
2395DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPU pVCpu)
2396{
2397 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2398
2399 RTCPUID idHostCpu = pVCpu->idHostCpu;
2400 if ( idHostCpu == NIL_RTCPUID
2401 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2402 {
2403 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2404 return VINF_GVM_NOT_BUSY_IN_GC;
2405 }
2406
2407 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2408 RTMpPokeCpu(idHostCpu);
2409 return VINF_SUCCESS;
2410}
2411
2412
2413/**
2414 * Pokes an EMT if it's still busy running guest code.
2415 *
2416 * @returns VBox status code.
2417 * @retval VINF_SUCCESS if poked successfully.
2418 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2419 *
2420 * @param pGVM The global (ring-0) VM structure.
2421 * @param pVM The cross context VM structure.
2422 * @param idCpu The ID of the virtual CPU to poke.
2423 * @param fTakeUsedLock Take the used lock or not
2424 */
2425GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
2426{
2427 /*
2428 * Validate input and take the UsedLock.
2429 */
2430 PGVMM pGVMM;
2431 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
2432 if (RT_SUCCESS(rc))
2433 {
2434 if (idCpu < pGVM->cCpus)
2435 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2436 else
2437 rc = VERR_INVALID_CPU_ID;
2438
2439 if (fTakeUsedLock)
2440 {
2441 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2442 AssertRC(rc2);
2443 }
2444 }
2445
2446    LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2447 return rc;
2448}
2449
2450
2451/**
2452 * Pokes an EMT if it's still busy running guest code.
2453 *
2454 * @returns VBox status code.
2455 * @retval VINF_SUCCESS if poked successfully.
2456 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2457 *
2458 * @param pGVM The global (ring-0) VM structure.
2459 * @param pVM The cross context VM structure.
2460 * @param idCpu The ID of the virtual CPU to poke.
2461 */
2462GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2463{
2464 return GVMMR0SchedPokeEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
2465}
2466
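/*
 * Editor's illustrative sketch, not part of the original file: forcing an EMT
 * that is busy executing guest code back to ring-0, e.g. after raising a
 * forced-action flag.  The flag handling is omitted and the name is
 * hypothetical.
 */
#if 0 /* example only */
static void gvmmR0SampleInterruptGuest(PGVM pGVM, PVM pVM, VMCPUID idCpu)
{
    /* ...raise the forced-action flag for EMT(idCpu) here... */
    int rc = GVMMR0SchedPoke(pGVM, pVM, idCpu);
    Assert(rc == VINF_SUCCESS || rc == VINF_GVM_NOT_BUSY_IN_GC);
    NOREF(rc);
}
#endif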
2467
2468/**
2469 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2470 * used locking.
2471 *
2472 * @returns VBox status code.
2473 * @retval VINF_SUCCESS if poked successfully.
2474 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2475 *
2476 * @param pVM The cross context VM structure.
2477 * @param idCpu The ID of the virtual CPU to poke.
2478 *
2479 * @deprecated Don't use in new code if possible! Use the GVM variant.
2480 */
2481GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PVM pVM, VMCPUID idCpu)
2482{
2483 PGVM pGVM;
2484 PGVMM pGVMM;
2485 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
2486 if (RT_SUCCESS(rc))
2487 {
2488 if (idCpu < pGVM->cCpus)
2489 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2490 else
2491 rc = VERR_INVALID_CPU_ID;
2492 }
2493 return rc;
2494}
2495
2496
2497/**
2498 * Wakes up a set of halted EMT threads so they can service pending requests.
2499 *
2500 * @returns VBox status code, no informational stuff.
2501 *
2502 * @param pGVM The global (ring-0) VM structure.
2503 * @param pVM The cross context VM structure.
2504 * @param pSleepSet The set of sleepers to wake up.
2505 * @param pPokeSet The set of CPUs to poke.
2506 */
2507GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PVM pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2508{
2509 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2510 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2511 GVMM_CHECK_SMAP_SETUP();
2512 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2513 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2514
2515 /*
2516 * Validate input and take the UsedLock.
2517 */
2518 PGVMM pGVMM;
2519 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /* fTakeUsedLock */);
2520 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2521 if (RT_SUCCESS(rc))
2522 {
2523 rc = VINF_SUCCESS;
2524 VMCPUID idCpu = pGVM->cCpus;
2525 while (idCpu-- > 0)
2526 {
2527            /* Don't try to poke or wake up ourselves. */
2528 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2529 continue;
2530
2531 /* just ignore errors for now. */
2532 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2533 {
2534 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2535 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2536 }
2537 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2538 {
2539 gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2540 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2541 }
2542 }
2543
2544 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2545 AssertRC(rc2);
2546 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2547 }
2548
2549 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2550 return rc;
2551}
2552
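/*
 * Editor's illustrative sketch, not part of the original file: building the
 * two CPU sets for the call above so that halted EMTs are woken and busy ones
 * are poked.  Assumes the VMCPUSET_EMPTY/VMCPUSET_ADD helpers from
 * VBox/vmm/vmcpuset.h; the function name is hypothetical.
 */
#if 0 /* example only */
static int gvmmR0SampleWakeOrPokeAll(PGVM pGVM, PVM pVM, VMCPUID idSelf)
{
    VMCPUSET SleepSet, PokeSet;
    VMCPUSET_EMPTY(&SleepSet);
    VMCPUSET_EMPTY(&PokeSet);
    for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
        if (idCpu != idSelf)
        {
            if (pGVM->aCpus[idCpu].gvmm.s.u64HaltExpire)
                VMCPUSET_ADD(&SleepSet, idCpu);
            else
                VMCPUSET_ADD(&PokeSet, idCpu);
        }
    return GVMMR0SchedWakeUpAndPokeCpus(pGVM, pVM, &SleepSet, &PokeSet);
}
#endif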
2553
2554/**
2555 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2556 *
2557 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2558 * @param pGVM The global (ring-0) VM structure.
2559 * @param pVM The cross context VM structure.
2560 * @param pReq Pointer to the request packet.
2561 */
2562GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PVM pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2563{
2564 /*
2565 * Validate input and pass it on.
2566 */
2567 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2568 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2569
2570 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, pVM, &pReq->SleepSet, &pReq->PokeSet);
2571}
2572
2573
2574
2575/**
2576 * Poll the schedule to see if someone else should get a chance to run.
2577 *
2578 * This is a bit hackish and will not work too well if the machine is
2579 * under heavy load from non-VM processes.
2580 *
2581 * @returns VINF_SUCCESS if not yielded.
2582 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2583 * @param pGVM The global (ring-0) VM structure.
2584 * @param pVM The cross context VM structure.
2585 * @param idCpu The Virtual CPU ID of the calling EMT.
2586 * @param fYield Whether to yield or not.
2587 * This is for when we're spinning in the halt loop.
2588 * @thread EMT(idCpu).
2589 */
2590GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fYield)
2591{
2592 /*
2593 * Validate input.
2594 */
2595 PGVMM pGVMM;
2596 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2597 if (RT_SUCCESS(rc))
2598 {
2599 /*
2600 * We currently only implement helping doing wakeups (fYield = false), so don't
2601 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2602 */
2603 if (!fYield && pGVMM->fDoEarlyWakeUps)
2604 {
2605 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2606 pGVM->gvmm.s.StatsSched.cPollCalls++;
2607
2608 Assert(ASMGetFlags() & X86_EFL_IF);
2609 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2610
2611 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2612
2613 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2614 }
2615 /*
2616 * Not quite sure what we could do here...
2617 */
2618 else if (fYield)
2619 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2620 else
2621 rc = VINF_SUCCESS;
2622 }
2623
2624    LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
2625 return rc;
2626}
2627
2628
2629#ifdef GVMM_SCHED_WITH_PPT
2630/**
2631 * Timer callback for the periodic preemption timer.
2632 *
2633 * @param pTimer The timer handle.
2634 * @param pvUser Pointer to the per cpu structure.
2635 * @param iTick The current tick.
2636 */
2637static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2638{
2639 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2640 NOREF(pTimer); NOREF(iTick);
2641
2642 /*
2643 * Termination check
2644 */
2645 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2646 return;
2647
2648 /*
2649 * Do the house keeping.
2650 */
2651 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2652
2653 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
2654 {
2655 /*
2656 * Historicize the max frequency.
2657 */
2658 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
2659 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
2660 pCpu->Ppt.iTickHistorization = 0;
2661 pCpu->Ppt.uDesiredHz = 0;
2662
2663 /*
2664         * Check whether the current timer frequency still matches the history maximum.
2665 */
2666 uint32_t uHistMaxHz = 0;
2667 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
2668 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
2669 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
2670 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
2671 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2672 else if (uHistMaxHz)
2673 {
2674 /*
2675 * Reprogram it.
2676 */
2677 pCpu->Ppt.cChanges++;
2678 pCpu->Ppt.iTickHistorization = 0;
2679 pCpu->Ppt.uTimerHz = uHistMaxHz;
2680 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
2681 pCpu->Ppt.cNsInterval = cNsInterval;
2682 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2683 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2684 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2685 / cNsInterval;
2686 else
2687 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2688 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2689
2690 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
2691 RTTimerChangeInterval(pTimer, cNsInterval);
2692 }
2693 else
2694 {
2695 /*
2696 * Stop it.
2697 */
2698 pCpu->Ppt.fStarted = false;
2699 pCpu->Ppt.uTimerHz = 0;
2700 pCpu->Ppt.cNsInterval = 0;
2701 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2702
2703 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
2704 RTTimerStop(pTimer);
2705 }
2706 }
2707 else
2708 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2709}
2710#endif /* GVMM_SCHED_WITH_PPT */
2711
2712
2713/**
2714 * Updates the periodic preemption timer for the calling CPU.
2715 *
2716 * The caller must have disabled preemption!
2717 * The caller must check that the host can do high resolution timers.
2718 *
2719 * @param pVM The cross context VM structure.
2720 * @param idHostCpu The current host CPU id.
2721 * @param uHz The desired frequency.
2722 */
2723GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz)
2724{
2725 NOREF(pVM);
2726#ifdef GVMM_SCHED_WITH_PPT
2727 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2728 Assert(RTTimerCanDoHighResolution());
2729
2730 /*
2731 * Resolve the per CPU data.
2732 */
2733 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2734 PGVMM pGVMM = g_pGVMM;
2735 if ( !VALID_PTR(pGVMM)
2736 || pGVMM->u32Magic != GVMM_MAGIC)
2737 return;
2738 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2739 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2740 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2741 && pCpu->idCpu == idHostCpu,
2742                         ("u32Magic=%#x idCpu=%d idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2743
2744 /*
2745 * Check whether we need to do anything about the timer.
2746 * We have to be a little bit careful since we might be racing the timer
2747 * callback here.
2748 */
2749 if (uHz > 16384)
2750 uHz = 16384; /** @todo add a query method for this! */
2751 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2752 && uHz >= pCpu->Ppt.uMinHz
2753 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2754 {
2755 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2756
2757 pCpu->Ppt.uDesiredHz = uHz;
2758 uint32_t cNsInterval = 0;
2759 if (!pCpu->Ppt.fStarted)
2760 {
2761 pCpu->Ppt.cStarts++;
2762 pCpu->Ppt.fStarted = true;
2763 pCpu->Ppt.fStarting = true;
2764 pCpu->Ppt.iTickHistorization = 0;
2765 pCpu->Ppt.uTimerHz = uHz;
2766 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2767 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2768 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2769 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2770 / cNsInterval;
2771 else
2772 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2773 }
2774
2775 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2776
2777 if (cNsInterval)
2778 {
2779 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2780 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2781 AssertRC(rc);
2782
2783 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2784 if (RT_FAILURE(rc))
2785 pCpu->Ppt.fStarted = false;
2786 pCpu->Ppt.fStarting = false;
2787 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2788 }
2789 }
2790#else /* !GVMM_SCHED_WITH_PPT */
2791 NOREF(idHostCpu); NOREF(uHz);
2792#endif /* !GVMM_SCHED_WITH_PPT */
2793}
2794
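/*
 * Editor's note, not part of the original file: a worked example of the
 * interval math above.  A desired frequency of 2000 Hz (uHz is clamped to at
 * most 16384) gives:
 *
 *   cNsInterval = RT_NS_1SEC / 2000 = 500 000 ns
 *
 * and, assuming a hypothetical GVMMHOSTCPU_PPT_HIST_INTERVAL_NS of 20 000 000
 * ns (20 ms), the historization counter trips every
 *
 *   (20 000 000 + 10 000 000 - 1) / 500 000 = 59 ticks  (roughly 29.5 ms).
 */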
2795
2796/**
2797 * Retrieves the GVMM statistics visible to the caller.
2798 *
2799 * @returns VBox status code.
2800 *
2801 * @param pStats Where to put the statistics.
2802 * @param pSession The current session.
2803 * @param pGVM The GVM to obtain statistics for. Optional.
2804 * @param pVM The VM structure corresponding to @a pGVM.
2805 */
2806GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
2807{
2808 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
2809
2810 /*
2811 * Validate input.
2812 */
2813 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2814 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2815 pStats->cVMs = 0; /* (crash before taking the sem...) */
2816
2817 /*
2818 * Take the lock and get the VM statistics.
2819 */
2820 PGVMM pGVMM;
2821 if (pGVM)
2822 {
2823 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
2824 if (RT_FAILURE(rc))
2825 return rc;
2826 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
2827 }
2828 else
2829 {
2830 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2831 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
2832
2833 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2834 AssertRCReturn(rc, rc);
2835 }
2836
2837 /*
2838 * Enumerate the VMs and add the ones visible to the statistics.
2839 */
2840 pStats->cVMs = 0;
2841 pStats->cEMTs = 0;
2842 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
2843
2844 for (unsigned i = pGVMM->iUsedHead;
2845 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2846 i = pGVMM->aHandles[i].iNext)
2847 {
2848 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
2849 void *pvObj = pGVMM->aHandles[i].pvObj;
2850 if ( VALID_PTR(pvObj)
2851 && VALID_PTR(pOtherGVM)
2852 && pOtherGVM->u32Magic == GVM_MAGIC
2853 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2854 {
2855 pStats->cVMs++;
2856 pStats->cEMTs += pOtherGVM->cCpus;
2857
2858 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
2859 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
2860 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
2861 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
2862 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
2863
2864 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
2865 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
2866 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
2867
2868 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
2869 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
2870
2871 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
2872 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
2873 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
2874 }
2875 }
2876
2877 /*
2878 * Copy out the per host CPU statistics.
2879 */
2880 uint32_t iDstCpu = 0;
2881 uint32_t cSrcCpus = pGVMM->cHostCpus;
2882 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
2883 {
2884 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
2885 {
2886 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
2887 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
2888#ifdef GVMM_SCHED_WITH_PPT
2889 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
2890 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
2891 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
2892 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
2893#else
2894 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
2895 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
2896 pStats->aHostCpus[iDstCpu].cChanges = 0;
2897 pStats->aHostCpus[iDstCpu].cStarts = 0;
2898#endif
2899 iDstCpu++;
2900 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
2901 break;
2902 }
2903 }
2904 pStats->cHostCpus = iDstCpu;
2905
2906 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2907
2908 return VINF_SUCCESS;
2909}
2910
2911
2912/**
2913 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2914 *
2915 * @returns see GVMMR0QueryStatistics.
2916 * @param pGVM The global (ring-0) VM structure. Optional.
2917 * @param pVM The cross context VM structure. Optional.
2918 * @param pReq Pointer to the request packet.
2919 * @param pSession The current session.
2920 */
2921GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PVM pVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
2922{
2923 /*
2924 * Validate input and pass it on.
2925 */
2926 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2927 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2928 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
2929
2930 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM, pVM);
2931}
2932
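/*
 * Editor's illustrative sketch, not part of the original file: preparing the
 * request packet that the wrapper above expects.  The SUPVMMR0REQHDR magic
 * field is assumed from VBox/sup.h (the wrapper itself only checks cbReq and
 * pSession); the function name is hypothetical.
 */
#if 0 /* example only */
static int gvmmR0SampleQueryStats(PGVM pGVM, PVM pVM, PSUPDRVSESSION pSession)
{
    GVMMQUERYSTATISTICSSREQ Req;
    RT_ZERO(Req);
    Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
    Req.Hdr.cbReq    = sizeof(Req);
    Req.pSession     = pSession;
    int rc = GVMMR0QueryStatisticsReq(pGVM, pVM, &Req, pSession);
    if (RT_SUCCESS(rc))
        SUPR0Printf("GVMM: %u VMs, %u EMTs\n", Req.Stats.cVMs, Req.Stats.cEMTs);
    return rc;
}
#endif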
2933
2934/**
2935 * Resets the specified GVMM statistics.
2936 *
2937 * @returns VBox status code.
2938 *
2939 * @param pStats Which statistics to reset, that is, non-zero fields indicate which to reset.
2940 * @param pSession The current session.
2941 * @param pGVM The GVM to reset statistics for. Optional.
2942 * @param pVM The VM structure corresponding to @a pGVM.
2943 */
2944GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
2945{
2946 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
2947
2948 /*
2949 * Validate input.
2950 */
2951 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2952 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2953
2954 /*
2955 * Take the lock and get the VM statistics.
2956 */
2957 PGVMM pGVMM;
2958 if (pGVM)
2959 {
2960 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
2961 if (RT_FAILURE(rc))
2962 return rc;
2963# define MAYBE_RESET_FIELD(field) \
2964 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2965 MAYBE_RESET_FIELD(cHaltCalls);
2966 MAYBE_RESET_FIELD(cHaltBlocking);
2967 MAYBE_RESET_FIELD(cHaltTimeouts);
2968 MAYBE_RESET_FIELD(cHaltNotBlocking);
2969 MAYBE_RESET_FIELD(cHaltWakeUps);
2970 MAYBE_RESET_FIELD(cWakeUpCalls);
2971 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2972 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2973 MAYBE_RESET_FIELD(cPokeCalls);
2974 MAYBE_RESET_FIELD(cPokeNotBusy);
2975 MAYBE_RESET_FIELD(cPollCalls);
2976 MAYBE_RESET_FIELD(cPollHalts);
2977 MAYBE_RESET_FIELD(cPollWakeUps);
2978# undef MAYBE_RESET_FIELD
2979 }
2980 else
2981 {
2982 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2983
2984 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2985 AssertRCReturn(rc, rc);
2986 }
2987
2988 /*
2989 * Enumerate the VMs and add the ones visible to the statistics.
2990 */
2991 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
2992 {
2993 for (unsigned i = pGVMM->iUsedHead;
2994 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2995 i = pGVMM->aHandles[i].iNext)
2996 {
2997 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
2998 void *pvObj = pGVMM->aHandles[i].pvObj;
2999 if ( VALID_PTR(pvObj)
3000 && VALID_PTR(pOtherGVM)
3001 && pOtherGVM->u32Magic == GVM_MAGIC
3002 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3003 {
3004# define MAYBE_RESET_FIELD(field) \
3005 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3006 MAYBE_RESET_FIELD(cHaltCalls);
3007 MAYBE_RESET_FIELD(cHaltBlocking);
3008 MAYBE_RESET_FIELD(cHaltTimeouts);
3009 MAYBE_RESET_FIELD(cHaltNotBlocking);
3010 MAYBE_RESET_FIELD(cHaltWakeUps);
3011 MAYBE_RESET_FIELD(cWakeUpCalls);
3012 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3013 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3014 MAYBE_RESET_FIELD(cPokeCalls);
3015 MAYBE_RESET_FIELD(cPokeNotBusy);
3016 MAYBE_RESET_FIELD(cPollCalls);
3017 MAYBE_RESET_FIELD(cPollHalts);
3018 MAYBE_RESET_FIELD(cPollWakeUps);
3019# undef MAYBE_RESET_FIELD
3020 }
3021 }
3022 }
3023
3024 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3025
3026 return VINF_SUCCESS;
3027}
3028
3029
3030/**
3031 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3032 *
3033 * @returns see GVMMR0ResetStatistics.
3034 * @param pGVM The global (ring-0) VM structure. Optional.
3035 * @param pVM The cross context VM structure. Optional.
3036 * @param pReq Pointer to the request packet.
3037 * @param pSession The current session.
3038 */
3039GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PVM pVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3040{
3041 /*
3042 * Validate input and pass it on.
3043 */
3044 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3045 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3046 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3047
3048 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM, pVM);
3049}
3050