VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 75649

Last change on this file since 75649 was 75646, checked in by vboxsync, 6 years ago

VMM: HLT/MWAIT optimizations for busy guests: don't go back to ring-3 just to call GVMMR0SchedHalt(), do the first call in ring-0. This reduces interrupt latency for some workloads. bugref:9172
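
To make the intent of that change concrete, here is a minimal, purely illustrative sketch of a ring-0 halt path that tries GVMMR0SchedHalt() in ring-0 before falling back to ring-3. The helper name, the timeout handling and the exact GVMMR0SchedHalt() signature are assumptions for illustration only, not taken from this revision.

/* Illustrative sketch only -- not part of GVMMR0.cpp. */
static int vmmR0ExampleHaltInRing0(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint64_t cNsTimeout)
{
    /* Turn the relative timeout into an absolute timestamp (time source assumed). */
    uint64_t const u64ExpireGipTime = RTTimeNanoTS() + cNsTimeout;

    /* The first halt attempt stays in ring-0, avoiding a ring transition on the hot path. */
    int rc = GVMMR0SchedHalt(pGVM, pVM, idCpu, u64ExpireGipTime);
    if (rc == VINF_SUCCESS)
        return VINF_SUCCESS;    /* Woken up (or timed out) without leaving ring-0. */

    /* Otherwise let ring-3 handle the halt the traditional way. */
    return VINF_EM_HALT;
}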

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 108.9 KB
1/* $Id: GVMMR0.cpp 75646 2018-11-21 15:38:10Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2017 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered; this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-CPU
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * The TMCalcHostTimerFrequency() part of this takes the max
42 * TMTimerSetFrequencyHint() value and adjusts by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
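
To make the window arithmetic concrete: with the values defined further down in this file (an 8-entry aHzHistory array and GVMMHOSTCPU_PPT_HIST_INTERVAL_NS = 20 000 000 ns), the history covers 8 * 20 ms = 160 ms. The following is a simplified sketch of how a per-CPU timer frequency could be picked from such a history; it is an illustration of the idea only, not the code of gvmmR0SchedPeriodicPreemptionTimerCallback().

/* Illustrative sketch only -- not part of GVMMR0.cpp. */
static uint32_t exampleCalcPptTimerHz(PGVMMHOSTCPU pCpu)
{
    /* Start with the most recent frequency hint reported by the EMTs... */
    uint32_t uHz = ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz);

    /* ...and take the maximum over the ~160 ms history window. */
    for (unsigned i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
        if (pCpu->Ppt.aHzHistory[i] > uHz)
            uHz = pCpu->Ppt.aHzHistory[i];

    /* Frequencies below the per-CPU minimum are not worth running a timer for. */
    return uHz >= pCpu->Ppt.uMinHz ? uHz : 0;
}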
47
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/gvm.h>
57#include <VBox/vmm/vm.h>
58#include <VBox/vmm/vmcpuset.h>
59#include <VBox/vmm/vmm.h>
60#ifdef VBOX_WITH_NEM_R0
61# include <VBox/vmm/nem.h>
62#endif
63#include <VBox/param.h>
64#include <VBox/err.h>
65
66#include <iprt/asm.h>
67#include <iprt/asm-amd64-x86.h>
68#include <iprt/critsect.h>
69#include <iprt/mem.h>
70#include <iprt/semaphore.h>
71#include <iprt/time.h>
72#include <VBox/log.h>
73#include <iprt/thread.h>
74#include <iprt/process.h>
75#include <iprt/param.h>
76#include <iprt/string.h>
77#include <iprt/assert.h>
78#include <iprt/mem.h>
79#include <iprt/memobj.h>
80#include <iprt/mp.h>
81#include <iprt/cpuset.h>
82#include <iprt/spinlock.h>
83#include <iprt/timer.h>
84
85#include "dtrace/VBoxVMM.h"
86
87
88/*********************************************************************************************************************************
89* Defined Constants And Macros *
90*********************************************************************************************************************************/
91#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
92/** Define this to enable the periodic preemption timer. */
93# define GVMM_SCHED_WITH_PPT
94#endif
95
96
97/** @def GVMM_CHECK_SMAP_SETUP
98 * SMAP check setup. */
99/** @def GVMM_CHECK_SMAP_CHECK
100 * Checks that the AC flag is set if SMAP is enabled. If AC is not set,
101 * it will be logged and @a a_BadExpr is executed. */
102/** @def GVMM_CHECK_SMAP_CHECK2
103 * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will
104 * be logged, written to the VMs assertion text buffer, and @a a_BadExpr is
105 * executed. */
106#if defined(VBOX_STRICT) || 1
107# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
108# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) \
109 do { \
110 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
111 { \
112 RTCCUINTREG fEflCheck = ASMGetFlags(); \
113 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
114 { /* likely */ } \
115 else \
116 { \
117 SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
118 a_BadExpr; \
119 } \
120 } \
121 } while (0)
122# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) \
123 do { \
124 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
125 { \
126 RTCCUINTREG fEflCheck = ASMGetFlags(); \
127 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
128 { /* likely */ } \
129 else \
130 { \
131 SUPR0BadContext((a_pVM) ? (a_pVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
132 a_BadExpr; \
133 } \
134 } \
135 } while (0)
136#else
137# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0
138# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures)
139# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) NOREF(fKernelFeatures)
140#endif
141
142
143
144/*********************************************************************************************************************************
145* Structures and Typedefs *
146*********************************************************************************************************************************/
147
148/**
149 * Global VM handle.
150 */
151typedef struct GVMHANDLE
152{
153 /** The index of the next handle in the list (free or used). (0 is nil.) */
154 uint16_t volatile iNext;
155 /** Our own index / handle value. */
156 uint16_t iSelf;
157 /** The process ID of the handle owner.
158 * This is used for access checks. */
159 RTPROCESS ProcId;
160 /** The pointer to the ring-0 only (aka global) VM structure. */
161 PGVM pGVM;
162 /** The ring-0 mapping of the shared VM instance data. */
163 PVM pVM;
164 /** The virtual machine object. */
165 void *pvObj;
166 /** The session this VM is associated with. */
167 PSUPDRVSESSION pSession;
168 /** The ring-0 handle of the EMT0 thread.
169 * This is used for ownership checks as well as looking up a VM handle by thread
170 * at times like assertions. */
171 RTNATIVETHREAD hEMT0;
172} GVMHANDLE;
173/** Pointer to a global VM handle. */
174typedef GVMHANDLE *PGVMHANDLE;
175
176/** Number of GVM handles (including the NIL handle). */
177#if HC_ARCH_BITS == 64
178# define GVMM_MAX_HANDLES 8192
179#else
180# define GVMM_MAX_HANDLES 128
181#endif
182
183/**
184 * Per host CPU GVMM data.
185 */
186typedef struct GVMMHOSTCPU
187{
188 /** Magic number (GVMMHOSTCPU_MAGIC). */
189 uint32_t volatile u32Magic;
190 /** The CPU ID. */
191 RTCPUID idCpu;
192 /** The CPU set index. */
193 uint32_t idxCpuSet;
194
195#ifdef GVMM_SCHED_WITH_PPT
196 /** Periodic preemption timer data. */
197 struct
198 {
199 /** The handle to the periodic preemption timer. */
200 PRTTIMER pTimer;
201 /** Spinlock protecting the data below. */
202 RTSPINLOCK hSpinlock;
203 /** The smallest Hz that we need to care about. (static) */
204 uint32_t uMinHz;
205 /** The number of ticks between each historization. */
206 uint32_t cTicksHistoriziationInterval;
207 /** The current historization tick (counting up to
208 * cTicksHistoriziationInterval and then resetting). */
209 uint32_t iTickHistorization;
210 /** The current timer interval. This is set to 0 when inactive. */
211 uint32_t cNsInterval;
212 /** The current timer frequency. This is set to 0 when inactive. */
213 uint32_t uTimerHz;
214 /** The current max frequency reported by the EMTs.
215 * This gets historicized and reset by the timer callback. This is
216 * read without holding the spinlock, so needs atomic updating. */
217 uint32_t volatile uDesiredHz;
218 /** Whether the timer was started or not. */
219 bool volatile fStarted;
220 /** Set if we're starting the timer. */
221 bool volatile fStarting;
222 /** The index of the next history entry (mod it). */
223 uint32_t iHzHistory;
224 /** Historicized uDesiredHz values. The array wraps around, new entries
225 * are added at iHzHistory. This is updated approximately every
226 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
227 uint32_t aHzHistory[8];
228 /** Statistics counter for recording the number of interval changes. */
229 uint32_t cChanges;
230 /** Statistics counter for recording the number of timer starts. */
231 uint32_t cStarts;
232 } Ppt;
233#endif /* GVMM_SCHED_WITH_PPT */
234
235} GVMMHOSTCPU;
236/** Pointer to the per host CPU GVMM data. */
237typedef GVMMHOSTCPU *PGVMMHOSTCPU;
238/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
239#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
240 /** The interval one history entry should cover (approximately), given in
241 * nanoseconds. */
242#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
243
244
245/**
246 * The GVMM instance data.
247 */
248typedef struct GVMM
249{
250 /** Eyecatcher / magic. */
251 uint32_t u32Magic;
252 /** The index of the head of the free handle chain. (0 is nil.) */
253 uint16_t volatile iFreeHead;
254 /** The index of the head of the active handle chain. (0 is nil.) */
255 uint16_t volatile iUsedHead;
256 /** The number of VMs. */
257 uint16_t volatile cVMs;
258 /** Alignment padding. */
259 uint16_t u16Reserved;
260 /** The number of EMTs. */
261 uint32_t volatile cEMTs;
262 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
263 uint32_t volatile cHaltedEMTs;
264 /** Mini lock for restricting early wake-ups to one thread. */
265 bool volatile fDoingEarlyWakeUps;
266 bool afPadding[3]; /**< explicit alignment padding. */
267 /** When the next halted or sleeping EMT will wake up.
268 * This is set to 0 when it needs recalculating and to UINT64_MAX when
269 * there are no halted or sleeping EMTs in the GVMM. */
270 uint64_t uNsNextEmtWakeup;
271 /** The lock used to serialize VM creation, destruction and associated events that
272 * isn't performance critical. Owners may acquire the list lock. */
273 RTCRITSECT CreateDestroyLock;
274 /** The lock used to serialize used list updates and accesses.
275 * This indirectly includes scheduling since the scheduler will have to walk the
276 * used list to examine running VMs. Owners may not acquire any other locks. */
277 RTCRITSECTRW UsedLock;
278 /** The handle array.
279 * The size of this array defines the maximum number of currently running VMs.
280 * The first entry is unused as it represents the NIL handle. */
281 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
282
283 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
284 * The number of EMTs that means we no longer consider ourselves alone on a
285 * CPU/Core.
286 */
287 uint32_t cEMTsMeansCompany;
288 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
289 * The minimum sleep time for when we're alone, in nanoseconds.
290 */
291 uint32_t nsMinSleepAlone;
292 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
293 * The minimum sleep time for when we've got company, in nanoseconds.
294 */
295 uint32_t nsMinSleepCompany;
296 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
297 * The limit for the first round of early wake-ups, given in nanoseconds.
298 */
299 uint32_t nsEarlyWakeUp1;
300 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
301 * The limit for the second round of early wake-ups, given in nanoseconds.
302 */
303 uint32_t nsEarlyWakeUp2;
304
305 /** Set if we're doing early wake-ups.
306 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
307 bool volatile fDoEarlyWakeUps;
308
309 /** The number of entries in the host CPU array (aHostCpus). */
310 uint32_t cHostCpus;
311 /** Per host CPU data (variable length). */
312 GVMMHOSTCPU aHostCpus[1];
313} GVMM;
314AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
315AssertCompileMemberAlignment(GVMM, UsedLock, 8);
316AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
317/** Pointer to the GVMM instance data. */
318typedef GVMM *PGVMM;
319
320/** The GVMM::u32Magic value (Charlie Haden). */
321#define GVMM_MAGIC UINT32_C(0x19370806)
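
The handle array doubles as two singly linked chains threaded through GVMHANDLE::iNext: a free chain headed by iFreeHead and a used chain headed by iUsedHead, with index 0 reserved as the nil handle. The sketch below illustrates how GVMMR0CreateVM() (further down) moves a handle from one chain to the other; the relinking must be done while holding the 'used' lock exclusively.

/* Illustrative sketch only -- mirrors the relinking done in GVMMR0CreateVM(). */
static uint16_t exampleAllocHandle(PGVMM pGVMM)
{
    uint16_t const iHandle = pGVMM->iFreeHead;      /* 0 means the free chain is empty. */
    if (iHandle)
    {
        PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
        pGVMM->iFreeHead   = pHandle->iNext;        /* Unlink from the free chain. */
        pHandle->iNext     = pGVMM->iUsedHead;      /* Push onto the used chain. */
        pGVMM->iUsedHead   = iHandle;
        pGVMM->cVMs++;
    }
    return iHandle;                                 /* Caller maps 0 to VERR_GVM_TOO_MANY_VMS. */
}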
322
323
324
325/*********************************************************************************************************************************
326* Global Variables *
327*********************************************************************************************************************************/
328/** Pointer to the GVMM instance data.
329 * (Just my general dislike for global variables.) */
330static PGVMM g_pGVMM = NULL;
331
332/** Macro for obtaining and validating the g_pGVMM pointer.
333 * On failure it will return from the invoking function with the specified return value.
334 *
335 * @param pGVMM The name of the pGVMM variable.
336 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
337 * status codes.
338 */
339#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
340 do { \
341 (pGVMM) = g_pGVMM;\
342 AssertPtrReturn((pGVMM), (rc)); \
343 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
344 } while (0)
345
346/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
347 * On failure it will return from the invoking function.
348 *
349 * @param pGVMM The name of the pGVMM variable.
350 */
351#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
352 do { \
353 (pGVMM) = g_pGVMM;\
354 AssertPtrReturnVoid((pGVMM)); \
355 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
356 } while (0)
357
358
359/*********************************************************************************************************************************
360* Internal Functions *
361*********************************************************************************************************************************/
362static void gvmmR0InitPerVMData(PGVM pGVM);
363static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
364static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock);
365static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM);
366
367#ifdef GVMM_SCHED_WITH_PPT
368static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
369#endif
370
371
372/**
373 * Initializes the GVMM.
374 *
375 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
376 *
377 * @returns VBox status code.
378 */
379GVMMR0DECL(int) GVMMR0Init(void)
380{
381 LogFlow(("GVMMR0Init:\n"));
382
383 /*
384 * Allocate and initialize the instance data.
385 */
386 uint32_t cHostCpus = RTMpGetArraySize();
387 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
388
389 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
390 if (!pGVMM)
391 return VERR_NO_MEMORY;
392 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
393 "GVMM-CreateDestroyLock");
394 if (RT_SUCCESS(rc))
395 {
396 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
397 if (RT_SUCCESS(rc))
398 {
399 pGVMM->u32Magic = GVMM_MAGIC;
400 pGVMM->iUsedHead = 0;
401 pGVMM->iFreeHead = 1;
402
403 /* the nil handle */
404 pGVMM->aHandles[0].iSelf = 0;
405 pGVMM->aHandles[0].iNext = 0;
406
407 /* the tail */
408 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
409 pGVMM->aHandles[i].iSelf = i;
410 pGVMM->aHandles[i].iNext = 0; /* nil */
411
412 /* the rest */
413 while (i-- > 1)
414 {
415 pGVMM->aHandles[i].iSelf = i;
416 pGVMM->aHandles[i].iNext = i + 1;
417 }
418
419 /* The default configuration values. */
420 uint32_t cNsResolution = RTSemEventMultiGetResolution();
421 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted to relative to the cpu count or something... */
422 if (cNsResolution >= 5*RT_NS_100US)
423 {
424 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
425 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
426 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
427 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
428 }
429 else if (cNsResolution > RT_NS_100US)
430 {
431 pGVMM->nsMinSleepAlone = cNsResolution / 2;
432 pGVMM->nsMinSleepCompany = cNsResolution / 4;
433 pGVMM->nsEarlyWakeUp1 = 0;
434 pGVMM->nsEarlyWakeUp2 = 0;
435 }
436 else
437 {
438 pGVMM->nsMinSleepAlone = 2000;
439 pGVMM->nsMinSleepCompany = 2000;
440 pGVMM->nsEarlyWakeUp1 = 0;
441 pGVMM->nsEarlyWakeUp2 = 0;
442 }
443 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
444
445 /* The host CPU data. */
446 pGVMM->cHostCpus = cHostCpus;
447 uint32_t iCpu = cHostCpus;
448 RTCPUSET PossibleSet;
449 RTMpGetSet(&PossibleSet);
450 while (iCpu-- > 0)
451 {
452 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
453#ifdef GVMM_SCHED_WITH_PPT
454 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
455 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
456 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
457 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
458 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
459 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
460 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
461 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
462 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
463 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
464 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
465 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
466#endif
467
468 if (RTCpuSetIsMember(&PossibleSet, iCpu))
469 {
470 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
471 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
472
473#ifdef GVMM_SCHED_WITH_PPT
474 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
475 50*1000*1000 /* whatever */,
476 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
477 gvmmR0SchedPeriodicPreemptionTimerCallback,
478 &pGVMM->aHostCpus[iCpu]);
479 if (RT_SUCCESS(rc))
480 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
481 if (RT_FAILURE(rc))
482 {
483 while (iCpu < cHostCpus)
484 {
485 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
486 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
487 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
488 iCpu++;
489 }
490 break;
491 }
492#endif
493 }
494 else
495 {
496 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
497 pGVMM->aHostCpus[iCpu].u32Magic = 0;
498 }
499 }
500 if (RT_SUCCESS(rc))
501 {
502 g_pGVMM = pGVMM;
503 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
504 return VINF_SUCCESS;
505 }
506
507 /* bail out. */
508 RTCritSectRwDelete(&pGVMM->UsedLock);
509 }
510 RTCritSectDelete(&pGVMM->CreateDestroyLock);
511 }
512
513 RTMemFree(pGVMM);
514 return rc;
515}
516
517
518/**
519 * Terminates the GVMM.
520 *
521 * This is called while owning the loader semaphore (see supdrvLdrFree()).
522 * And unless something is wrong, there should be absolutely no VMs
523 * registered at this point.
524 */
525GVMMR0DECL(void) GVMMR0Term(void)
526{
527 LogFlow(("GVMMR0Term:\n"));
528
529 PGVMM pGVMM = g_pGVMM;
530 g_pGVMM = NULL;
531 if (RT_UNLIKELY(!VALID_PTR(pGVMM)))
532 {
533 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
534 return;
535 }
536
537 /*
538 * First of all, stop all active timers.
539 */
540 uint32_t cActiveTimers = 0;
541 uint32_t iCpu = pGVMM->cHostCpus;
542 while (iCpu-- > 0)
543 {
544 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
545#ifdef GVMM_SCHED_WITH_PPT
546 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
547 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
548 cActiveTimers++;
549#endif
550 }
551 if (cActiveTimers)
552 RTThreadSleep(1); /* fudge */
553
554 /*
555 * Invalidate the instance data and free the resources.
556 */
557 pGVMM->u32Magic = ~GVMM_MAGIC;
558 RTCritSectRwDelete(&pGVMM->UsedLock);
559 RTCritSectDelete(&pGVMM->CreateDestroyLock);
560
561 pGVMM->iFreeHead = 0;
562 if (pGVMM->iUsedHead)
563 {
564 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
565 pGVMM->iUsedHead = 0;
566 }
567
568#ifdef GVMM_SCHED_WITH_PPT
569 iCpu = pGVMM->cHostCpus;
570 while (iCpu-- > 0)
571 {
572 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
573 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
574 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
575 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
576 }
577#endif
578
579 RTMemFree(pGVMM);
580}
581
582
583/**
584 * A quick hack for setting global config values.
585 *
586 * @returns VBox status code.
587 *
588 * @param pSession The session handle. Used for authentication.
589 * @param pszName The variable name.
590 * @param u64Value The new value.
591 */
592GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
593{
594 /*
595 * Validate input.
596 */
597 PGVMM pGVMM;
598 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
599 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
600 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
601
602 /*
603 * String switch time!
604 */
605 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
606 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
607 int rc = VINF_SUCCESS;
608 pszName += sizeof("/GVMM/") - 1;
609 if (!strcmp(pszName, "cEMTsMeansCompany"))
610 {
611 if (u64Value <= UINT32_MAX)
612 pGVMM->cEMTsMeansCompany = u64Value;
613 else
614 rc = VERR_OUT_OF_RANGE;
615 }
616 else if (!strcmp(pszName, "MinSleepAlone"))
617 {
618 if (u64Value <= RT_NS_100MS)
619 pGVMM->nsMinSleepAlone = u64Value;
620 else
621 rc = VERR_OUT_OF_RANGE;
622 }
623 else if (!strcmp(pszName, "MinSleepCompany"))
624 {
625 if (u64Value <= RT_NS_100MS)
626 pGVMM->nsMinSleepCompany = u64Value;
627 else
628 rc = VERR_OUT_OF_RANGE;
629 }
630 else if (!strcmp(pszName, "EarlyWakeUp1"))
631 {
632 if (u64Value <= RT_NS_100MS)
633 {
634 pGVMM->nsEarlyWakeUp1 = u64Value;
635 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
636 }
637 else
638 rc = VERR_OUT_OF_RANGE;
639 }
640 else if (!strcmp(pszName, "EarlyWakeUp2"))
641 {
642 if (u64Value <= RT_NS_100MS)
643 {
644 pGVMM->nsEarlyWakeUp2 = u64Value;
645 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
646 }
647 else
648 rc = VERR_OUT_OF_RANGE;
649 }
650 else
651 rc = VERR_CFGM_VALUE_NOT_FOUND;
652 return rc;
653}
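
Usage example (hypothetical caller, values picked arbitrarily): the name must carry the /GVMM/ prefix, which the function strips before matching, and values above RT_NS_100MS are rejected with VERR_OUT_OF_RANGE.

/* Example only -- not part of GVMMR0.cpp. */
int rc = GVMMR0SetConfig(pSession, "/GVMM/MinSleepAlone", 500000 /* ns */);
if (RT_SUCCESS(rc))
    rc = GVMMR0SetConfig(pSession, "/GVMM/EarlyWakeUp1", 25000 /* ns */);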
654
655
656/**
657 * A quick hack for getting global config values.
658 *
659 * @returns VBox status code.
660 *
661 * @param pSession The session handle. Used for authentication.
662 * @param pszName The variable name.
663 * @param pu64Value Where to return the value.
664 */
665GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
666{
667 /*
668 * Validate input.
669 */
670 PGVMM pGVMM;
671 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
672 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
673 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
674 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
675
676 /*
677 * String switch time!
678 */
679 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
680 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
681 int rc = VINF_SUCCESS;
682 pszName += sizeof("/GVMM/") - 1;
683 if (!strcmp(pszName, "cEMTsMeansCompany"))
684 *pu64Value = pGVMM->cEMTsMeansCompany;
685 else if (!strcmp(pszName, "MinSleepAlone"))
686 *pu64Value = pGVMM->nsMinSleepAlone;
687 else if (!strcmp(pszName, "MinSleepCompany"))
688 *pu64Value = pGVMM->nsMinSleepCompany;
689 else if (!strcmp(pszName, "EarlyWakeUp1"))
690 *pu64Value = pGVMM->nsEarlyWakeUp1;
691 else if (!strcmp(pszName, "EarlyWakeUp2"))
692 *pu64Value = pGVMM->nsEarlyWakeUp2;
693 else
694 rc = VERR_CFGM_VALUE_NOT_FOUND;
695 return rc;
696}
697
698
699/**
700 * Acquire the 'used' lock in shared mode.
701 *
702 * This prevents destruction of the VM while we're in ring-0.
703 *
704 * @returns IPRT status code, see RTSemFastMutexRequest.
705 * @param a_pGVMM The GVMM instance data.
706 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
707 */
708#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
709
710/**
711 * Release the 'used' lock when owning it in shared mode.
712 *
713 * @returns IPRT status code, see RTSemFastMutexRequest.
714 * @param a_pGVMM The GVMM instance data.
715 * @sa GVMMR0_USED_SHARED_LOCK
716 */
717#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
718
719/**
720 * Acquire the 'used' lock in exclusive mode.
721 *
722 * Only use this function when making changes to the used list.
723 *
724 * @returns IPRT status code, see RTSemFastMutexRequest.
725 * @param a_pGVMM The GVMM instance data.
726 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
727 */
728#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
729
730/**
731 * Release the 'used' lock when owning it in exclusive mode.
732 *
733 * @returns IPRT status code, see RTSemFastMutexRelease.
734 * @param a_pGVMM The GVMM instance data.
735 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
736 */
737#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
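
For orientation, a sketch of the intended usage pattern (illustrative only; the real walkers appear later in this file): traverse the used chain under the shared 'used' lock, and take it exclusively only when relinking handles.

/* Illustrative sketch only -- walking the used list in shared mode. */
int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
AssertRC(rc);
for (uint16_t i = pGVMM->iUsedHead; i != 0; i = pGVMM->aHandles[i].iNext)
{
    PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
    if (pCurGVM)
    {
        /* ... inspect the VM, e.g. for scheduling decisions ... */
    }
}
GVMMR0_USED_SHARED_UNLOCK(pGVMM);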
738
739
740/**
741 * Try acquire the 'create & destroy' lock.
742 *
743 * @returns IPRT status code, see RTSemFastMutexRequest.
744 * @param pGVMM The GVMM instance data.
745 */
746DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
747{
748 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
749 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
750 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
751 return rc;
752}
753
754
755/**
756 * Release the 'create & destroy' lock.
757 *
758 * @returns IPRT status code, see RTSemFastMutexRequest.
759 * @param pGVMM The GVMM instance data.
760 */
761DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
762{
763 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
764 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
765 AssertRC(rc);
766 return rc;
767}
768
769
770/**
771 * Request wrapper for the GVMMR0CreateVM API.
772 *
773 * @returns VBox status code.
774 * @param pReq The request buffer.
775 * @param pSession The session handle. The VM will be associated with this.
776 */
777GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
778{
779 /*
780 * Validate the request.
781 */
782 if (!VALID_PTR(pReq))
783 return VERR_INVALID_POINTER;
784 if (pReq->Hdr.cbReq != sizeof(*pReq))
785 return VERR_INVALID_PARAMETER;
786 if (pReq->pSession != pSession)
787 return VERR_INVALID_POINTER;
788
789 /*
790 * Execute it.
791 */
792 PVM pVM;
793 pReq->pVMR0 = NULL;
794 pReq->pVMR3 = NIL_RTR3PTR;
795 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pVM);
796 if (RT_SUCCESS(rc))
797 {
798 pReq->pVMR0 = pVM;
799 pReq->pVMR3 = pVM->pVMR3;
800 }
801 return rc;
802}
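
For context, a ring-3 caller would prepare the request roughly as sketched below before passing it to the ring-0 entry point. The field names match the checks above; the header magic and the dispatch path (VMMR0_DO_GVMM_CREATE_VM via the support driver) are assumptions for illustration.

/* Sketch only -- preparing a GVMMCREATEVMREQ in ring-3. */
GVMMCREATEVMREQ CreateVMReq;
CreateVMReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;   /* Assumed standard request header init. */
CreateVMReq.Hdr.cbReq    = sizeof(CreateVMReq);    /* Checked by GVMMR0CreateVMReq above. */
CreateVMReq.pSession     = pSession;               /* Must be the session the call is made on. */
CreateVMReq.cCpus        = cCpus;
CreateVMReq.pVMR3        = NIL_RTR3PTR;            /* Filled in on success. */
CreateVMReq.pVMR0        = NIL_RTR0PTR;            /* Filled in on success. */
/* ...dispatch to ring-0 (VMMR0_DO_GVMM_CREATE_VM) via the support driver... */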
803
804
805/**
806 * Allocates the VM structure and registers it with GVM.
807 *
808 * The caller will become the VM owner and thereby the EMT.
809 *
810 * @returns VBox status code.
811 * @param pSession The support driver session.
812 * @param cCpus Number of virtual CPUs for the new VM.
813 * @param ppVM Where to store the pointer to the VM structure.
814 *
815 * @thread EMT.
816 */
817GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM)
818{
819 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
820 PGVMM pGVMM;
821 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
822
823 AssertPtrReturn(ppVM, VERR_INVALID_POINTER);
824 *ppVM = NULL;
825
826 if ( cCpus == 0
827 || cCpus > VMM_MAX_CPU_COUNT)
828 return VERR_INVALID_PARAMETER;
829
830 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
831 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
832 RTPROCESS ProcId = RTProcSelf();
833 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
834
835 /*
836 * The whole allocation process is protected by the lock.
837 */
838 int rc = gvmmR0CreateDestroyLock(pGVMM);
839 AssertRCReturn(rc, rc);
840
841 /*
842 * Only one VM per session.
843 */
844 if (SUPR0GetSessionVM(pSession) != NULL)
845 {
846 gvmmR0CreateDestroyUnlock(pGVMM);
847 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
848 return VERR_ALREADY_EXISTS;
849 }
850
851 /*
852 * Allocate a handle first so we don't waste resources unnecessarily.
853 */
854 uint16_t iHandle = pGVMM->iFreeHead;
855 if (iHandle)
856 {
857 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
858
859 /* consistency checks, a bit paranoid as always. */
860 if ( !pHandle->pVM
861 && !pHandle->pGVM
862 && !pHandle->pvObj
863 && pHandle->iSelf == iHandle)
864 {
865 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
866 if (pHandle->pvObj)
867 {
868 /*
869 * Move the handle from the free to used list and perform permission checks.
870 */
871 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
872 AssertRC(rc);
873
874 pGVMM->iFreeHead = pHandle->iNext;
875 pHandle->iNext = pGVMM->iUsedHead;
876 pGVMM->iUsedHead = iHandle;
877 pGVMM->cVMs++;
878
879 pHandle->pVM = NULL;
880 pHandle->pGVM = NULL;
881 pHandle->pSession = pSession;
882 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
883 pHandle->ProcId = NIL_RTPROCESS;
884
885 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
886
887 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
888 if (RT_SUCCESS(rc))
889 {
890 /*
891 * Allocate the global VM structure (GVM) and initialize it.
892 */
893 PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]));
894 if (pGVM)
895 {
896 pGVM->u32Magic = GVM_MAGIC;
897 pGVM->hSelf = iHandle;
898 pGVM->pVM = NULL;
899 pGVM->cCpus = cCpus;
900 pGVM->pSession = pSession;
901
902 gvmmR0InitPerVMData(pGVM);
903 GMMR0InitPerVMData(pGVM);
904
905 /*
906 * Allocate the shared VM structure and associated page array.
907 */
908 const uint32_t cbVM = RT_UOFFSETOF_DYN(VM, aCpus[cCpus]);
909 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
910 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
911 if (RT_SUCCESS(rc))
912 {
913 PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM);
914 memset(pVM, 0, cPages << PAGE_SHIFT);
915 pVM->enmVMState = VMSTATE_CREATING;
916 pVM->pVMR0 = pVM;
917 pVM->pSession = pSession;
918 pVM->hSelf = iHandle;
919 pVM->cbSelf = cbVM;
920 pVM->cCpus = cCpus;
921 pVM->uCpuExecutionCap = 100; /* default is no cap. */
922 pVM->offVMCPU = RT_UOFFSETOF_DYN(VM, aCpus);
923 AssertCompileMemberAlignment(VM, cpum, 64);
924 AssertCompileMemberAlignment(VM, tm, 64);
925 AssertCompileMemberAlignment(VM, aCpus, PAGE_SIZE);
926
927 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
928 if (RT_SUCCESS(rc))
929 {
930 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
931 for (uint32_t iPage = 0; iPage < cPages; iPage++)
932 {
933 paPages[iPage].uReserved = 0;
934 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
935 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
936 }
937
938 /*
939 * Map them into ring-3.
940 */
941 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
942 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
943 if (RT_SUCCESS(rc))
944 {
945 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
946 pVM->pVMR3 = pVMR3;
947 AssertPtr((void *)pVMR3);
948
949 /* Initialize all the VM pointers. */
950 for (VMCPUID i = 0; i < cCpus; i++)
951 {
952 pVM->aCpus[i].idCpu = i;
953 pVM->aCpus[i].pVMR0 = pVM;
954 pVM->aCpus[i].pVMR3 = pVMR3;
955 pVM->aCpus[i].idHostCpu = NIL_RTCPUID;
956 pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
957 }
958
959 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
960 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
961 NIL_RTR0PROCESS);
962 if (RT_SUCCESS(rc))
963 {
964 pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
965 AssertPtr((void *)pVM->paVMPagesR3);
966
967 /* complete the handle - take the UsedLock sem just to be careful. */
968 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
969 AssertRC(rc);
970
971 pHandle->pVM = pVM;
972 pHandle->pGVM = pGVM;
973 pHandle->hEMT0 = hEMT0;
974 pHandle->ProcId = ProcId;
975 pGVM->pVM = pVM;
976 pGVM->pVMR3 = pVMR3;
977 pGVM->aCpus[0].hEMT = hEMT0;
978 pVM->aCpus[0].hNativeThreadR0 = hEMT0;
979 pGVMM->cEMTs += cCpus;
980
981 for (VMCPUID i = 0; i < cCpus; i++)
982 {
983 pGVM->aCpus[i].pVCpu = &pVM->aCpus[i];
984 pGVM->aCpus[i].pVM = pVM;
985 }
986
987 /* Associate it with the session and create the context hook for EMT0. */
988 rc = SUPR0SetSessionVM(pSession, pGVM, pVM);
989 if (RT_SUCCESS(rc))
990 {
991 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[0]);
992 if (RT_SUCCESS(rc))
993 {
994 /*
995 * Done!
996 */
997 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pVM, ProcId, (void *)hEMT0, cCpus);
998
999 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1000 gvmmR0CreateDestroyUnlock(pGVMM);
1001
1002 CPUMR0RegisterVCpuThread(&pVM->aCpus[0]);
1003
1004 *ppVM = pVM;
1005 Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVMR3, pGVM, iHandle));
1006 return VINF_SUCCESS;
1007 }
1008
1009 SUPR0SetSessionVM(pSession, NULL, NULL);
1010 }
1011 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1012 }
1013
1014 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1015 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1016 }
1017 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
1018 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1019 }
1020 RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
1021 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1022 }
1023 }
1024 }
1025 /* else: The user wasn't permitted to create this VM. */
1026
1027 /*
1028 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1029 * object reference here. A little extra mess because of non-recursive lock.
1030 */
1031 void *pvObj = pHandle->pvObj;
1032 pHandle->pvObj = NULL;
1033 gvmmR0CreateDestroyUnlock(pGVMM);
1034
1035 SUPR0ObjRelease(pvObj, pSession);
1036
1037 SUPR0Printf("GVMMR0CreateVM: failed, rc=%d\n", rc);
1038 return rc;
1039 }
1040
1041 rc = VERR_NO_MEMORY;
1042 }
1043 else
1044 rc = VERR_GVMM_IPE_1;
1045 }
1046 else
1047 rc = VERR_GVM_TOO_MANY_VMS;
1048
1049 gvmmR0CreateDestroyUnlock(pGVMM);
1050 return rc;
1051}
1052
1053
1054/**
1055 * Initializes the per VM data belonging to GVMM.
1056 *
1057 * @param pGVM Pointer to the global VM structure.
1058 */
1059static void gvmmR0InitPerVMData(PGVM pGVM)
1060{
1061 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1062 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1063 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1064 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1065 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1066 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1067 pGVM->gvmm.s.fDoneVMMR0Init = false;
1068 pGVM->gvmm.s.fDoneVMMR0Term = false;
1069
1070 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1071 {
1072 pGVM->aCpus[i].idCpu = i;
1073 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1074 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1075 pGVM->aCpus[i].pGVM = pGVM;
1076 pGVM->aCpus[i].pVCpu = NULL;
1077 pGVM->aCpus[i].pVM = NULL;
1078 }
1079}
1080
1081
1082/**
1083 * Does the VM initialization.
1084 *
1085 * @returns VBox status code.
1086 * @param pGVM The global (ring-0) VM structure.
1087 */
1088GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1089{
1090 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1091
1092 int rc = VERR_INTERNAL_ERROR_3;
1093 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1094 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1095 {
1096 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1097 {
1098 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1099 if (RT_FAILURE(rc))
1100 {
1101 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1102 break;
1103 }
1104 }
1105 }
1106 else
1107 rc = VERR_WRONG_ORDER;
1108
1109 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1110 return rc;
1111}
1112
1113
1114/**
1115 * Indicates that we're done with the ring-0 initialization
1116 * of the VM.
1117 *
1118 * @param pGVM The global (ring-0) VM structure.
1119 * @thread EMT(0)
1120 */
1121GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1122{
1123 /* Set the indicator. */
1124 pGVM->gvmm.s.fDoneVMMR0Init = true;
1125}
1126
1127
1128/**
1129 * Indicates that we're doing the ring-0 termination of the VM.
1130 *
1131 * @returns true if termination hasn't been done already, false if it has.
1132 * @param pGVM Pointer to the global VM structure. Optional.
1133 * @thread EMT(0) or session cleanup thread.
1134 */
1135GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1136{
1137 /* Validate the VM structure, state and handle. */
1138 AssertPtrReturn(pGVM, false);
1139
1140 /* Set the indicator. */
1141 if (pGVM->gvmm.s.fDoneVMMR0Term)
1142 return false;
1143 pGVM->gvmm.s.fDoneVMMR0Term = true;
1144 return true;
1145}
1146
1147
1148/**
1149 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1150 *
1151 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1152 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1153 * would've been nice if the caller was actually the EMT thread or that we somehow
1154 * could've associated the calling thread with the VM up front.
1155 *
1156 * @returns VBox status code.
1157 * @param pGVM The global (ring-0) VM structure.
1158 * @param pVM The cross context VM structure.
1159 *
1160 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1161 */
1162GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM, PVM pVM)
1163{
1164 LogFlow(("GVMMR0DestroyVM: pGVM=%p pVM=%p\n", pGVM, pVM));
1165 PGVMM pGVMM;
1166 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1167
1168 /*
1169 * Validate the VM structure, state and caller.
1170 */
1171 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1172 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1173 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1174 AssertReturn(pGVM->pVM == pVM, VERR_INVALID_POINTER);
1175 AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState),
1176 VERR_WRONG_ORDER);
1177
1178 uint32_t hGVM = pGVM->hSelf;
1179 ASMCompilerBarrier();
1180 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1181 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1182
1183 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1184 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1185
1186 RTPROCESS ProcId = RTProcSelf();
1187 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1188 AssertReturn( ( pHandle->hEMT0 == hSelf
1189 && pHandle->ProcId == ProcId)
1190 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1191
1192 /*
1193 * Lookup the handle and destroy the object.
1194 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1195 * object, we take some precautions against racing callers just in case...
1196 */
1197 int rc = gvmmR0CreateDestroyLock(pGVMM);
1198 AssertRC(rc);
1199
1200 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1201 if ( pHandle->pVM == pVM
1202 && ( ( pHandle->hEMT0 == hSelf
1203 && pHandle->ProcId == ProcId)
1204 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1205 && VALID_PTR(pHandle->pvObj)
1206 && VALID_PTR(pHandle->pSession)
1207 && VALID_PTR(pHandle->pGVM)
1208 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1209 {
1210 /* Check that other EMTs have deregistered. */
1211 uint32_t cNotDeregistered = 0;
1212 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1213 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */
1214 if (cNotDeregistered == 0)
1215 {
1216 /* Grab the object pointer. */
1217 void *pvObj = pHandle->pvObj;
1218 pHandle->pvObj = NULL;
1219 gvmmR0CreateDestroyUnlock(pGVMM);
1220
1221 SUPR0ObjRelease(pvObj, pHandle->pSession);
1222 }
1223 else
1224 {
1225 gvmmR0CreateDestroyUnlock(pGVMM);
1226 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1227 }
1228 }
1229 else
1230 {
1231 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n",
1232 pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf);
1233 gvmmR0CreateDestroyUnlock(pGVMM);
1234 rc = VERR_GVMM_IPE_2;
1235 }
1236
1237 return rc;
1238}
1239
1240
1241/**
1242 * Performs VM cleanup task as part of object destruction.
1243 *
1244 * @param pGVM The GVM pointer.
1245 */
1246static void gvmmR0CleanupVM(PGVM pGVM)
1247{
1248 if ( pGVM->gvmm.s.fDoneVMMR0Init
1249 && !pGVM->gvmm.s.fDoneVMMR0Term)
1250 {
1251 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1252 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM)
1253 {
1254 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1255 VMMR0TermVM(pGVM, pGVM->pVM, NIL_VMCPUID);
1256 }
1257 else
1258 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM));
1259 }
1260
1261 GMMR0CleanupVM(pGVM);
1262#ifdef VBOX_WITH_NEM_R0
1263 NEMR0CleanupVM(pGVM);
1264#endif
1265
1266 AssertCompile((uintptr_t)NIL_RTTHREADCTXHOOK == 0); /* Depends on zero initialized memory working for NIL at the moment. */
1267 for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
1268 {
1269 /** @todo Can we busy wait here for all thread-context hooks to be
1270 * deregistered before releasing (destroying) it? Only until we find a
1271 * solution for not deregistering hooks every time we're leaving HMR0
1272 * context. */
1273 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->pVM->aCpus[idCpu]);
1274 }
1275}
1276
1277
1278/**
1279 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1280 *
1281 * pvUser1 is the GVM instance pointer.
1282 * pvUser2 is the handle pointer.
1283 */
1284static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1285{
1286 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1287
1288 NOREF(pvObj);
1289
1290 /*
1291 * Some quick, paranoid, input validation.
1292 */
1293 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1294 AssertPtr(pHandle);
1295 PGVMM pGVMM = (PGVMM)pvUser1;
1296 Assert(pGVMM == g_pGVMM);
1297 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1298 if ( !iHandle
1299 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1300 || iHandle != pHandle->iSelf)
1301 {
1302 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1303 return;
1304 }
1305
1306 int rc = gvmmR0CreateDestroyLock(pGVMM);
1307 AssertRC(rc);
1308 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1309 AssertRC(rc);
1310
1311 /*
1312 * This is a tad slow but a doubly linked list is too much hassle.
1313 */
1314 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1315 {
1316 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1317 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1318 gvmmR0CreateDestroyUnlock(pGVMM);
1319 return;
1320 }
1321
1322 if (pGVMM->iUsedHead == iHandle)
1323 pGVMM->iUsedHead = pHandle->iNext;
1324 else
1325 {
1326 uint16_t iPrev = pGVMM->iUsedHead;
1327 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1328 while (iPrev)
1329 {
1330 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1331 {
1332 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1333 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1334 gvmmR0CreateDestroyUnlock(pGVMM);
1335 return;
1336 }
1337 if (RT_UNLIKELY(c-- <= 0))
1338 {
1339 iPrev = 0;
1340 break;
1341 }
1342
1343 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1344 break;
1345 iPrev = pGVMM->aHandles[iPrev].iNext;
1346 }
1347 if (!iPrev)
1348 {
1349 SUPR0Printf("GVM: can't find the previous handle of %d!\n", pHandle->iSelf);
1350 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1351 gvmmR0CreateDestroyUnlock(pGVMM);
1352 return;
1353 }
1354
1355 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1356 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1357 }
1358 pHandle->iNext = 0;
1359 pGVMM->cVMs--;
1360
1361 /*
1362 * Do the global cleanup round.
1363 */
1364 PGVM pGVM = pHandle->pGVM;
1365 if ( VALID_PTR(pGVM)
1366 && pGVM->u32Magic == GVM_MAGIC)
1367 {
1368 pGVMM->cEMTs -= pGVM->cCpus;
1369
1370 if (pGVM->pSession)
1371 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1372
1373 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1374
1375 gvmmR0CleanupVM(pGVM);
1376
1377 /*
1378 * Do the GVMM cleanup - must be done last.
1379 */
1380 /* The VM and VM pages mappings/allocations. */
1381 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1382 {
1383 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1384 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1385 }
1386
1387 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1388 {
1389 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1390 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1391 }
1392
1393 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1394 {
1395 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1396 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1397 }
1398
1399 if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ)
1400 {
1401 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc);
1402 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1403 }
1404
1405 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1406 {
1407 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1408 {
1409 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1410 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1411 }
1412 }
1413
1414 /* the GVM structure itself. */
1415 pGVM->u32Magic |= UINT32_C(0x80000000);
1416 RTMemFree(pGVM);
1417
1418 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1419 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1420 AssertRC(rc);
1421 }
1422 /* else: GVMMR0CreateVM cleanup. */
1423
1424 /*
1425 * Free the handle.
1426 */
1427 pHandle->iNext = pGVMM->iFreeHead;
1428 pGVMM->iFreeHead = iHandle;
1429 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1430 ASMAtomicWriteNullPtr(&pHandle->pVM);
1431 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1432 ASMAtomicWriteNullPtr(&pHandle->pSession);
1433 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1434 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1435
1436 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1437 gvmmR0CreateDestroyUnlock(pGVMM);
1438 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1439}
1440
1441
1442/**
1443 * Registers the calling thread as the EMT of a Virtual CPU.
1444 *
1445 * Note that VCPU 0 is automatically registered during VM creation.
1446 *
1447 * @returns VBox status code
1448 * @param pGVM The global (ring-0) VM structure.
1449 * @param pVM The cross context VM structure.
1450 * @param idCpu VCPU id to register the current thread as.
1451 */
1452GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1453{
1454 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1455
1456 /*
1457 * Validate the VM structure, state and handle.
1458 */
1459 PGVMM pGVMM;
1460 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /* fTakeUsedLock */); /** @todo take lock here. */
1461 if (RT_SUCCESS(rc))
1462 {
1463 if (idCpu < pGVM->cCpus)
1464 {
1465 /* Check that the EMT isn't already assigned to a thread. */
1466 if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
1467 {
1468 Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1469
1470 /* A thread may only be one EMT. */
1471 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1472 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1473 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1474 if (RT_SUCCESS(rc))
1475 {
1476 /*
1477 * Do the assignment, then try setup the hook. Undo if that fails.
1478 */
1479 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1480
1481 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[idCpu]);
1482 if (RT_SUCCESS(rc))
1483 CPUMR0RegisterVCpuThread(&pVM->aCpus[idCpu]);
1484 else
1485 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1486 }
1487 }
1488 else
1489 rc = VERR_ACCESS_DENIED;
1490 }
1491 else
1492 rc = VERR_INVALID_CPU_ID;
1493 }
1494 return rc;
1495}
1496
1497
1498/**
1499 * Deregisters the calling thread as the EMT of a Virtual CPU.
1500 *
1501 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1502 *
1503 * @returns VBox status code
1504 * @param pGVM The global (ring-0) VM structure.
1505 * @param pVM The cross context VM structure.
1506 * @param idCpu VCPU id of the calling EMT to deregister.
1507 */
1508GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1509{
1510 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1511
1512 /*
1513 * Validate the VM structure, state and handle.
1514 */
1515 PGVMM pGVMM;
1516 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
1517 if (RT_SUCCESS(rc))
1518 {
1519 /*
1520 * Take the destruction lock and recheck the handle state to
1521 * prevent racing GVMMR0DestroyVM.
1522 */
1523 gvmmR0CreateDestroyLock(pGVMM);
1524 uint32_t hSelf = pGVM->hSelf;
1525 ASMCompilerBarrier();
1526 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1527 && pGVMM->aHandles[hSelf].pvObj != NULL
1528 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1529 {
1530 /*
1531 * Do per-EMT cleanups.
1532 */
1533 VMMR0ThreadCtxHookDestroyForEmt(&pVM->aCpus[idCpu]);
1534
1535 /*
1536 * Invalidate hEMT. We don't use NIL here as that would allow
1537 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1538 */
1539 AssertCompile(~(RTNATIVETHREAD)1 != NIL_RTNATIVETHREAD);
1540 pGVM->aCpus[idCpu].hEMT = ~(RTNATIVETHREAD)1;
1541 pVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1542 }
1543
1544 gvmmR0CreateDestroyUnlock(pGVMM);
1545 }
1546 return rc;
1547}
1548
1549
1550/**
1551 * Lookup a GVM structure by its handle.
1552 *
1553 * @returns The GVM pointer on success, NULL on failure.
1554 * @param hGVM The global VM handle. Asserts on bad handle.
1555 */
1556GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1557{
1558 PGVMM pGVMM;
1559 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1560
1561 /*
1562 * Validate.
1563 */
1564 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1565 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1566
1567 /*
1568 * Look it up.
1569 */
1570 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1571 AssertPtrReturn(pHandle->pVM, NULL);
1572 AssertPtrReturn(pHandle->pvObj, NULL);
1573 PGVM pGVM = pHandle->pGVM;
1574 AssertPtrReturn(pGVM, NULL);
1575 AssertReturn(pGVM->pVM == pHandle->pVM, NULL);
1576
1577 return pHandle->pGVM;
1578}
1579
1580
1581/**
1582 * Lookup a GVM structure by the shared VM structure.
1583 *
1584 * The calling thread must be in the same process as the VM. All current lookups
1585 * are by threads inside the same process, so this will not be an issue.
1586 *
1587 * @returns VBox status code.
1588 * @param pVM The cross context VM structure.
1589 * @param ppGVM Where to store the GVM pointer.
1590 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1591 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1592 * shared mode when requested.
1593 *
1594 * Be very careful if not taking the lock as it's
1595 * possible that the VM will disappear then!
1596 *
1597 * @remark This will not assert on an invalid pVM but will try to return silently.
1598 */
1599static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1600{
1601 RTPROCESS ProcId = RTProcSelf();
1602 PGVMM pGVMM;
1603 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1604
1605 /*
1606 * Validate.
1607 */
1608 if (RT_UNLIKELY( !VALID_PTR(pVM)
1609 || ((uintptr_t)pVM & PAGE_OFFSET_MASK)))
1610 return VERR_INVALID_POINTER;
1611 if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING
1612 || pVM->enmVMState >= VMSTATE_TERMINATED))
1613 return VERR_INVALID_POINTER;
1614
1615 uint16_t hGVM = pVM->hSelf;
1616 ASMCompilerBarrier();
1617 if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE
1618 || hGVM >= RT_ELEMENTS(pGVMM->aHandles)))
1619 return VERR_INVALID_HANDLE;
1620
1621 /*
1622 * Look it up.
1623 */
1624 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1625 PGVM pGVM;
1626 if (fTakeUsedLock)
1627 {
1628 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1629 AssertRCReturn(rc, rc);
1630
1631 pGVM = pHandle->pGVM;
1632 if (RT_UNLIKELY( pHandle->pVM != pVM
1633 || pHandle->ProcId != ProcId
1634 || !VALID_PTR(pHandle->pvObj)
1635 || !VALID_PTR(pGVM)
1636 || pGVM->pVM != pVM))
1637 {
1638 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1639 return VERR_INVALID_HANDLE;
1640 }
1641 }
1642 else
1643 {
1644 if (RT_UNLIKELY(pHandle->pVM != pVM))
1645 return VERR_INVALID_HANDLE;
1646 if (RT_UNLIKELY(pHandle->ProcId != ProcId))
1647 return VERR_INVALID_HANDLE;
1648 if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj)))
1649 return VERR_INVALID_HANDLE;
1650
1651 pGVM = pHandle->pGVM;
1652 if (RT_UNLIKELY(!VALID_PTR(pGVM)))
1653 return VERR_INVALID_HANDLE;
1654 if (RT_UNLIKELY(pGVM->pVM != pVM))
1655 return VERR_INVALID_HANDLE;
1656 }
1657
1658 *ppGVM = pGVM;
1659 *ppGVMM = pGVMM;
1660 return VINF_SUCCESS;
1661}
1662
1663
1664/**
1665 * Fast look up a GVM structure by the cross context VM structure.
1666 *
1667 * This is mainly used as a glue function, so performance is important.
1668 *
1669 * @returns GVM on success, NULL on failure.
1670 * @param pVM The cross context VM structure. ASSUMES to be
1671 * reasonably valid, so we can do fewer checks than in
1672 * gvmmR0ByVM.
1673 *
1674 * @note Do not use this on pVM structures from userland!
1675 */
1676GVMMR0DECL(PGVM) GVMMR0FastGetGVMByVM(PVM pVM)
1677{
1678 AssertPtr(pVM);
1679 Assert(!((uintptr_t)pVM & PAGE_OFFSET_MASK));
1680
1681 PGVMM pGVMM;
1682 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1683
1684 /*
1685 * Validate.
1686 */
1687 uint16_t hGVM = pVM->hSelf;
1688 ASMCompilerBarrier();
1689 AssertReturn(hGVM != NIL_GVM_HANDLE && hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1690
1691 /*
1692 * Look it up and check pVM against the value in the handle and GVM structures.
1693 */
1694 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1695 AssertReturn(pHandle->pVM == pVM, NULL);
1696
1697 PGVM pGVM = pHandle->pGVM;
1698 AssertPtrReturn(pGVM, NULL);
1699 AssertReturn(pGVM->pVM == pVM, NULL);
1700
1701 return pGVM;
1702}
1703
1704
1705/**
1706 * Check that the given GVM and VM structures match up.
1707 *
1708 * The calling thread must be in the same process as the VM. All current lookups
1709 * are by threads inside the same process, so this will not be an issue.
1710 *
1711 * @returns VBox status code.
1712 * @param pGVM The global (ring-0) VM structure.
1713 * @param pVM The cross context VM structure.
1714 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1715 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1716 * shared mode when requested.
1717 *
1718 * Be very careful if not taking the lock as it's
1719 * possible that the VM will disappear then!
1720 *
1721 * @remark This will not assert on an invalid pVM but will try to return silently.
1722 */
1723static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1724{
1725 /*
1726 * Check the pointers.
1727 */
1728 int rc;
1729 if (RT_LIKELY(RT_VALID_PTR(pGVM)))
1730 {
1731 if (RT_LIKELY( RT_VALID_PTR(pVM)
1732 && ((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0))
1733 {
1734 if (RT_LIKELY(pGVM->pVM == pVM))
1735 {
1736 /*
1737 * Get the pGVMM instance and check the VM handle.
1738 */
1739 PGVMM pGVMM;
1740 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1741
1742 uint16_t hGVM = pGVM->hSelf;
1743 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1744 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1745 {
1746 RTPROCESS const pidSelf = RTProcSelf();
1747 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1748 if (fTakeUsedLock)
1749 {
1750 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1751 AssertRCReturn(rc, rc);
1752 }
1753
1754 if (RT_LIKELY( pHandle->pGVM == pGVM
1755 && pHandle->pVM == pVM
1756 && pHandle->ProcId == pidSelf
1757 && RT_VALID_PTR(pHandle->pvObj)))
1758 {
1759 /*
1760 * Some more VM data consistency checks.
1761 */
1762 if (RT_LIKELY( pVM->cCpus == pGVM->cCpus
1763 && pVM->hSelf == hGVM
1764 && pVM->enmVMState >= VMSTATE_CREATING
1765 && pVM->enmVMState <= VMSTATE_TERMINATED
1766 && pVM->pVMR0 == pVM))
1767 {
1768 *ppGVMM = pGVMM;
1769 return VINF_SUCCESS;
1770 }
1771 }
1772
1773 if (fTakeUsedLock)
1774 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1775 }
1776 }
1777 rc = VERR_INVALID_VM_HANDLE;
1778 }
1779 else
1780 rc = VERR_INVALID_POINTER;
1781 }
1782 else
1783 rc = VERR_INVALID_POINTER;
1784 return rc;
1785}
1786
1787
1788/**
1789 * Check that the given GVM and VM structures match up.
1790 *
1791 * The calling thread must be in the same process as the VM. All current lookups
1792 * are by threads inside the same process, so this will not be an issue.
1793 *
1794 * @returns VBox status code.
1795 * @param pGVM The global (ring-0) VM structure.
1796 * @param pVM The cross context VM structure.
1797 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
1798 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1799 * @thread EMT
1800 *
1801 * @remarks This will assert in all failure paths.
1802 */
1803static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM)
1804{
1805 /*
1806 * Check the pointers.
1807 */
1808 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1809
1810 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1811 AssertReturn(((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
1812 AssertReturn(pGVM->pVM == pVM, VERR_INVALID_VM_HANDLE);
1813
1814
1815 /*
1816 * Get the pGVMM instance and check the VM handle.
1817 */
1818 PGVMM pGVMM;
1819 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1820
1821 uint16_t hGVM = pGVM->hSelf;
1822 ASMCompilerBarrier();
1823 AssertReturn( hGVM != NIL_GVM_HANDLE
1824 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1825
1826 RTPROCESS const pidSelf = RTProcSelf();
1827 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1828 AssertReturn( pHandle->pGVM == pGVM
1829 && pHandle->pVM == pVM
1830 && pHandle->ProcId == pidSelf
1831 && RT_VALID_PTR(pHandle->pvObj),
1832 VERR_INVALID_HANDLE);
1833
1834 /*
1835 * Check the EMT claim.
1836 */
1837 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
1838 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1839 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1840
1841 /*
1842 * Some more VM data consistency checks.
1843 */
1844 AssertReturn(pVM->cCpus == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
1845 AssertReturn(pVM->hSelf == hGVM, VERR_INCONSISTENT_VM_HANDLE);
1846 AssertReturn(pVM->pVMR0 == pVM, VERR_INCONSISTENT_VM_HANDLE);
1847 AssertReturn( pVM->enmVMState >= VMSTATE_CREATING
1848 && pVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
1849
1850 *ppGVMM = pGVMM;
1851 return VINF_SUCCESS;
1852}
1853
1854
1855/**
1856 * Validates a GVM/VM pair.
1857 *
1858 * @returns VBox status code.
1859 * @param pGVM The global (ring-0) VM structure.
1860 * @param pVM The cross context VM structure.
1861 */
1862GVMMR0DECL(int) GVMMR0ValidateGVMandVM(PGVM pGVM, PVM pVM)
1863{
1864 PGVMM pGVMM;
1865 return gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /*fTakeUsedLock*/);
1866}
1867
1868
1869
1870/**
1871 * Validates a GVM/VM/EMT combo.
1872 *
1873 * @returns VBox status code.
1874 * @param pGVM The global (ring-0) VM structure.
1875 * @param pVM The cross context VM structure.
1876 * @param idCpu The Virtual CPU ID of the calling EMT.
1877 * @thread EMT(idCpu)
1878 */
1879GVMMR0DECL(int) GVMMR0ValidateGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1880{
1881 PGVMM pGVMM;
1882 return gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
1883}
1884
1885
1886/**
1887 * Looks up the VM belonging to the specified EMT thread.
1888 *
1889 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1890 * unnecessary kernel panics when the EMT thread hits an assertion. The
1891 * caller may or may not be an EMT thread.
1892 *
1893 * @returns Pointer to the VM on success, NULL on failure.
1894 * @param hEMT The native thread handle of the EMT.
1895 * NIL_RTNATIVETHREAD means the current thread
1896 */
1897GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1898{
1899 /*
1900 * No Assertions here as we're usually called in a AssertMsgN or
1901 * RTAssert* context.
1902 */
1903 PGVMM pGVMM = g_pGVMM;
1904 if ( !VALID_PTR(pGVMM)
1905 || pGVMM->u32Magic != GVMM_MAGIC)
1906 return NULL;
1907
1908 if (hEMT == NIL_RTNATIVETHREAD)
1909 hEMT = RTThreadNativeSelf();
1910 RTPROCESS ProcId = RTProcSelf();
1911
1912 /*
1913 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1914 */
1915/** @todo introduce some pid hash table here, please. */
1916 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1917 {
1918 if ( pGVMM->aHandles[i].iSelf == i
1919 && pGVMM->aHandles[i].ProcId == ProcId
1920 && VALID_PTR(pGVMM->aHandles[i].pvObj)
1921 && VALID_PTR(pGVMM->aHandles[i].pVM)
1922 && VALID_PTR(pGVMM->aHandles[i].pGVM))
1923 {
1924 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1925 return pGVMM->aHandles[i].pVM;
1926
1927 /* This is fairly safe with the current process per VM approach. */
1928 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1929 VMCPUID const cCpus = pGVM->cCpus;
1930 ASMCompilerBarrier();
1931 if ( cCpus < 1
1932 || cCpus > VMM_MAX_CPU_COUNT)
1933 continue;
1934 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1935 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1936 return pGVMM->aHandles[i].pVM;
1937 }
1938 }
1939 return NULL;
1940}
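
/*
 * Illustrative sketch (not part of the original file): how assertion or logging
 * code might use GVMMR0GetVMByEMT() to tag a message with the VM owning the
 * current thread. The helper name is an assumption made purely for illustration.
 */
#if 0 /* example only */
static void vmmR0ExampleAssertionHelper(void)
{
    PVM pVM = GVMMR0GetVMByEMT(NIL_RTNATIVETHREAD); /* NIL means the current thread */
    if (pVM)
        SUPR0Printf("Assertion hit on an EMT of VM %p (hSelf=%#x)\n", pVM, pVM->hSelf);
    else
        SUPR0Printf("Assertion hit on a thread that is not a registered EMT\n");
}
#endif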
1941
1942
1943/**
1944 * Looks up the GVMCPU belonging to the specified EMT thread.
1945 *
1946 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1947 * unnecessary kernel panics when the EMT thread hits an assertion. The
1948 * caller may or may not be an EMT thread.
1949 *
1950 * @returns Pointer to the GVMCPU on success, NULL on failure.
1951 * @param hEMT The native thread handle of the EMT.
1952 * NIL_RTNATIVETHREAD means the current thread
1953 */
1954GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
1955{
1956 /*
1957 * No Assertions here as we're usually called in a AssertMsgN,
1958 * RTAssert*, Log and LogRel contexts.
1959 */
1960 PGVMM pGVMM = g_pGVMM;
1961 if ( !VALID_PTR(pGVMM)
1962 || pGVMM->u32Magic != GVMM_MAGIC)
1963 return NULL;
1964
1965 if (hEMT == NIL_RTNATIVETHREAD)
1966 hEMT = RTThreadNativeSelf();
1967 RTPROCESS ProcId = RTProcSelf();
1968
1969 /*
1970 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1971 */
1972/** @todo introduce some pid hash table here, please. */
1973 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1974 {
1975 if ( pGVMM->aHandles[i].iSelf == i
1976 && pGVMM->aHandles[i].ProcId == ProcId
1977 && VALID_PTR(pGVMM->aHandles[i].pvObj)
1978 && VALID_PTR(pGVMM->aHandles[i].pVM)
1979 && VALID_PTR(pGVMM->aHandles[i].pGVM))
1980 {
1981 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1982 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1983 return &pGVM->aCpus[0];
1984
1985 /* This is fairly safe with the current process per VM approach. */
1986 VMCPUID const cCpus = pGVM->cCpus;
1987 ASMCompilerBarrier();
1988 ASMCompilerBarrier();
1989 if ( cCpus < 1
1990 || cCpus > VMM_MAX_CPU_COUNT)
1991 continue;
1992 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1993 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1994 return &pGVM->aCpus[idCpu];
1995 }
1996 }
1997 return NULL;
1998}
1999
2000
2001/**
2002 * This will wake up expired and soon-to-be expired VMs.
2003 *
2004 * @returns Number of VMs that have been woken up.
2005 * @param pGVMM Pointer to the GVMM instance data.
2006 * @param u64Now The current time.
2007 */
2008static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
2009{
2010 /*
2011 * Skip this if it has been disabled because of high resolution wake-ups or by
2012 * the user.
2013 */
2014 if (!pGVMM->fDoEarlyWakeUps)
2015 return 0;
2016
2017/** @todo Rewrite this algorithm. See performance defect XYZ. */
2018
2019 /*
2020 * A cheap optimization to stop wasting so much time here on big setups.
2021 */
2022 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
2023 if ( pGVMM->cHaltedEMTs == 0
2024 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
2025 return 0;
2026
2027 /*
2028 * Only one thread doing this at a time.
2029 */
2030 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
2031 return 0;
2032
2033 /*
2034 * The first pass will wake up VMs which have actually expired
2035 * and look for VMs that should be woken up in the 2nd and 3rd passes.
2036 */
2037 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
2038 uint64_t u64Min = UINT64_MAX;
2039 unsigned cWoken = 0;
2040 unsigned cHalted = 0;
2041 unsigned cTodo2nd = 0;
2042 unsigned cTodo3rd = 0;
2043 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2044 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2045 i = pGVMM->aHandles[i].iNext)
2046 {
2047 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2048 if ( VALID_PTR(pCurGVM)
2049 && pCurGVM->u32Magic == GVM_MAGIC)
2050 {
2051 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2052 {
2053 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2054 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2055 if (u64)
2056 {
2057 if (u64 <= u64Now)
2058 {
2059 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2060 {
2061 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2062 AssertRC(rc);
2063 cWoken++;
2064 }
2065 }
2066 else
2067 {
2068 cHalted++;
2069 if (u64 <= uNsEarlyWakeUp1)
2070 cTodo2nd++;
2071 else if (u64 <= uNsEarlyWakeUp2)
2072 cTodo3rd++;
2073 else if (u64 < u64Min)
2074                            u64Min = u64; /* remember the earliest future expiry for the next-wakeup hint */
2075 }
2076 }
2077 }
2078 }
2079 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2080 }
2081
2082 if (cTodo2nd)
2083 {
2084 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2085 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2086 i = pGVMM->aHandles[i].iNext)
2087 {
2088 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2089 if ( VALID_PTR(pCurGVM)
2090 && pCurGVM->u32Magic == GVM_MAGIC)
2091 {
2092 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2093 {
2094 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2095 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2096 if ( u64
2097 && u64 <= uNsEarlyWakeUp1)
2098 {
2099 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2100 {
2101 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2102 AssertRC(rc);
2103 cWoken++;
2104 }
2105 }
2106 }
2107 }
2108 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2109 }
2110 }
2111
2112 if (cTodo3rd)
2113 {
2114 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2115 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2116 i = pGVMM->aHandles[i].iNext)
2117 {
2118 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2119 if ( VALID_PTR(pCurGVM)
2120 && pCurGVM->u32Magic == GVM_MAGIC)
2121 {
2122 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2123 {
2124 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2125 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2126 if ( u64
2127 && u64 <= uNsEarlyWakeUp2)
2128 {
2129 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2130 {
2131 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2132 AssertRC(rc);
2133 cWoken++;
2134 }
2135 }
2136 }
2137 }
2138 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2139 }
2140 }
2141
2142 /*
2143 * Set the minimum value.
2144 */
2145 pGVMM->uNsNextEmtWakeup = u64Min;
2146
2147 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2148 return cWoken;
2149}
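
/*
 * Worked example (illustrative, not part of the original file): assuming, purely
 * for illustration, nsEarlyWakeUp1 = 25000 ns and nsEarlyWakeUp2 = 50000 ns, an
 * EMT whose u64HaltExpire has already passed is woken in the first pass; one
 * expiring within 25 us of u64Now is woken in the second pass; one expiring
 * within 50 us is woken in the third pass; anything later only contributes to
 * the uNsNextEmtWakeup minimum used by the cheap early-out at the top of this
 * function.
 */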
2150
2151
2152/**
2153 * Halt the EMT thread.
2154 *
2155 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2156 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2157 * @param pGVM The global (ring-0) VM structure.
2158 * @param pVM The cross context VM structure.
2159 * @param pCurGVCpu         The global (ring-0) virtual CPU structure of the calling EMT.
2160 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2161 * @thread EMT(pCurGVCpu).
2162 */
2163GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PVM pVM, PGVMCPU pCurGVCpu, uint64_t u64ExpireGipTime)
2164{
2165 LogFlow(("GVMMR0SchedHalt: pGVM=%p pVM=%p pCurGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2166 pGVM, pVM, pCurGVCpu, pCurGVCpu->idCpu, u64ExpireGipTime));
2167 GVMM_CHECK_SMAP_SETUP();
2168 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2169
2170 PGVMM pGVMM;
2171 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2172
2173 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2174 Assert(!pCurGVCpu->gvmm.s.u64HaltExpire);
2175
2176 /*
2177 * If we're doing early wake-ups, we must take the UsedList lock before we
2178 * start querying the current time.
2179 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2180 */
2181 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2182 if (fDoEarlyWakeUps)
2183 {
2184 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2185 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2186 }
2187
2188 pCurGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
2189
2190    /* GIP hack: We might be frequently sleeping for short intervals where the
2191 difference between GIP and system time matters on systems with high resolution
2192 system time. So, convert the input from GIP to System time in that case. */
2193 Assert(ASMGetFlags() & X86_EFL_IF);
2194 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2195 const uint64_t u64NowGip = RTTimeNanoTS();
2196 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2197
2198 if (fDoEarlyWakeUps)
2199 {
2200 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2201 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2202 }
2203
2204 /*
2205 * Go to sleep if we must...
2206 * Cap the sleep time to 1 second to be on the safe side.
2207 */
2208 int rc;
2209 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2210 if ( u64NowGip < u64ExpireGipTime
2211 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2212 ? pGVMM->nsMinSleepCompany
2213 : pGVMM->nsMinSleepAlone))
2214 {
2215 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2216 if (cNsInterval > RT_NS_1SEC)
2217 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2218 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2219 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2220 if (fDoEarlyWakeUps)
2221 {
2222 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2223 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2224 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2225 }
2226 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2227
2228 rc = RTSemEventMultiWaitEx(pCurGVCpu->gvmm.s.HaltEventMulti,
2229 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2230 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2231 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2232
2233 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0);
2234 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2235
2236        /* Reset the semaphore to try to prevent a few false wake-ups. */
2237 if (rc == VINF_SUCCESS)
2238 {
2239 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
2240 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2241 }
2242 else if (rc == VERR_TIMEOUT)
2243 {
2244 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2245 rc = VINF_SUCCESS;
2246 }
2247 }
2248 else
2249 {
2250 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2251 if (fDoEarlyWakeUps)
2252 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2253 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2254 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
2255 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2256 rc = VINF_SUCCESS;
2257 }
2258
2259 return rc;
2260}
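
/*
 * Illustrative sketch (not part of the original file): how a hypothetical ring-0
 * caller could perform the first halt attempt directly in ring-0 instead of
 * returning to ring-3, in the spirit of the HLT/MWAIT change this revision
 * belongs to. The helper name and the relative-to-absolute time conversion shown
 * here are assumptions made purely for illustration.
 */
#if 0 /* example only */
static int vmmR0ExampleHaltInRing0(PGVM pGVM, PVM pVM, PGVMCPU pGVCpu, uint64_t cNsToHalt)
{
    /* Convert the relative halt period into an absolute GIP timestamp. */
    uint64_t const u64ExpireGipTime = RTTimeNanoTS() + cNsToHalt;

    /* Block right here in ring-0; a timeout comes back as VINF_SUCCESS. */
    int rc = GVMMR0SchedHalt(pGVM, pVM, pGVCpu, u64ExpireGipTime);

    /* VERR_INTERRUPTED means a signal is pending and ring-3 must deal with it. */
    return rc;
}
#endif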
2261
2262
2263/**
2264 * Halt the EMT thread.
2265 *
2266 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2267 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2268 * @param pGVM The global (ring-0) VM structure.
2269 * @param pVM The cross context VM structure.
2270 * @param idCpu The Virtual CPU ID of the calling EMT.
2271 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2272 * @thread EMT(idCpu).
2273 */
2274GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2275{
2276 GVMM_CHECK_SMAP_SETUP();
2277 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2278 PGVMM pGVMM;
2279 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2280 if (RT_SUCCESS(rc))
2281 {
2282 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2283 rc = GVMMR0SchedHalt(pGVM, pVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2284 }
2285 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2286 return rc;
2287}
2288
2289
2290
2291/**
2292 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2293 * a sleeping EMT.
2294 *
2295 * @retval VINF_SUCCESS if successfully woken up.
2296 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2297 *
2298 * @param pGVM The global (ring-0) VM structure.
2299 * @param pGVCpu The global (ring-0) VCPU structure.
2300 */
2301DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2302{
2303 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2304
2305 /*
2306 * Signal the semaphore regardless of whether it's currently blocked on it.
2307 *
2308 * The reason for this is that there is absolutely no way we can be 100%
2309 * certain that it isn't *about* to go to sleep on it and just got
2310 * delayed a bit en route. So, we will always signal the semaphore when
2311 * it is flagged as halted in the VMM.
2312 */
2313/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2314 int rc;
2315 if (pGVCpu->gvmm.s.u64HaltExpire)
2316 {
2317 rc = VINF_SUCCESS;
2318 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2319 }
2320 else
2321 {
2322 rc = VINF_GVM_NOT_BLOCKED;
2323 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2324 }
2325
2326 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2327 AssertRC(rc2);
2328
2329 return rc;
2330}
2331
2332
2333/**
2334 * Wakes up the halted EMT thread so it can service a pending request.
2335 *
2336 * @returns VBox status code.
2337 * @retval VINF_SUCCESS if successfully woken up.
2338 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2339 *
2340 * @param pGVM The global (ring-0) VM structure.
2341 * @param pVM The cross context VM structure.
2342 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2343 * @param fTakeUsedLock Take the used lock or not
2344 * @thread Any but EMT(idCpu).
2345 */
2346GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
2347{
2348 GVMM_CHECK_SMAP_SETUP();
2349 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2350
2351 /*
2352 * Validate input and take the UsedLock.
2353 */
2354 PGVMM pGVMM;
2355 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
2356 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2357 if (RT_SUCCESS(rc))
2358 {
2359 if (idCpu < pGVM->cCpus)
2360 {
2361 /*
2362 * Do the actual job.
2363 */
2364 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2365 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2366
2367 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2368 {
2369 /*
2370 * While we're here, do a round of scheduling.
2371 */
2372 Assert(ASMGetFlags() & X86_EFL_IF);
2373 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2374 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2375 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2376 }
2377 }
2378 else
2379 rc = VERR_INVALID_CPU_ID;
2380
2381 if (fTakeUsedLock)
2382 {
2383 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2384 AssertRC(rc2);
2385 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2386 }
2387 }
2388
2389 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2390 return rc;
2391}
2392
2393
2394/**
2395 * Wakes up the halted EMT thread so it can service a pending request.
2396 *
2397 * @returns VBox status code.
2398 * @retval VINF_SUCCESS if successfully woken up.
2399 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2400 *
2401 * @param pGVM The global (ring-0) VM structure.
2402 * @param pVM The cross context VM structure.
2403 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2404 * @thread Any but EMT(idCpu).
2405 */
2406GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2407{
2408 return GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
2409}
2410
2411
2412/**
2413 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2414 * parameter and no used locking.
2415 *
2416 * @returns VBox status code.
2417 * @retval VINF_SUCCESS if successfully woken up.
2418 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2419 *
2420 * @param pVM The cross context VM structure.
2421 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2422 * @thread Any but EMT(idCpu).
2423 * @deprecated Don't use in new code if possible! Use the GVM variant.
2424 */
2425GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PVM pVM, VMCPUID idCpu)
2426{
2427 GVMM_CHECK_SMAP_SETUP();
2428 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2429 PGVM pGVM;
2430 PGVMM pGVMM;
2431 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
2432 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2433 if (RT_SUCCESS(rc))
2434 rc = GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, false /*fTakeUsedLock*/);
2435 return rc;
2436}
2437
2438
2439/**
2440 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2441 * the Virtual CPU if it's still busy executing guest code.
2442 *
2443 * @returns VBox status code.
2444 * @retval VINF_SUCCESS if poked successfully.
2445 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2446 *
2447 * @param pGVM The global (ring-0) VM structure.
2448 * @param pVCpu The cross context virtual CPU structure.
2449 */
2450DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPU pVCpu)
2451{
2452 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2453
2454 RTCPUID idHostCpu = pVCpu->idHostCpu;
2455 if ( idHostCpu == NIL_RTCPUID
2456 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2457 {
2458 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2459 return VINF_GVM_NOT_BUSY_IN_GC;
2460 }
2461
2462 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2463 RTMpPokeCpu(idHostCpu);
2464 return VINF_SUCCESS;
2465}
2466
2467
2468/**
2469 * Pokes an EMT if it's still busy running guest code.
2470 *
2471 * @returns VBox status code.
2472 * @retval VINF_SUCCESS if poked successfully.
2473 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2474 *
2475 * @param pGVM The global (ring-0) VM structure.
2476 * @param pVM The cross context VM structure.
2477 * @param idCpu The ID of the virtual CPU to poke.
2478 * @param fTakeUsedLock Take the used lock or not
2479 */
2480GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
2481{
2482 /*
2483 * Validate input and take the UsedLock.
2484 */
2485 PGVMM pGVMM;
2486 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
2487 if (RT_SUCCESS(rc))
2488 {
2489 if (idCpu < pGVM->cCpus)
2490 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2491 else
2492 rc = VERR_INVALID_CPU_ID;
2493
2494 if (fTakeUsedLock)
2495 {
2496 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2497 AssertRC(rc2);
2498 }
2499 }
2500
2501    LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2502 return rc;
2503}
2504
2505
2506/**
2507 * Pokes an EMT if it's still busy running guest code.
2508 *
2509 * @returns VBox status code.
2510 * @retval VINF_SUCCESS if poked successfully.
2511 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2512 *
2513 * @param pGVM The global (ring-0) VM structure.
2514 * @param pVM The cross context VM structure.
2515 * @param idCpu The ID of the virtual CPU to poke.
2516 */
2517GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2518{
2519 return GVMMR0SchedPokeEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
2520}
2521
2522
2523/**
2524 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2525 * used locking.
2526 *
2527 * @returns VBox status code.
2528 * @retval VINF_SUCCESS if poked successfully.
2529 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2530 *
2531 * @param pVM The cross context VM structure.
2532 * @param idCpu The ID of the virtual CPU to poke.
2533 *
2534 * @deprecated Don't use in new code if possible! Use the GVM variant.
2535 */
2536GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PVM pVM, VMCPUID idCpu)
2537{
2538 PGVM pGVM;
2539 PGVMM pGVMM;
2540 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
2541 if (RT_SUCCESS(rc))
2542 {
2543 if (idCpu < pGVM->cCpus)
2544 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2545 else
2546 rc = VERR_INVALID_CPU_ID;
2547 }
2548 return rc;
2549}
2550
2551
2552/**
2553 * Wakes up a set of halted EMT threads so they can service pending requests.
2554 *
2555 * @returns VBox status code, no informational stuff.
2556 *
2557 * @param pGVM The global (ring-0) VM structure.
2558 * @param pVM The cross context VM structure.
2559 * @param pSleepSet The set of sleepers to wake up.
2560 * @param pPokeSet The set of CPUs to poke.
2561 */
2562GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PVM pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2563{
2564 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2565 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2566 GVMM_CHECK_SMAP_SETUP();
2567 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2568 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2569
2570 /*
2571 * Validate input and take the UsedLock.
2572 */
2573 PGVMM pGVMM;
2574 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /* fTakeUsedLock */);
2575 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2576 if (RT_SUCCESS(rc))
2577 {
2578 rc = VINF_SUCCESS;
2579 VMCPUID idCpu = pGVM->cCpus;
2580 while (idCpu-- > 0)
2581 {
2582            /* Don't try to poke or wake up ourselves. */
2583 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2584 continue;
2585
2586 /* just ignore errors for now. */
2587 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2588 {
2589 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2590 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2591 }
2592 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2593 {
2594 gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2595 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2596 }
2597 }
2598
2599 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2600 AssertRC(rc2);
2601 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2602 }
2603
2604 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2605 return rc;
2606}
2607
2608
2609/**
2610 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2611 *
2612 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2613 * @param pGVM The global (ring-0) VM structure.
2614 * @param pVM The cross context VM structure.
2615 * @param pReq Pointer to the request packet.
2616 */
2617GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PVM pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2618{
2619 /*
2620 * Validate input and pass it on.
2621 */
2622 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2623 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2624
2625 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, pVM, &pReq->SleepSet, &pReq->PokeSet);
2626}
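
/*
 * Illustrative sketch (not part of the original file): how a caller might fill in
 * the GVMMSCHEDWAKEUPANDPOKECPUSREQ packet consumed by the wrapper above. The
 * field names follow the checks done by the wrapper; the helper name and the
 * VMCPUSET_* usage are assumptions made purely for illustration.
 */
#if 0 /* example only */
static int vmmR0ExampleWakeAndPoke(PGVM pGVM, PVM pVM, VMCPUID idSleeping, VMCPUID idBusy)
{
    GVMMSCHEDWAKEUPANDPOKECPUSREQ Req;
    Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
    Req.Hdr.cbReq    = sizeof(Req);
    VMCPUSET_EMPTY(&Req.SleepSet);
    VMCPUSET_EMPTY(&Req.PokeSet);
    VMCPUSET_ADD(&Req.SleepSet, idSleeping);    /* EMT believed to be halted */
    VMCPUSET_ADD(&Req.PokeSet,  idBusy);        /* EMT believed to be executing guest code */
    return GVMMR0SchedWakeUpAndPokeCpusReq(pGVM, pVM, &Req);
}
#endif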
2627
2628
2629
2630/**
2631 * Poll the schedule to see if someone else should get a chance to run.
2632 *
2633 * This is a bit hackish and will not work too well if the machine is
2634 * under heavy load from non-VM processes.
2635 *
2636 * @returns VINF_SUCCESS if not yielded.
2637 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2638 * @param pGVM The global (ring-0) VM structure.
2639 * @param pVM The cross context VM structure.
2640 * @param idCpu The Virtual CPU ID of the calling EMT.
2641 * @param fYield Whether to yield or not.
2642 * This is for when we're spinning in the halt loop.
2643 * @thread EMT(idCpu).
2644 */
2645GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fYield)
2646{
2647 /*
2648 * Validate input.
2649 */
2650 PGVMM pGVMM;
2651 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2652 if (RT_SUCCESS(rc))
2653 {
2654 /*
2655 * We currently only implement helping doing wakeups (fYield = false), so don't
2656 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2657 */
2658 if (!fYield && pGVMM->fDoEarlyWakeUps)
2659 {
2660 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2661 pGVM->gvmm.s.StatsSched.cPollCalls++;
2662
2663 Assert(ASMGetFlags() & X86_EFL_IF);
2664 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2665
2666 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2667
2668 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2669 }
2670 /*
2671 * Not quite sure what we could do here...
2672 */
2673 else if (fYield)
2674 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2675 else
2676 rc = VINF_SUCCESS;
2677 }
2678
2679    LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
2680 return rc;
2681}
2682
2683
2684#ifdef GVMM_SCHED_WITH_PPT
2685/**
2686 * Timer callback for the periodic preemption timer.
2687 *
2688 * @param pTimer The timer handle.
2689 * @param pvUser Pointer to the per cpu structure.
2690 * @param iTick The current tick.
2691 */
2692static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2693{
2694 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2695 NOREF(pTimer); NOREF(iTick);
2696
2697 /*
2698 * Termination check
2699 */
2700 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2701 return;
2702
2703 /*
2704 * Do the house keeping.
2705 */
2706 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2707
2708 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
2709 {
2710 /*
2711 * Historicize the max frequency.
2712 */
2713 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
2714 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
2715 pCpu->Ppt.iTickHistorization = 0;
2716 pCpu->Ppt.uDesiredHz = 0;
2717
2718 /*
2719         * Check if the current timer frequency needs to be changed.
2720 */
2721 uint32_t uHistMaxHz = 0;
2722 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
2723 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
2724 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
2725 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
2726 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2727 else if (uHistMaxHz)
2728 {
2729 /*
2730 * Reprogram it.
2731 */
2732 pCpu->Ppt.cChanges++;
2733 pCpu->Ppt.iTickHistorization = 0;
2734 pCpu->Ppt.uTimerHz = uHistMaxHz;
2735 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
2736 pCpu->Ppt.cNsInterval = cNsInterval;
2737 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2738 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2739 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2740 / cNsInterval;
2741 else
2742 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2743 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2744
2745 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
2746 RTTimerChangeInterval(pTimer, cNsInterval);
2747 }
2748 else
2749 {
2750 /*
2751 * Stop it.
2752 */
2753 pCpu->Ppt.fStarted = false;
2754 pCpu->Ppt.uTimerHz = 0;
2755 pCpu->Ppt.cNsInterval = 0;
2756 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2757
2758 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
2759 RTTimerStop(pTimer);
2760 }
2761 }
2762 else
2763 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2764}
2765#endif /* GVMM_SCHED_WITH_PPT */
2766
2767
2768/**
2769 * Updates the periodic preemption timer for the calling CPU.
2770 *
2771 * The caller must have disabled preemption!
2772 * The caller must check that the host can do high resolution timers.
2773 *
2774 * @param pVM The cross context VM structure.
2775 * @param idHostCpu The current host CPU id.
2776 * @param uHz The desired frequency.
2777 */
2778GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz)
2779{
2780 NOREF(pVM);
2781#ifdef GVMM_SCHED_WITH_PPT
2782 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2783 Assert(RTTimerCanDoHighResolution());
2784
2785 /*
2786 * Resolve the per CPU data.
2787 */
2788 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2789 PGVMM pGVMM = g_pGVMM;
2790 if ( !VALID_PTR(pGVMM)
2791 || pGVMM->u32Magic != GVMM_MAGIC)
2792 return;
2793 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2794 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2795 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2796 && pCpu->idCpu == idHostCpu,
2797                          ("u32Magic=%#x idCpu=%d idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2798
2799 /*
2800 * Check whether we need to do anything about the timer.
2801     * We have to be a little bit careful since we might be racing the timer
2802 * callback here.
2803 */
2804 if (uHz > 16384)
2805 uHz = 16384; /** @todo add a query method for this! */
2806 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2807 && uHz >= pCpu->Ppt.uMinHz
2808 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2809 {
2810 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2811
2812 pCpu->Ppt.uDesiredHz = uHz;
2813 uint32_t cNsInterval = 0;
2814 if (!pCpu->Ppt.fStarted)
2815 {
2816 pCpu->Ppt.cStarts++;
2817 pCpu->Ppt.fStarted = true;
2818 pCpu->Ppt.fStarting = true;
2819 pCpu->Ppt.iTickHistorization = 0;
2820 pCpu->Ppt.uTimerHz = uHz;
2821 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2822 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2823 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2824 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2825 / cNsInterval;
2826 else
2827 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2828 }
2829
2830 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2831
2832 if (cNsInterval)
2833 {
2834 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2835 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2836 AssertRC(rc);
2837
2838 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2839 if (RT_FAILURE(rc))
2840 pCpu->Ppt.fStarted = false;
2841 pCpu->Ppt.fStarting = false;
2842 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2843 }
2844 }
2845#else /* !GVMM_SCHED_WITH_PPT */
2846 NOREF(idHostCpu); NOREF(uHz);
2847#endif /* !GVMM_SCHED_WITH_PPT */
2848}
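
/*
 * Worked example (illustrative, not part of the original file): a desired
 * frequency of uHz = 2000 gives cNsInterval = RT_NS_1SEC / 2000 = 500000 ns.
 * If, purely for illustration, GVMMHOSTCPU_PPT_HIST_INTERVAL_NS were 20000000 ns
 * (20 ms), the historization tick count would be
 * (20000000 + 10000000 - 1) / 500000 = 59 timer ticks per history slot.
 * Frequencies above 16384 Hz are clamped before any of this happens.
 */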
2849
2850
2851/**
2852 * Retrieves the GVMM statistics visible to the caller.
2853 *
2854 * @returns VBox status code.
2855 *
2856 * @param pStats Where to put the statistics.
2857 * @param pSession The current session.
2858 * @param pGVM The GVM to obtain statistics for. Optional.
2859 * @param pVM The VM structure corresponding to @a pGVM.
2860 */
2861GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
2862{
2863 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
2864
2865 /*
2866 * Validate input.
2867 */
2868 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2869 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2870 pStats->cVMs = 0; /* (crash before taking the sem...) */
2871
2872 /*
2873 * Take the lock and get the VM statistics.
2874 */
2875 PGVMM pGVMM;
2876 if (pGVM)
2877 {
2878 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
2879 if (RT_FAILURE(rc))
2880 return rc;
2881 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
2882 }
2883 else
2884 {
2885 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2886 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
2887
2888 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2889 AssertRCReturn(rc, rc);
2890 }
2891
2892 /*
2893 * Enumerate the VMs and add the ones visible to the statistics.
2894 */
2895 pStats->cVMs = 0;
2896 pStats->cEMTs = 0;
2897 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
2898
2899 for (unsigned i = pGVMM->iUsedHead;
2900 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2901 i = pGVMM->aHandles[i].iNext)
2902 {
2903 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
2904 void *pvObj = pGVMM->aHandles[i].pvObj;
2905 if ( VALID_PTR(pvObj)
2906 && VALID_PTR(pOtherGVM)
2907 && pOtherGVM->u32Magic == GVM_MAGIC
2908 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2909 {
2910 pStats->cVMs++;
2911 pStats->cEMTs += pOtherGVM->cCpus;
2912
2913 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
2914 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
2915 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
2916 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
2917 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
2918
2919 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
2920 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
2921 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
2922
2923 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
2924 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
2925
2926 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
2927 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
2928 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
2929 }
2930 }
2931
2932 /*
2933 * Copy out the per host CPU statistics.
2934 */
2935 uint32_t iDstCpu = 0;
2936 uint32_t cSrcCpus = pGVMM->cHostCpus;
2937 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
2938 {
2939 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
2940 {
2941 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
2942 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
2943#ifdef GVMM_SCHED_WITH_PPT
2944 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
2945 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
2946 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
2947 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
2948#else
2949 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
2950 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
2951 pStats->aHostCpus[iDstCpu].cChanges = 0;
2952 pStats->aHostCpus[iDstCpu].cStarts = 0;
2953#endif
2954 iDstCpu++;
2955 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
2956 break;
2957 }
2958 }
2959 pStats->cHostCpus = iDstCpu;
2960
2961 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2962
2963 return VINF_SUCCESS;
2964}
2965
2966
2967/**
2968 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2969 *
2970 * @returns see GVMMR0QueryStatistics.
2971 * @param pGVM The global (ring-0) VM structure. Optional.
2972 * @param pVM The cross context VM structure. Optional.
2973 * @param pReq Pointer to the request packet.
2974 * @param pSession The current session.
2975 */
2976GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PVM pVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
2977{
2978 /*
2979 * Validate input and pass it on.
2980 */
2981 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2982 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2983 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
2984
2985 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM, pVM);
2986}
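
/*
 * Illustrative sketch (not part of the original file): how a caller might fill in
 * the GVMMQUERYSTATISTICSSREQ packet consumed by the wrapper above. The field
 * names follow the checks done by the wrapper; the helper name and the use of
 * RT_ZERO/SUPR0Printf are assumptions made purely for illustration.
 */
#if 0 /* example only */
static int vmmR0ExampleQueryStats(PGVM pGVM, PVM pVM, PSUPDRVSESSION pSession)
{
    GVMMQUERYSTATISTICSSREQ Req;
    RT_ZERO(Req);
    Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
    Req.Hdr.cbReq    = sizeof(Req);
    Req.pSession     = pSession;                /* must match the calling session */
    int rc = GVMMR0QueryStatisticsReq(pGVM, pVM, &Req, pSession);
    if (RT_SUCCESS(rc))
        SUPR0Printf("GVMM: %u VMs, %u EMTs\n", Req.Stats.cVMs, Req.Stats.cEMTs);
    return rc;
}
#endif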
2987
2988
2989/**
2990 * Resets the specified GVMM statistics.
2991 *
2992 * @returns VBox status code.
2993 *
2994 * @param pStats Which statistics to reset, that is, non-zero fields indicates which to reset.
2995 * @param pSession The current session.
2996 * @param pGVM The GVM to reset statistics for. Optional.
2997 * @param pVM The VM structure corresponding to @a pGVM.
2998 */
2999GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
3000{
3001 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
3002
3003 /*
3004 * Validate input.
3005 */
3006 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3007 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3008
3009 /*
3010 * Take the lock and get the VM statistics.
3011 */
3012 PGVMM pGVMM;
3013 if (pGVM)
3014 {
3015 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
3016 if (RT_FAILURE(rc))
3017 return rc;
3018# define MAYBE_RESET_FIELD(field) \
3019 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3020 MAYBE_RESET_FIELD(cHaltCalls);
3021 MAYBE_RESET_FIELD(cHaltBlocking);
3022 MAYBE_RESET_FIELD(cHaltTimeouts);
3023 MAYBE_RESET_FIELD(cHaltNotBlocking);
3024 MAYBE_RESET_FIELD(cHaltWakeUps);
3025 MAYBE_RESET_FIELD(cWakeUpCalls);
3026 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3027 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3028 MAYBE_RESET_FIELD(cPokeCalls);
3029 MAYBE_RESET_FIELD(cPokeNotBusy);
3030 MAYBE_RESET_FIELD(cPollCalls);
3031 MAYBE_RESET_FIELD(cPollHalts);
3032 MAYBE_RESET_FIELD(cPollWakeUps);
3033# undef MAYBE_RESET_FIELD
3034 }
3035 else
3036 {
3037 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3038
3039 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3040 AssertRCReturn(rc, rc);
3041 }
3042
3043 /*
3044 * Enumerate the VMs and add the ones visible to the statistics.
3045 */
3046 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
3047 {
3048 for (unsigned i = pGVMM->iUsedHead;
3049 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3050 i = pGVMM->aHandles[i].iNext)
3051 {
3052 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3053 void *pvObj = pGVMM->aHandles[i].pvObj;
3054 if ( VALID_PTR(pvObj)
3055 && VALID_PTR(pOtherGVM)
3056 && pOtherGVM->u32Magic == GVM_MAGIC
3057 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3058 {
3059# define MAYBE_RESET_FIELD(field) \
3060 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3061 MAYBE_RESET_FIELD(cHaltCalls);
3062 MAYBE_RESET_FIELD(cHaltBlocking);
3063 MAYBE_RESET_FIELD(cHaltTimeouts);
3064 MAYBE_RESET_FIELD(cHaltNotBlocking);
3065 MAYBE_RESET_FIELD(cHaltWakeUps);
3066 MAYBE_RESET_FIELD(cWakeUpCalls);
3067 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3068 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3069 MAYBE_RESET_FIELD(cPokeCalls);
3070 MAYBE_RESET_FIELD(cPokeNotBusy);
3071 MAYBE_RESET_FIELD(cPollCalls);
3072 MAYBE_RESET_FIELD(cPollHalts);
3073 MAYBE_RESET_FIELD(cPollWakeUps);
3074# undef MAYBE_RESET_FIELD
3075 }
3076 }
3077 }
3078
3079 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3080
3081 return VINF_SUCCESS;
3082}
3083
3084
3085/**
3086 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3087 *
3088 * @returns see GVMMR0ResetStatistics.
3089 * @param pGVM The global (ring-0) VM structure. Optional.
3090 * @param pVM The cross context VM structure. Optional.
3091 * @param pReq Pointer to the request packet.
3092 * @param pSession The current session.
3093 */
3094GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PVM pVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3095{
3096 /*
3097 * Validate input and pass it on.
3098 */
3099 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3100 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3101 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3102
3103 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM, pVM);
3104}
3105