VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 91684

Last change on this file since 91684 was 91287, checked in by vboxsync, 3 years ago

VMM/CPUM,++: Moved the nested SVM VMCB allocation into CPUMCTX. bugref:10093

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 108.4 KB
Line 
1/* $Id: GVMMR0.cpp 91287 2021-09-16 21:30:45Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered; this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-cpu
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * TMCalcHostTimerFrequency() in turn takes the max
42 * TMTimerSetFrequencyHint() value and adjusts it by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
47
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/dbgf.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/pdm.h>
59#include <VBox/vmm/pgm.h>
60#include <VBox/vmm/vmm.h>
61#ifdef VBOX_WITH_NEM_R0
62# include <VBox/vmm/nem.h>
63#endif
64#include <VBox/vmm/vmcpuset.h>
65#include <VBox/vmm/vmcc.h>
66#include <VBox/param.h>
67#include <VBox/err.h>
68
69#include <iprt/asm.h>
70#include <iprt/asm-amd64-x86.h>
71#include <iprt/critsect.h>
72#include <iprt/mem.h>
73#include <iprt/semaphore.h>
74#include <iprt/time.h>
75#include <VBox/log.h>
76#include <iprt/thread.h>
77#include <iprt/process.h>
78#include <iprt/param.h>
79#include <iprt/string.h>
80#include <iprt/assert.h>
81#include <iprt/mem.h>
82#include <iprt/memobj.h>
83#include <iprt/mp.h>
84#include <iprt/cpuset.h>
85#include <iprt/spinlock.h>
86#include <iprt/timer.h>
87
88#include "dtrace/VBoxVMM.h"
89
90
91/*********************************************************************************************************************************
92* Defined Constants And Macros *
93*********************************************************************************************************************************/
94#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
95/** Define this to enable the periodic preemption timer. */
96# define GVMM_SCHED_WITH_PPT
97#endif
98
99
100/** Special value that GVMMR0DeregisterVCpu sets. */
101#define GVMM_RTNATIVETHREAD_DESTROYED (~(RTNATIVETHREAD)1)
102AssertCompile(GVMM_RTNATIVETHREAD_DESTROYED != NIL_RTNATIVETHREAD);
103
104
105/*********************************************************************************************************************************
106* Structures and Typedefs *
107*********************************************************************************************************************************/
108
109/**
110 * Global VM handle.
111 */
112typedef struct GVMHANDLE
113{
114 /** The index of the next handle in the list (free or used). (0 is nil.) */
115 uint16_t volatile iNext;
116 /** Our own index / handle value. */
117 uint16_t iSelf;
118 /** The process ID of the handle owner.
119 * This is used for access checks. */
120 RTPROCESS ProcId;
121 /** The pointer to the ring-0 only (aka global) VM structure. */
122 PGVM pGVM;
123 /** The virtual machine object. */
124 void *pvObj;
125 /** The session this VM is associated with. */
126 PSUPDRVSESSION pSession;
127 /** The ring-0 handle of the EMT0 thread.
128 * This is used for ownership checks as well as looking up a VM handle by thread
129 * at times like assertions. */
130 RTNATIVETHREAD hEMT0;
131} GVMHANDLE;
132/** Pointer to a global VM handle. */
133typedef GVMHANDLE *PGVMHANDLE;
134
135/** Number of GVM handles (including the NIL handle). */
136#if HC_ARCH_BITS == 64
137# define GVMM_MAX_HANDLES 8192
138#else
139# define GVMM_MAX_HANDLES 128
140#endif
141
142/**
143 * Per host CPU GVMM data.
144 */
145typedef struct GVMMHOSTCPU
146{
147 /** Magic number (GVMMHOSTCPU_MAGIC). */
148 uint32_t volatile u32Magic;
149 /** The CPU ID. */
150 RTCPUID idCpu;
151 /** The CPU set index. */
152 uint32_t idxCpuSet;
153
154#ifdef GVMM_SCHED_WITH_PPT
155 /** Periodic preemption timer data. */
156 struct
157 {
158 /** The handle to the periodic preemption timer. */
159 PRTTIMER pTimer;
160 /** Spinlock protecting the data below. */
161 RTSPINLOCK hSpinlock;
162 /** The smalles Hz that we need to care about. (static) */
163 uint32_t uMinHz;
164 /** The number of ticks between each historization. */
165 uint32_t cTicksHistoriziationInterval;
166 /** The current historization tick (counting up to
167 * cTicksHistoriziationInterval and then resetting). */
168 uint32_t iTickHistorization;
169 /** The current timer interval. This is set to 0 when inactive. */
170 uint32_t cNsInterval;
171 /** The current timer frequency. This is set to 0 when inactive. */
172 uint32_t uTimerHz;
173 /** The current max frequency reported by the EMTs.
174 * This gets historicize and reset by the timer callback. This is
175 * read without holding the spinlock, so needs atomic updating. */
176 uint32_t volatile uDesiredHz;
177 /** Whether the timer was started or not. */
178 bool volatile fStarted;
179 /** Set if we're starting timer. */
180 bool volatile fStarting;
181 /** The index of the next history entry (mod it). */
182 uint32_t iHzHistory;
183 /** Historicized uDesiredHz values. The array wraps around, new entries
184 * are added at iHzHistory. This is updated approximately every
185 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
186 uint32_t aHzHistory[8];
187 /** Statistics counter for recording the number of interval changes. */
188 uint32_t cChanges;
189 /** Statistics counter for recording the number of timer starts. */
190 uint32_t cStarts;
191 } Ppt;
192#endif /* GVMM_SCHED_WITH_PPT */
193
194} GVMMHOSTCPU;
195/** Pointer to the per host CPU GVMM data. */
196typedef GVMMHOSTCPU *PGVMMHOSTCPU;
197/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
198#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
199/** The interval on history entry should cover (approximately) give in
200 * nanoseconds. */
201#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
202
203
204/**
205 * The GVMM instance data.
206 */
207typedef struct GVMM
208{
209 /** Eyecatcher / magic. */
210 uint32_t u32Magic;
211 /** The index of the head of the free handle chain. (0 is nil.) */
212 uint16_t volatile iFreeHead;
213 /** The index of the head of the active handle chain. (0 is nil.) */
214 uint16_t volatile iUsedHead;
215 /** The number of VMs. */
216 uint16_t volatile cVMs;
217 /** Alignment padding. */
218 uint16_t u16Reserved;
219 /** The number of EMTs. */
220 uint32_t volatile cEMTs;
221 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
222 uint32_t volatile cHaltedEMTs;
223 /** Mini lock for restricting early wake-ups to one thread. */
224 bool volatile fDoingEarlyWakeUps;
225 bool afPadding[3]; /**< explicit alignment padding. */
226 /** When the next halted or sleeping EMT will wake up.
227 * This is set to 0 when it needs recalculating and to UINT64_MAX when
228 * there are no halted or sleeping EMTs in the GVMM. */
229 uint64_t uNsNextEmtWakeup;
230 /** The lock used to serialize VM creation, destruction and associated events that
231 * isn't performance critical. Owners may acquire the list lock. */
232 RTCRITSECT CreateDestroyLock;
233 /** The lock used to serialize used list updates and accesses.
234 * This indirectly includes scheduling since the scheduler will have to walk the
235 * used list to examin running VMs. Owners may not acquire any other locks. */
236 RTCRITSECTRW UsedLock;
237 /** The handle array.
238 * The size of this array defines the maximum number of currently running VMs.
239 * The first entry is unused as it represents the NIL handle. */
240 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
241
242 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
243 * The number of EMTs that means we no longer consider ourselves alone on a
244 * CPU/Core.
245 */
246 uint32_t cEMTsMeansCompany;
247 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
248 * The minimum sleep time for when we're alone, in nano seconds.
249 */
250 uint32_t nsMinSleepAlone;
251 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
252 * The minimum sleep time for when we've got company, in nano seconds.
253 */
254 uint32_t nsMinSleepCompany;
255 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
256 * The limit for the first round of early wake-ups, given in nano seconds.
257 */
258 uint32_t nsEarlyWakeUp1;
259 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
260 * The limit for the second round of early wake-ups, given in nano seconds.
261 */
262 uint32_t nsEarlyWakeUp2;
263
264 /** Set if we're doing early wake-ups.
265 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
266 bool volatile fDoEarlyWakeUps;
267
268 /** The number of entries in the host CPU array (aHostCpus). */
269 uint32_t cHostCpus;
270 /** Per host CPU data (variable length). */
271 GVMMHOSTCPU aHostCpus[1];
272} GVMM;
273AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
274AssertCompileMemberAlignment(GVMM, UsedLock, 8);
275AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
276/** Pointer to the GVMM instance data. */
277typedef GVMM *PGVMM;
278
279/** The GVMM::u32Magic value (Charlie Haden). */
280#define GVMM_MAGIC UINT32_C(0x19370806)
281
282
283
284/*********************************************************************************************************************************
285* Global Variables *
286*********************************************************************************************************************************/
287/** Pointer to the GVMM instance data.
288 * (Just my general dislike for global variables.) */
289static PGVMM g_pGVMM = NULL;
290
291/** Macro for obtaining and validating the g_pGVMM pointer.
292 * On failure it will return from the invoking function with the specified return value.
293 *
294 * @param pGVMM The name of the pGVMM variable.
295 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
296 * status codes.
297 */
298#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
299 do { \
300 (pGVMM) = g_pGVMM;\
301 AssertPtrReturn((pGVMM), (rc)); \
302 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
303 } while (0)
304
305/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
306 * On failure it will return from the invoking function.
307 *
308 * @param pGVMM The name of the pGVMM variable.
309 */
310#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
311 do { \
312 (pGVMM) = g_pGVMM;\
313 AssertPtrReturnVoid((pGVMM)); \
314 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
315 } while (0)
316
317
318/*********************************************************************************************************************************
319* Internal Functions *
320*********************************************************************************************************************************/
321static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession);
322static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
323static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
324static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM);
325
326#ifdef GVMM_SCHED_WITH_PPT
327static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
328#endif
329
330
331/**
332 * Initializes the GVMM.
333 *
334 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
335 *
336 * @returns VBox status code.
337 */
338GVMMR0DECL(int) GVMMR0Init(void)
339{
340 LogFlow(("GVMMR0Init:\n"));
341
342 /*
343 * Allocate and initialize the instance data.
344 */
345 uint32_t cHostCpus = RTMpGetArraySize();
346 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
347
348 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
349 if (!pGVMM)
350 return VERR_NO_MEMORY;
351 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
352 "GVMM-CreateDestroyLock");
353 if (RT_SUCCESS(rc))
354 {
355 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
356 if (RT_SUCCESS(rc))
357 {
358 pGVMM->u32Magic = GVMM_MAGIC;
359 pGVMM->iUsedHead = 0;
360 pGVMM->iFreeHead = 1;
361
362 /* the nil handle */
363 pGVMM->aHandles[0].iSelf = 0;
364 pGVMM->aHandles[0].iNext = 0;
365
366 /* the tail */
367 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
368 pGVMM->aHandles[i].iSelf = i;
369 pGVMM->aHandles[i].iNext = 0; /* nil */
370
371 /* the rest */
372 while (i-- > 1)
373 {
374 pGVMM->aHandles[i].iSelf = i;
375 pGVMM->aHandles[i].iNext = i + 1;
376 }
377
378 /* The default configuration values. */
379 uint32_t cNsResolution = RTSemEventMultiGetResolution();
380 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted to relative to the cpu count or something... */
381 if (cNsResolution >= 5*RT_NS_100US)
382 {
383 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
384 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
385 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
386 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
387 }
388 else if (cNsResolution > RT_NS_100US)
389 {
390 pGVMM->nsMinSleepAlone = cNsResolution / 2;
391 pGVMM->nsMinSleepCompany = cNsResolution / 4;
392 pGVMM->nsEarlyWakeUp1 = 0;
393 pGVMM->nsEarlyWakeUp2 = 0;
394 }
395 else
396 {
397 pGVMM->nsMinSleepAlone = 2000;
398 pGVMM->nsMinSleepCompany = 2000;
399 pGVMM->nsEarlyWakeUp1 = 0;
400 pGVMM->nsEarlyWakeUp2 = 0;
401 }
402 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
403
404 /* The host CPU data. */
405 pGVMM->cHostCpus = cHostCpus;
406 uint32_t iCpu = cHostCpus;
407 RTCPUSET PossibleSet;
408 RTMpGetSet(&PossibleSet);
409 while (iCpu-- > 0)
410 {
411 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
412#ifdef GVMM_SCHED_WITH_PPT
413 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
414 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
415 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
416 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
417 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
418 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
419 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
420 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
421 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
422 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
423 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
424 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
425#endif
426
427 if (RTCpuSetIsMember(&PossibleSet, iCpu))
428 {
429 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
430 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
431
432#ifdef GVMM_SCHED_WITH_PPT
433 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
434 50*1000*1000 /* whatever */,
435 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
436 gvmmR0SchedPeriodicPreemptionTimerCallback,
437 &pGVMM->aHostCpus[iCpu]);
438 if (RT_SUCCESS(rc))
439 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
440 if (RT_FAILURE(rc))
441 {
442 while (iCpu < cHostCpus)
443 {
444 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
445 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
446 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
447 iCpu++;
448 }
449 break;
450 }
451#endif
452 }
453 else
454 {
455 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
456 pGVMM->aHostCpus[iCpu].u32Magic = 0;
457 }
458 }
459 if (RT_SUCCESS(rc))
460 {
461 g_pGVMM = pGVMM;
462 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
463 return VINF_SUCCESS;
464 }
465
466 /* bail out. */
467 RTCritSectRwDelete(&pGVMM->UsedLock);
468 }
469 RTCritSectDelete(&pGVMM->CreateDestroyLock);
470 }
471
472 RTMemFree(pGVMM);
473 return rc;
474}
475
476
477/**
478 * Terminates the GVM.
479 *
480 * This is called while owning the loader semaphore (see supdrvLdrFree()).
481 * And unless something is wrong, there should be absolutely no VMs
482 * registered at this point.
483 */
484GVMMR0DECL(void) GVMMR0Term(void)
485{
486 LogFlow(("GVMMR0Term:\n"));
487
488 PGVMM pGVMM = g_pGVMM;
489 g_pGVMM = NULL;
490 if (RT_UNLIKELY(!RT_VALID_PTR(pGVMM)))
491 {
492 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
493 return;
494 }
495
496 /*
497 * First of all, stop all active timers.
498 */
499 uint32_t cActiveTimers = 0;
500 uint32_t iCpu = pGVMM->cHostCpus;
501 while (iCpu-- > 0)
502 {
503 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
504#ifdef GVMM_SCHED_WITH_PPT
505 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
506 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
507 cActiveTimers++;
508#endif
509 }
510 if (cActiveTimers)
511 RTThreadSleep(1); /* fudge */
512
513 /*
514 * Invalidate the and free resources.
515 */
516 pGVMM->u32Magic = ~GVMM_MAGIC;
517 RTCritSectRwDelete(&pGVMM->UsedLock);
518 RTCritSectDelete(&pGVMM->CreateDestroyLock);
519
520 pGVMM->iFreeHead = 0;
521 if (pGVMM->iUsedHead)
522 {
523 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
524 pGVMM->iUsedHead = 0;
525 }
526
527#ifdef GVMM_SCHED_WITH_PPT
528 iCpu = pGVMM->cHostCpus;
529 while (iCpu-- > 0)
530 {
531 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
532 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
533 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
534 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
535 }
536#endif
537
538 RTMemFree(pGVMM);
539}
540
541
542/**
543 * A quick hack for setting global config values.
544 *
545 * @returns VBox status code.
546 *
547 * @param pSession The session handle. Used for authentication.
548 * @param pszName The variable name.
549 * @param u64Value The new value.
550 */
551GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
552{
553 /*
554 * Validate input.
555 */
556 PGVMM pGVMM;
557 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
558 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
559 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
560
561 /*
562 * String switch time!
563 */
564 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
565 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
566 int rc = VINF_SUCCESS;
567 pszName += sizeof("/GVMM/") - 1;
568 if (!strcmp(pszName, "cEMTsMeansCompany"))
569 {
570 if (u64Value <= UINT32_MAX)
571 pGVMM->cEMTsMeansCompany = u64Value;
572 else
573 rc = VERR_OUT_OF_RANGE;
574 }
575 else if (!strcmp(pszName, "MinSleepAlone"))
576 {
577 if (u64Value <= RT_NS_100MS)
578 pGVMM->nsMinSleepAlone = u64Value;
579 else
580 rc = VERR_OUT_OF_RANGE;
581 }
582 else if (!strcmp(pszName, "MinSleepCompany"))
583 {
584 if (u64Value <= RT_NS_100MS)
585 pGVMM->nsMinSleepCompany = u64Value;
586 else
587 rc = VERR_OUT_OF_RANGE;
588 }
589 else if (!strcmp(pszName, "EarlyWakeUp1"))
590 {
591 if (u64Value <= RT_NS_100MS)
592 {
593 pGVMM->nsEarlyWakeUp1 = u64Value;
594 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
595 }
596 else
597 rc = VERR_OUT_OF_RANGE;
598 }
599 else if (!strcmp(pszName, "EarlyWakeUp2"))
600 {
601 if (u64Value <= RT_NS_100MS)
602 {
603 pGVMM->nsEarlyWakeUp2 = u64Value;
604 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
605 }
606 else
607 rc = VERR_OUT_OF_RANGE;
608 }
609 else
610 rc = VERR_CFGM_VALUE_NOT_FOUND;
611 return rc;
612}
613
614
615/**
616 * A quick hack for getting global config values.
617 *
618 * @returns VBox status code.
619 *
620 * @param pSession The session handle. Used for authentication.
621 * @param pszName The variable name.
622 * @param pu64Value Where to return the value.
623 */
624GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
625{
626 /*
627 * Validate input.
628 */
629 PGVMM pGVMM;
630 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
631 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
632 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
633 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
634
635 /*
636 * String switch time!
637 */
638 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
639 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
640 int rc = VINF_SUCCESS;
641 pszName += sizeof("/GVMM/") - 1;
642 if (!strcmp(pszName, "cEMTsMeansCompany"))
643 *pu64Value = pGVMM->cEMTsMeansCompany;
644 else if (!strcmp(pszName, "MinSleepAlone"))
645 *pu64Value = pGVMM->nsMinSleepAlone;
646 else if (!strcmp(pszName, "MinSleepCompany"))
647 *pu64Value = pGVMM->nsMinSleepCompany;
648 else if (!strcmp(pszName, "EarlyWakeUp1"))
649 *pu64Value = pGVMM->nsEarlyWakeUp1;
650 else if (!strcmp(pszName, "EarlyWakeUp2"))
651 *pu64Value = pGVMM->nsEarlyWakeUp2;
652 else
653 rc = VERR_CFGM_VALUE_NOT_FOUND;
654 return rc;
655}
656
657
/**
 * Acquire the 'used' lock in shared mode.
 *
 * This prevents destruction of the VM while we're in ring-0.
 *
 * @returns IPRT status code, see RTCritSectRwEnterShared.
 * @param   a_pGVMM     The GVMM instance data.
 * @sa      GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
 */
#define GVMMR0_USED_SHARED_LOCK(a_pGVMM)        RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)

/**
 * Release the 'used' lock when owning it in shared mode.
 *
 * @returns IPRT status code, see RTCritSectRwLeaveShared.
 * @param   a_pGVMM     The GVMM instance data.
 * @sa      GVMMR0_USED_SHARED_LOCK
 */
#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM)      RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)

/**
 * Acquire the 'used' lock in exclusive mode.
 *
 * Only use this when making changes to the used list.
 *
 * @returns IPRT status code, see RTCritSectRwEnterExcl.
 * @param   a_pGVMM     The GVMM instance data.
 * @sa      GVMMR0_USED_EXCLUSIVE_UNLOCK
 */
#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM)     RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)

/**
 * Release the 'used' lock when owning it in exclusive mode.
 *
 * @returns IPRT status code, see RTCritSectRwLeaveExcl.
 * @param   a_pGVMM     The GVMM instance data.
 * @sa      GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
 */
#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM)   RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
697
698
699/**
700 * Try acquire the 'create & destroy' lock.
701 *
702 * @returns IPRT status code, see RTSemFastMutexRequest.
703 * @param pGVMM The GVMM instance data.
704 */
705DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
706{
707 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
708 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
709 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
710 return rc;
711}
712
713
714/**
715 * Release the 'create & destroy' lock.
716 *
717 * @returns IPRT status code, see RTSemFastMutexRequest.
718 * @param pGVMM The GVMM instance data.
719 */
720DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
721{
722 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
723 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
724 AssertRC(rc);
725 return rc;
726}
727
728
729/**
730 * Request wrapper for the GVMMR0CreateVM API.
731 *
732 * @returns VBox status code.
733 * @param pReq The request buffer.
734 * @param pSession The session handle. The VM will be associated with this.
735 */
736GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
737{
738 /*
739 * Validate the request.
740 */
741 if (!RT_VALID_PTR(pReq))
742 return VERR_INVALID_POINTER;
743 if (pReq->Hdr.cbReq != sizeof(*pReq))
744 return VERR_INVALID_PARAMETER;
745 if (pReq->pSession != pSession)
746 return VERR_INVALID_POINTER;
747
748 /*
749 * Execute it.
750 */
751 PGVM pGVM;
752 pReq->pVMR0 = NULL;
753 pReq->pVMR3 = NIL_RTR3PTR;
754 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pGVM);
755 if (RT_SUCCESS(rc))
756 {
757 pReq->pVMR0 = pGVM; /** @todo don't expose this to ring-3, use a unique random number instead. */
758 pReq->pVMR3 = pGVM->pVMR3;
759 }
760 return rc;
761}
762
763
764/**
765 * Allocates the VM structure and registers it with GVM.
766 *
767 * The caller will become the VM owner and there by the EMT.
768 *
769 * @returns VBox status code.
770 * @param pSession The support driver session.
771 * @param cCpus Number of virtual CPUs for the new VM.
772 * @param ppGVM Where to store the pointer to the VM structure.
773 *
774 * @thread EMT.
775 */
776GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PGVM *ppGVM)
777{
778 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
779 PGVMM pGVMM;
780 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
781
782 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
783 *ppGVM = NULL;
784
785 if ( cCpus == 0
786 || cCpus > VMM_MAX_CPU_COUNT)
787 return VERR_INVALID_PARAMETER;
788
789 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
790 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
791 RTPROCESS ProcId = RTProcSelf();
792 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
793
794 /*
795 * The whole allocation process is protected by the lock.
796 */
797 int rc = gvmmR0CreateDestroyLock(pGVMM);
798 AssertRCReturn(rc, rc);
799
800 /*
801 * Only one VM per session.
802 */
803 if (SUPR0GetSessionVM(pSession) != NULL)
804 {
805 gvmmR0CreateDestroyUnlock(pGVMM);
806 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
807 return VERR_ALREADY_EXISTS;
808 }
809
810 /*
811 * Allocate a handle first so we don't waste resources unnecessarily.
812 */
813 uint16_t iHandle = pGVMM->iFreeHead;
814 if (iHandle)
815 {
816 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
817
818 /* consistency checks, a bit paranoid as always. */
819 if ( !pHandle->pGVM
820 && !pHandle->pvObj
821 && pHandle->iSelf == iHandle)
822 {
823 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
824 if (pHandle->pvObj)
825 {
826 /*
827 * Move the handle from the free to used list and perform permission checks.
828 */
829 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
830 AssertRC(rc);
831
832 pGVMM->iFreeHead = pHandle->iNext;
833 pHandle->iNext = pGVMM->iUsedHead;
834 pGVMM->iUsedHead = iHandle;
835 pGVMM->cVMs++;
836
837 pHandle->pGVM = NULL;
838 pHandle->pSession = pSession;
839 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
840 pHandle->ProcId = NIL_RTPROCESS;
841
842 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
843
844 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
845 if (RT_SUCCESS(rc))
846 {
847 /*
848 * Allocate memory for the VM structure (combined VM + GVM).
849 */
850 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
851 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
852 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
853 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
854 if (RT_SUCCESS(rc))
855 {
856 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
857 AssertPtr(pGVM);
858
859 /*
860 * Initialise the structure.
861 */
862 RT_BZERO(pGVM, cPages << PAGE_SHIFT);
863 gvmmR0InitPerVMData(pGVM, iHandle, cCpus, pSession);
864 pGVM->gvmm.s.VMMemObj = hVMMemObj;
865 rc = GMMR0InitPerVMData(pGVM);
866 int rc2 = PGMR0InitPerVMData(pGVM);
867 int rc3 = VMMR0InitPerVMData(pGVM);
868 DBGFR0InitPerVMData(pGVM);
869 PDMR0InitPerVMData(pGVM);
870 IOMR0InitPerVMData(pGVM);
871 TMR0InitPerVMData(pGVM);
872 if (RT_SUCCESS(rc) && RT_SUCCESS(rc2) && RT_SUCCESS(rc3))
873 {
874 /*
875 * Allocate page array.
876 * This currently have to be made available to ring-3, but this is should change eventually.
877 */
878 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
879 if (RT_SUCCESS(rc))
880 {
881 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
882 for (uint32_t iPage = 0; iPage < cPages; iPage++)
883 {
884 paPages[iPage].uReserved = 0;
885 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
886 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
887 }
888
889 /*
890 * Map the page array, VM and VMCPU structures into ring-3.
891 */
892 AssertCompileSizeAlignment(VM, PAGE_SIZE);
893 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
894 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
895 0 /*offSub*/, sizeof(VM));
896 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
897 {
898 AssertCompileSizeAlignment(VMCPU, PAGE_SIZE);
899 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
900 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
901 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
902 }
903 if (RT_SUCCESS(rc))
904 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
905 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
906 NIL_RTR0PROCESS);
907 if (RT_SUCCESS(rc))
908 {
909 /*
910 * Initialize all the VM pointers.
911 */
912 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
913 AssertMsg(RTR0MemUserIsValidAddr(pVMR3) && pVMR3 != NIL_RTR3PTR, ("%p\n", pVMR3));
914
915 for (VMCPUID i = 0; i < cCpus; i++)
916 {
917 pGVM->aCpus[i].pVMR0 = pGVM;
918 pGVM->aCpus[i].pVMR3 = pVMR3;
919 pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
920 pGVM->aCpus[i].pVCpuR3 = pGVM->apCpusR3[i];
921 pGVM->apCpusR0[i] = &pGVM->aCpus[i];
922 AssertMsg(RTR0MemUserIsValidAddr(pGVM->apCpusR3[i]) && pGVM->apCpusR3[i] != NIL_RTR3PTR,
923 ("apCpusR3[%u]=%p\n", i, pGVM->apCpusR3[i]));
924 }
925
926 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
927 AssertMsg(RTR0MemUserIsValidAddr(pGVM->paVMPagesR3) && pGVM->paVMPagesR3 != NIL_RTR3PTR,
928 ("%p\n", pGVM->paVMPagesR3));
929
930 /*
931 * Complete the handle - take the UsedLock sem just to be careful.
932 */
933 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
934 AssertRC(rc);
935
936 pHandle->pGVM = pGVM;
937 pHandle->hEMT0 = hEMT0;
938 pHandle->ProcId = ProcId;
939 pGVM->pVMR3 = pVMR3;
940 pGVM->pVMR3Unsafe = pVMR3;
941 pGVM->aCpus[0].hEMT = hEMT0;
942 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
943 pGVM->aCpus[0].cEmtHashCollisions = 0;
944 uint32_t const idxHash = GVMM_EMT_HASH_1(hEMT0);
945 pGVM->aCpus[0].gvmm.s.idxEmtHash = (uint16_t)idxHash;
946 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hEMT0;
947 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = 0;
948 pGVMM->cEMTs += cCpus;
949
950 /* Associate it with the session and create the context hook for EMT0. */
951 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
952 if (RT_SUCCESS(rc))
953 {
954 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
955 if (RT_SUCCESS(rc))
956 {
957 /*
958 * Done!
959 */
960 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
961
962 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
963 gvmmR0CreateDestroyUnlock(pGVMM);
964
965 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
966
967 *ppGVM = pGVM;
968 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
969 return VINF_SUCCESS;
970 }
971
972 SUPR0SetSessionVM(pSession, NULL, NULL);
973 }
974 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
975 }
976
977 /* Cleanup mappings. */
978 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
979 {
980 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
981 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
982 }
983 for (VMCPUID i = 0; i < cCpus; i++)
984 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
985 {
986 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
987 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
988 }
989 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
990 {
991 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
992 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
993 }
994 }
995 }
996 else
997 {
998 if (RT_SUCCESS_NP(rc))
999 rc = rc2;
1000 if (RT_SUCCESS_NP(rc))
1001 rc = rc3;
1002 }
1003 }
1004 }
1005 /* else: The user wasn't permitted to create this VM. */
1006
1007 /*
1008 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1009 * object reference here. A little extra mess because of non-recursive lock.
1010 */
1011 void *pvObj = pHandle->pvObj;
1012 pHandle->pvObj = NULL;
1013 gvmmR0CreateDestroyUnlock(pGVMM);
1014
1015 SUPR0ObjRelease(pvObj, pSession);
1016
1017 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1018 return rc;
1019 }
1020
1021 rc = VERR_NO_MEMORY;
1022 }
1023 else
1024 rc = VERR_GVMM_IPE_1;
1025 }
1026 else
1027 rc = VERR_GVM_TOO_MANY_VMS;
1028
1029 gvmmR0CreateDestroyUnlock(pGVMM);
1030 return rc;
1031}
1032
1033
1034/**
1035 * Initializes the per VM data belonging to GVMM.
1036 *
1037 * @param pGVM Pointer to the global VM structure.
1038 * @param hSelf The handle.
1039 * @param cCpus The CPU count.
1040 * @param pSession The session this VM is associated with.
1041 */
1042static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession)
1043{
1044 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1045 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1046 AssertCompileMemberAlignment(VM, cpum, 64);
1047 AssertCompileMemberAlignment(VM, tm, 64);
1048
1049 /* GVM: */
1050 pGVM->u32Magic = GVM_MAGIC;
1051 pGVM->hSelf = hSelf;
1052 pGVM->cCpus = cCpus;
1053 pGVM->pSession = pSession;
1054 pGVM->pSelf = pGVM;
1055
1056 /* VM: */
1057 pGVM->enmVMState = VMSTATE_CREATING;
1058 pGVM->hSelfUnsafe = hSelf;
1059 pGVM->pSessionUnsafe = pSession;
1060 pGVM->pVMR0ForCall = pGVM;
1061 pGVM->cCpusUnsafe = cCpus;
1062 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1063 pGVM->uStructVersion = 1;
1064 pGVM->cbSelf = sizeof(VM);
1065 pGVM->cbVCpu = sizeof(VMCPU);
1066
1067 /* GVMM: */
1068 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1069 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1070 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1071 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1072 pGVM->gvmm.s.fDoneVMMR0Init = false;
1073 pGVM->gvmm.s.fDoneVMMR0Term = false;
1074 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash); i++)
1075 {
1076 pGVM->gvmm.s.aEmtHash[i].hNativeEmt = NIL_RTNATIVETHREAD;
1077 pGVM->gvmm.s.aEmtHash[i].idVCpu = NIL_VMCPUID;
1078 }
1079
1080 /*
1081 * Per virtual CPU.
1082 */
1083 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1084 {
1085 pGVM->aCpus[i].idCpu = i;
1086 pGVM->aCpus[i].idCpuUnsafe = i;
1087 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1088 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1089 pGVM->aCpus[i].gvmm.s.idxEmtHash = UINT16_MAX;
1090 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1091 pGVM->aCpus[i].pGVM = pGVM;
1092 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1093 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1094 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1095 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1096 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1097 pGVM->aCpus[i].pVCpuR0ForVtg = &pGVM->aCpus[i];
1098 }
1099}
1100
1101
1102/**
1103 * Does the VM initialization.
1104 *
1105 * @returns VBox status code.
1106 * @param pGVM The global (ring-0) VM structure.
1107 */
1108GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1109{
1110 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1111
1112 int rc = VERR_INTERNAL_ERROR_3;
1113 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1114 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1115 {
1116 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1117 {
1118 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1119 if (RT_FAILURE(rc))
1120 {
1121 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1122 break;
1123 }
1124 }
1125 }
1126 else
1127 rc = VERR_WRONG_ORDER;
1128
1129 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1130 return rc;
1131}
1132
1133
1134/**
1135 * Indicates that we're done with the ring-0 initialization
1136 * of the VM.
1137 *
1138 * @param pGVM The global (ring-0) VM structure.
1139 * @thread EMT(0)
1140 */
1141GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1142{
1143 /* Set the indicator. */
1144 pGVM->gvmm.s.fDoneVMMR0Init = true;
1145}
1146
1147
1148/**
1149 * Indicates that we're doing the ring-0 termination of the VM.
1150 *
1151 * @returns true if termination hasn't been done already, false if it has.
1152 * @param pGVM Pointer to the global VM structure. Optional.
1153 * @thread EMT(0) or session cleanup thread.
1154 */
1155GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1156{
1157 /* Validate the VM structure, state and handle. */
1158 AssertPtrReturn(pGVM, false);
1159
1160 /* Set the indicator. */
1161 if (pGVM->gvmm.s.fDoneVMMR0Term)
1162 return false;
1163 pGVM->gvmm.s.fDoneVMMR0Term = true;
1164 return true;
1165}
1166
1167
1168/**
1169 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1170 *
1171 * This is call from the vmR3DestroyFinalBit and from a error path in VMR3Create,
1172 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1173 * would've been nice if the caller was actually the EMT thread or that we somehow
1174 * could've associated the calling thread with the VM up front.
1175 *
1176 * @returns VBox status code.
1177 * @param pGVM The global (ring-0) VM structure.
1178 *
1179 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1180 */
1181GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM)
1182{
1183 LogFlow(("GVMMR0DestroyVM: pGVM=%p\n", pGVM));
1184 PGVMM pGVMM;
1185 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1186
1187 /*
1188 * Validate the VM structure, state and caller.
1189 */
1190 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1191 AssertReturn(!((uintptr_t)pGVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1192 AssertMsgReturn(pGVM->enmVMState >= VMSTATE_CREATING && pGVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pGVM->enmVMState),
1193 VERR_WRONG_ORDER);
1194
1195 uint32_t hGVM = pGVM->hSelf;
1196 ASMCompilerBarrier();
1197 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1198 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1199
1200 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1201 AssertReturn(pHandle->pGVM == pGVM, VERR_NOT_OWNER);
1202
1203 RTPROCESS ProcId = RTProcSelf();
1204 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1205 AssertReturn( ( pHandle->hEMT0 == hSelf
1206 && pHandle->ProcId == ProcId)
1207 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1208
1209 /*
1210 * Lookup the handle and destroy the object.
1211 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1212 * object, we take some precautions against racing callers just in case...
1213 */
1214 int rc = gvmmR0CreateDestroyLock(pGVMM);
1215 AssertRC(rc);
1216
1217 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1218 if ( pHandle->pGVM == pGVM
1219 && ( ( pHandle->hEMT0 == hSelf
1220 && pHandle->ProcId == ProcId)
1221 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1222 && RT_VALID_PTR(pHandle->pvObj)
1223 && RT_VALID_PTR(pHandle->pSession)
1224 && RT_VALID_PTR(pHandle->pGVM)
1225 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1226 {
1227 /* Check that other EMTs have deregistered. */
1228 uint32_t cNotDeregistered = 0;
1229 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1230 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != GVMM_RTNATIVETHREAD_DESTROYED;
1231 if (cNotDeregistered == 0)
1232 {
1233 /* Grab the object pointer. */
1234 void *pvObj = pHandle->pvObj;
1235 pHandle->pvObj = NULL;
1236 gvmmR0CreateDestroyUnlock(pGVMM);
1237
1238 SUPR0ObjRelease(pvObj, pHandle->pSession);
1239 }
1240 else
1241 {
1242 gvmmR0CreateDestroyUnlock(pGVMM);
1243 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1244 }
1245 }
1246 else
1247 {
1248 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pGVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pGVM=%p hSelf=%p\n",
1249 pHandle, pHandle->pGVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pGVM, hSelf);
1250 gvmmR0CreateDestroyUnlock(pGVMM);
1251 rc = VERR_GVMM_IPE_2;
1252 }
1253
1254 return rc;
1255}
1256
1257
1258/**
1259 * Performs VM cleanup task as part of object destruction.
1260 *
1261 * @param pGVM The GVM pointer.
1262 */
1263static void gvmmR0CleanupVM(PGVM pGVM)
1264{
1265 if ( pGVM->gvmm.s.fDoneVMMR0Init
1266 && !pGVM->gvmm.s.fDoneVMMR0Term)
1267 {
1268 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1269 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM)
1270 {
1271 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1272 VMMR0TermVM(pGVM, NIL_VMCPUID);
1273 }
1274 else
1275 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1276 }
1277
1278 GMMR0CleanupVM(pGVM);
1279#ifdef VBOX_WITH_NEM_R0
1280 NEMR0CleanupVM(pGVM);
1281#endif
1282 PDMR0CleanupVM(pGVM);
1283 IOMR0CleanupVM(pGVM);
1284 DBGFR0CleanupVM(pGVM);
1285 PGMR0CleanupVM(pGVM);
1286 TMR0CleanupVM(pGVM);
1287 VMMR0CleanupVM(pGVM);
1288}
1289
1290
1291/**
1292 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1293 *
1294 * pvUser1 is the GVM instance pointer.
1295 * pvUser2 is the handle pointer.
1296 */
1297static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1298{
1299 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1300
1301 NOREF(pvObj);
1302
1303 /*
1304 * Some quick, paranoid, input validation.
1305 */
1306 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1307 AssertPtr(pHandle);
1308 PGVMM pGVMM = (PGVMM)pvUser1;
1309 Assert(pGVMM == g_pGVMM);
1310 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1311 if ( !iHandle
1312 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1313 || iHandle != pHandle->iSelf)
1314 {
1315 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1316 return;
1317 }
1318
1319 int rc = gvmmR0CreateDestroyLock(pGVMM);
1320 AssertRC(rc);
1321 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1322 AssertRC(rc);
1323
1324 /*
1325 * This is a tad slow but a doubly linked list is too much hassle.
1326 */
1327 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1328 {
1329 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1330 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1331 gvmmR0CreateDestroyUnlock(pGVMM);
1332 return;
1333 }
1334
1335 if (pGVMM->iUsedHead == iHandle)
1336 pGVMM->iUsedHead = pHandle->iNext;
1337 else
1338 {
1339 uint16_t iPrev = pGVMM->iUsedHead;
1340 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1341 while (iPrev)
1342 {
1343 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1344 {
1345 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1346 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1347 gvmmR0CreateDestroyUnlock(pGVMM);
1348 return;
1349 }
1350 if (RT_UNLIKELY(c-- <= 0))
1351 {
1352 iPrev = 0;
1353 break;
1354 }
1355
1356 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1357 break;
1358 iPrev = pGVMM->aHandles[iPrev].iNext;
1359 }
1360 if (!iPrev)
1361 {
1362 SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf);
1363 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1364 gvmmR0CreateDestroyUnlock(pGVMM);
1365 return;
1366 }
1367
1368 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1369 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1370 }
1371 pHandle->iNext = 0;
1372 pGVMM->cVMs--;
1373
1374 /*
1375 * Do the global cleanup round.
1376 */
1377 PGVM pGVM = pHandle->pGVM;
1378 if ( RT_VALID_PTR(pGVM)
1379 && pGVM->u32Magic == GVM_MAGIC)
1380 {
1381 pGVMM->cEMTs -= pGVM->cCpus;
1382
1383 if (pGVM->pSession)
1384 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1385
1386 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1387
1388 gvmmR0CleanupVM(pGVM);
1389
1390 /*
1391 * Do the GVMM cleanup - must be done last.
1392 */
1393 /* The VM and VM pages mappings/allocations. */
1394 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1395 {
1396 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1397 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1398 }
1399
1400 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1401 {
1402 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1403 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1404 }
1405
1406 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1407 {
1408 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1409 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1410 }
1411
1412 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1413 {
1414 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1415 {
1416 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1417 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1418 }
1419 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1420 {
1421 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1422 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1423 }
1424 }
1425
1426 /* the GVM structure itself. */
1427 pGVM->u32Magic |= UINT32_C(0x80000000);
1428 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1429 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1430 pGVM = NULL;
1431
1432 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1433 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1434 AssertRC(rc);
1435 }
1436 /* else: GVMMR0CreateVM cleanup. */
1437
1438 /*
1439 * Free the handle.
1440 */
1441 pHandle->iNext = pGVMM->iFreeHead;
1442 pGVMM->iFreeHead = iHandle;
1443 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1444 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1445 ASMAtomicWriteNullPtr(&pHandle->pSession);
1446 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1447 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1448
1449 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1450 gvmmR0CreateDestroyUnlock(pGVMM);
1451 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1452}
1453
1454
1455/**
1456 * Registers the calling thread as the EMT of a Virtual CPU.
1457 *
1458 * Note that VCPU 0 is automatically registered during VM creation.
1459 *
1460 * @returns VBox status code
1461 * @param pGVM The global (ring-0) VM structure.
1462 * @param idCpu VCPU id to register the current thread as.
1463 */
1464GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
1465{
1466 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1467
1468 /*
1469 * Validate the VM structure, state and handle.
1470 */
1471 PGVMM pGVMM;
1472 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /* fTakeUsedLock */);
1473 if (RT_SUCCESS(rc))
1474 {
1475 if (idCpu < pGVM->cCpus)
1476 {
1477 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1478
1479 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1480
1481 /* Check that the EMT isn't already assigned to a thread. */
1482 if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
1483 {
1484 Assert(pGVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1485
1486 /* A thread may only be one EMT (this makes sure hNativeSelf isn't NIL). */
1487 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1488 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1489 if (RT_SUCCESS(rc))
1490 {
1491 /*
1492 * Do the assignment, then try setup the hook. Undo if that fails.
1493 */
1494 unsigned cCollisions = 0;
1495 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
1496 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD)
1497 {
1498 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
1499 do
1500 {
1501 cCollisions++;
1502 Assert(cCollisions < GVMM_EMT_HASH_SIZE);
1503 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
1504 } while (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD);
1505 }
1506 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hNativeSelf;
1507 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = idCpu;
1508 pGVM->aCpus[idCpu].hNativeThreadR0 = hNativeSelf;
1509 pGVM->aCpus[idCpu].hEMT = hNativeSelf;
1510 pGVM->aCpus[idCpu].cEmtHashCollisions = (uint8_t)cCollisions;
1511 pGVM->aCpus[idCpu].gvmm.s.idxEmtHash = (uint16_t)idxHash;
1512
1513 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[idCpu]);
1514 if (RT_SUCCESS(rc))
1515 CPUMR0RegisterVCpuThread(&pGVM->aCpus[idCpu]);
1516 else
1517 {
1518 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1519 pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1520 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = NIL_RTNATIVETHREAD;
1521 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = NIL_VMCPUID;
1522 pGVM->aCpus[idCpu].gvmm.s.idxEmtHash = UINT16_MAX;
1523 }
1524 }
1525 }
1526 else
1527 rc = VERR_ACCESS_DENIED;
1528
1529 gvmmR0CreateDestroyUnlock(pGVMM);
1530 }
1531 else
1532 rc = VERR_INVALID_CPU_ID;
1533 }
1534 return rc;
1535}
1536
1537
1538/**
1539 * Deregisters the calling thread as the EMT of a Virtual CPU.
1540 *
1541 * Note that VCPU 0 shall call GVMMR0DestroyVM intead of this API.
1542 *
1543 * @returns VBox status code
1544 * @param pGVM The global (ring-0) VM structure.
1545 * @param idCpu VCPU id to register the current thread as.
1546 */
1547GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
1548{
1549 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1550
1551 /*
1552 * Validate the VM structure, state and handle.
1553 */
1554 PGVMM pGVMM;
1555 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1556 if (RT_SUCCESS(rc))
1557 {
1558 /*
1559 * Take the destruction lock and recheck the handle state to
1560 * prevent racing GVMMR0DestroyVM.
1561 */
1562 gvmmR0CreateDestroyLock(pGVMM);
1563
1564 uint32_t hSelf = pGVM->hSelf;
1565 ASMCompilerBarrier();
1566 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1567 && pGVMM->aHandles[hSelf].pvObj != NULL
1568 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1569 {
1570 /*
1571 * Do per-EMT cleanups.
1572 */
1573 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1574
1575 /*
1576 * Invalidate hEMT. We don't use NIL here as that would allow
1577 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1578 */
1579 pGVM->aCpus[idCpu].hEMT = GVMM_RTNATIVETHREAD_DESTROYED;
1580 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1581
1582 uint32_t const idxHash = pGVM->aCpus[idCpu].gvmm.s.idxEmtHash;
1583 if (idxHash < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash))
1584 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = GVMM_RTNATIVETHREAD_DESTROYED;
1585 }
1586
1587 gvmmR0CreateDestroyUnlock(pGVMM);
1588 }
1589 return rc;
1590}
1591
1592
1593/**
1594 * Lookup a GVM structure by its handle.
1595 *
1596 * @returns The GVM pointer on success, NULL on failure.
1597 * @param hGVM The global VM handle. Asserts on bad handle.
1598 */
1599GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1600{
1601 PGVMM pGVMM;
1602 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1603
1604 /*
1605 * Validate.
1606 */
1607 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1608 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1609
1610 /*
1611 * Look it up.
1612 */
1613 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1614 AssertPtrReturn(pHandle->pvObj, NULL);
1615 PGVM pGVM = pHandle->pGVM;
1616 AssertPtrReturn(pGVM, NULL);
1617
1618 return pGVM;
1619}
1620
1621
1622/**
1623 * Check that the given GVM and VM structures match up.
1624 *
1625 * The calling thread must be in the same process as the VM. All current lookups
1626 * are by threads inside the same process, so this will not be an issue.
1627 *
1628 * @returns VBox status code.
1629 * @param pGVM The global (ring-0) VM structure.
1630 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1631 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1632 * shared mode when requested.
1633 *
1634 * Be very careful if not taking the lock as it's
1635 * possible that the VM will disappear then!
1636 *
1637 * @remark This will not assert on an invalid pGVM but try return silently.
1638 */
1639static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1640{
1641 /*
1642 * Check the pointers.
1643 */
1644 int rc;
1645 if (RT_LIKELY( RT_VALID_PTR(pGVM)
1646 && ((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0 ))
1647 {
1648 /*
1649 * Get the pGVMM instance and check the VM handle.
1650 */
1651 PGVMM pGVMM;
1652 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1653
1654 uint16_t hGVM = pGVM->hSelf;
1655 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1656 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1657 {
1658 RTPROCESS const pidSelf = RTProcSelf();
1659 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1660 if (fTakeUsedLock)
1661 {
1662 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1663 AssertRCReturn(rc, rc);
1664 }
1665
1666 if (RT_LIKELY( pHandle->pGVM == pGVM
1667 && pHandle->ProcId == pidSelf
1668 && RT_VALID_PTR(pHandle->pvObj)))
1669 {
1670 /*
1671 * Some more VM data consistency checks.
1672 */
1673 if (RT_LIKELY( pGVM->cCpusUnsafe == pGVM->cCpus
1674 && pGVM->hSelfUnsafe == hGVM
1675 && pGVM->pSelf == pGVM))
1676 {
1677 if (RT_LIKELY( pGVM->enmVMState >= VMSTATE_CREATING
1678 && pGVM->enmVMState <= VMSTATE_TERMINATED))
1679 {
1680 *ppGVMM = pGVMM;
1681 return VINF_SUCCESS;
1682 }
1683 rc = VERR_INCONSISTENT_VM_HANDLE;
1684 }
1685 else
1686 rc = VERR_INCONSISTENT_VM_HANDLE;
1687 }
1688 else
1689 rc = VERR_INVALID_VM_HANDLE;
1690
1691 if (fTakeUsedLock)
1692 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1693 }
1694 else
1695 rc = VERR_INVALID_VM_HANDLE;
1696 }
1697 else
1698 rc = VERR_INVALID_POINTER;
1699 return rc;
1700}
1701
1702
1703/**
1704 * Validates a GVM/VM pair.
1705 *
1706 * @returns VBox status code.
1707 * @param pGVM The global (ring-0) VM structure.
1708 */
1709GVMMR0DECL(int) GVMMR0ValidateGVM(PGVM pGVM)
1710{
1711 PGVMM pGVMM;
1712 return gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1713}
1714
1715
1716/**
1717 * Check that the given GVM and VM structures match up.
1718 *
1719 * The calling thread must be in the same process as the VM. All current lookups
1720 * are by threads inside the same process, so this will not be an issue.
1721 *
1722 * @returns VBox status code.
1723 * @param pGVM The global (ring-0) VM structure.
1724 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
1725 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1726 * @thread EMT
1727 *
1728 * @remarks This will assert in all failure paths.
1729 */
1730static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM)
1731{
1732 /*
1733 * Check the pointers.
1734 */
1735 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1736 AssertReturn(((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
1737
1738 /*
1739 * Get the pGVMM instance and check the VM handle.
1740 */
1741 PGVMM pGVMM;
1742 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1743
1744 uint16_t hGVM = pGVM->hSelf;
1745 ASMCompilerBarrier();
1746 AssertReturn( hGVM != NIL_GVM_HANDLE
1747 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1748
1749 RTPROCESS const pidSelf = RTProcSelf();
1750 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1751 AssertReturn( pHandle->pGVM == pGVM
1752 && pHandle->ProcId == pidSelf
1753 && RT_VALID_PTR(pHandle->pvObj),
1754 VERR_INVALID_HANDLE);
1755
1756 /*
1757 * Check the EMT claim.
1758 */
1759 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
1760 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1761 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1762
1763 /*
1764 * Some more VM data consistency checks.
1765 */
1766 AssertReturn(pGVM->cCpusUnsafe == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
1767 AssertReturn(pGVM->hSelfUnsafe == hGVM, VERR_INCONSISTENT_VM_HANDLE);
1768 AssertReturn( pGVM->enmVMState >= VMSTATE_CREATING
1769 && pGVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
1770
1771 *ppGVMM = pGVMM;
1772 return VINF_SUCCESS;
1773}
1774
1775
1776/**
1777 * Validates a GVM/EMT pair.
1778 *
1779 * @returns VBox status code.
1780 * @param pGVM The global (ring-0) VM structure.
1781 * @param idCpu The Virtual CPU ID of the calling EMT.
1782 * @thread EMT(idCpu)
1783 */
1784GVMMR0DECL(int) GVMMR0ValidateGVMandEMT(PGVM pGVM, VMCPUID idCpu)
1785{
1786 PGVMM pGVMM;
1787 return gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1788}
1789
1790
1791/**
1792 * Looks up the VM belonging to the specified EMT thread.
1793 *
1794 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1795 * unnecessary kernel panics when the EMT thread hits an assertion. The
1796 * call may or not be an EMT thread.
1797 *
1798 * @returns Pointer to the VM on success, NULL on failure.
1799 * @param hEMT The native thread handle of the EMT.
1800 * NIL_RTNATIVETHREAD means the current thread
1801 */
1802GVMMR0DECL(PVMCC) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1803{
1804 /*
1805 * No Assertions here as we're usually called in a AssertMsgN or
1806 * RTAssert* context.
1807 */
1808 PGVMM pGVMM = g_pGVMM;
1809 if ( !RT_VALID_PTR(pGVMM)
1810 || pGVMM->u32Magic != GVMM_MAGIC)
1811 return NULL;
1812
1813 if (hEMT == NIL_RTNATIVETHREAD)
1814 hEMT = RTThreadNativeSelf();
1815 RTPROCESS ProcId = RTProcSelf();
1816
1817 /*
1818 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1819 */
1820/** @todo introduce some pid hash table here, please. */
1821 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1822 {
1823 if ( pGVMM->aHandles[i].iSelf == i
1824 && pGVMM->aHandles[i].ProcId == ProcId
1825 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
1826 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
1827 {
1828 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1829 return pGVMM->aHandles[i].pGVM;
1830
1831 /* This is fearly safe with the current process per VM approach. */
1832 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1833 VMCPUID const cCpus = pGVM->cCpus;
1834 ASMCompilerBarrier();
1835 if ( cCpus < 1
1836 || cCpus > VMM_MAX_CPU_COUNT)
1837 continue;
1838 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1839 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1840 return pGVMM->aHandles[i].pGVM;
1841 }
1842 }
1843 return NULL;
1844}
1845
1846
1847/**
1848 * Looks up the GVMCPU belonging to the specified EMT thread.
1849 *
1850 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1851 * unnecessary kernel panics when the EMT thread hits an assertion. The
1852 * call may or not be an EMT thread.
1853 *
1854 * @returns Pointer to the VM on success, NULL on failure.
1855 * @param hEMT The native thread handle of the EMT.
1856 * NIL_RTNATIVETHREAD means the current thread
1857 */
1858GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
1859{
1860 /*
1861 * No Assertions here as we're usually called in a AssertMsgN,
1862 * RTAssert*, Log and LogRel contexts.
1863 */
1864 PGVMM pGVMM = g_pGVMM;
1865 if ( !RT_VALID_PTR(pGVMM)
1866 || pGVMM->u32Magic != GVMM_MAGIC)
1867 return NULL;
1868
1869 if (hEMT == NIL_RTNATIVETHREAD)
1870 hEMT = RTThreadNativeSelf();
1871 RTPROCESS ProcId = RTProcSelf();
1872
1873 /*
1874 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1875 */
1876/** @todo introduce some pid hash table here, please. */
1877 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1878 {
1879 if ( pGVMM->aHandles[i].iSelf == i
1880 && pGVMM->aHandles[i].ProcId == ProcId
1881 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
1882 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
1883 {
1884 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1885 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1886 return &pGVM->aCpus[0];
1887
1888 /* This is fearly safe with the current process per VM approach. */
1889 VMCPUID const cCpus = pGVM->cCpus;
1890 ASMCompilerBarrier();
1891 ASMCompilerBarrier();
1892 if ( cCpus < 1
1893 || cCpus > VMM_MAX_CPU_COUNT)
1894 continue;
1895 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1896 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1897 return &pGVM->aCpus[idCpu];
1898 }
1899 }
1900 return NULL;
1901}
1902
1903
1904/**
1905 * Get the GVMCPU structure for the given EMT.
1906 *
1907 * @returns The VCpu structure for @a hEMT, NULL if not an EMT.
1908 * @param pGVM The global (ring-0) VM structure.
1909 * @param hEMT The native thread handle of the EMT.
1910 * NIL_RTNATIVETHREAD means the current thread
1911 */
1912GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByGVMandEMT(PGVM pGVM, RTNATIVETHREAD hEMT)
1913{
1914 /*
1915 * Validate & adjust input.
1916 */
1917 AssertPtr(pGVM);
1918 Assert(pGVM->u32Magic == GVM_MAGIC);
1919 if (hEMT == NIL_RTNATIVETHREAD /* likely */)
1920 {
1921 hEMT = RTThreadNativeSelf();
1922 AssertReturn(hEMT != NIL_RTNATIVETHREAD, NULL);
1923 }
1924
1925 /*
1926 * Find the matching hash table entry.
1927 */
1928 uint32_t idxHash = GVMM_EMT_HASH_1(hEMT);
1929 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
1930 { /* likely */ }
1931 else
1932 {
1933#ifdef VBOX_STRICT
1934 unsigned cCollisions = 0;
1935#endif
1936 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hEMT);
1937 for (;;)
1938 {
1939 Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
1940 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
1941 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
1942 break;
1943 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
1944 {
1945#ifdef VBOX_STRICT
1946 uint32_t idxCpu = pGVM->cCpus;
1947 AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
1948 while (idxCpu-- > 0)
1949 Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hEMT);
1950#endif
1951 return NULL;
1952 }
1953 }
1954 }
1955
1956 /*
1957 * Validate the VCpu number and translate it into a pointer.
1958 */
1959 VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
1960 AssertReturn(idCpu < pGVM->cCpus, NULL);
1961 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
1962 Assert(pGVCpu->hNativeThreadR0 == hEMT);
1963 Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
1964 return pGVCpu;
1965}
1966
1967
1968/**
1969 * Converts a pointer with the GVM structure to a host physical address.
1970 *
1971 * @returns Host physical address.
1972 * @param pGVM The global (ring-0) VM structure.
1973 * @param pv The address to convert.
1974 * @thread EMT
1975 */
1976GVMMR0DECL(RTHCPHYS) GVMMR0ConvertGVMPtr2HCPhys(PGVM pGVM, void *pv)
1977{
1978 AssertPtr(pGVM);
1979 Assert(pGVM->u32Magic == GVM_MAGIC);
1980 uintptr_t const off = (uintptr_t)pv - (uintptr_t)pGVM;
1981 Assert(off < RT_UOFFSETOF_DYN(GVM, aCpus[pGVM->cCpus]));
1982 return RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, off >> PAGE_SHIFT) | ((uintptr_t)pv & PAGE_OFFSET_MASK);
1983}
1984
1985
1986/**
1987 * This is will wake up expired and soon-to-be expired VMs.
1988 *
1989 * @returns Number of VMs that has been woken up.
1990 * @param pGVMM Pointer to the GVMM instance data.
1991 * @param u64Now The current time.
1992 */
1993static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
1994{
1995 /*
1996 * Skip this if we've got disabled because of high resolution wakeups or by
1997 * the user.
1998 */
1999 if (!pGVMM->fDoEarlyWakeUps)
2000 return 0;
2001
2002/** @todo Rewrite this algorithm. See performance defect XYZ. */
2003
2004 /*
2005 * A cheap optimization to stop wasting so much time here on big setups.
2006 */
2007 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
2008 if ( pGVMM->cHaltedEMTs == 0
2009 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
2010 return 0;
2011
2012 /*
2013 * Only one thread doing this at a time.
2014 */
2015 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
2016 return 0;
2017
2018 /*
2019 * The first pass will wake up VMs which have actually expired
2020 * and look for VMs that should be woken up in the 2nd and 3rd passes.
2021 */
2022 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
2023 uint64_t u64Min = UINT64_MAX;
2024 unsigned cWoken = 0;
2025 unsigned cHalted = 0;
2026 unsigned cTodo2nd = 0;
2027 unsigned cTodo3rd = 0;
2028 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2029 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2030 i = pGVMM->aHandles[i].iNext)
2031 {
2032 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2033 if ( RT_VALID_PTR(pCurGVM)
2034 && pCurGVM->u32Magic == GVM_MAGIC)
2035 {
2036 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2037 {
2038 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2039 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2040 if (u64)
2041 {
2042 if (u64 <= u64Now)
2043 {
2044 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2045 {
2046 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2047 AssertRC(rc);
2048 cWoken++;
2049 }
2050 }
2051 else
2052 {
2053 cHalted++;
2054 if (u64 <= uNsEarlyWakeUp1)
2055 cTodo2nd++;
2056 else if (u64 <= uNsEarlyWakeUp2)
2057 cTodo3rd++;
2058 else if (u64 < u64Min)
2059 u64 = u64Min;
2060 }
2061 }
2062 }
2063 }
2064 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2065 }
2066
2067 if (cTodo2nd)
2068 {
2069 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2070 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2071 i = pGVMM->aHandles[i].iNext)
2072 {
2073 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2074 if ( RT_VALID_PTR(pCurGVM)
2075 && pCurGVM->u32Magic == GVM_MAGIC)
2076 {
2077 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2078 {
2079 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2080 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2081 if ( u64
2082 && u64 <= uNsEarlyWakeUp1)
2083 {
2084 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2085 {
2086 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2087 AssertRC(rc);
2088 cWoken++;
2089 }
2090 }
2091 }
2092 }
2093 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2094 }
2095 }
2096
2097 if (cTodo3rd)
2098 {
2099 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2100 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2101 i = pGVMM->aHandles[i].iNext)
2102 {
2103 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2104 if ( RT_VALID_PTR(pCurGVM)
2105 && pCurGVM->u32Magic == GVM_MAGIC)
2106 {
2107 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2108 {
2109 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2110 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2111 if ( u64
2112 && u64 <= uNsEarlyWakeUp2)
2113 {
2114 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2115 {
2116 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2117 AssertRC(rc);
2118 cWoken++;
2119 }
2120 }
2121 }
2122 }
2123 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2124 }
2125 }
2126
2127 /*
2128 * Set the minimum value.
2129 */
2130 pGVMM->uNsNextEmtWakeup = u64Min;
2131
2132 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2133 return cWoken;
2134}
2135
2136
2137/**
2138 * Halt the EMT thread.
2139 *
2140 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2141 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2142 * @param pGVM The global (ring-0) VM structure.
2143 * @param pGVCpu The global (ring-0) CPU structure of the calling
2144 * EMT.
2145 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2146 * @thread EMT(pGVCpu).
2147 */
2148GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
2149{
2150 LogFlow(("GVMMR0SchedHalt: pGVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2151 pGVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
2152 PGVMM pGVMM;
2153 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2154
2155 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2156 Assert(!pGVCpu->gvmm.s.u64HaltExpire);
2157
2158 /*
2159 * If we're doing early wake-ups, we must take the UsedList lock before we
2160 * start querying the current time.
2161 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2162 */
2163 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2164 if (fDoEarlyWakeUps)
2165 {
2166 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2167 }
2168
2169 pGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
2170
2171 /* GIP hack: We might are frequently sleeping for short intervals where the
2172 difference between GIP and system time matters on systems with high resolution
2173 system time. So, convert the input from GIP to System time in that case. */
2174 Assert(ASMGetFlags() & X86_EFL_IF);
2175 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2176 const uint64_t u64NowGip = RTTimeNanoTS();
2177
2178 if (fDoEarlyWakeUps)
2179 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2180
2181 /*
2182 * Go to sleep if we must...
2183 * Cap the sleep time to 1 second to be on the safe side.
2184 */
2185 int rc;
2186 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2187 if ( u64NowGip < u64ExpireGipTime
2188 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2189 ? pGVMM->nsMinSleepCompany
2190 : pGVMM->nsMinSleepAlone))
2191 {
2192 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2193 if (cNsInterval > RT_NS_1SEC)
2194 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2195 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2196 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2197 if (fDoEarlyWakeUps)
2198 {
2199 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2200 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2201 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2202 }
2203
2204 rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
2205 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2206 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2207
2208 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2209 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2210
2211 /* Reset the semaphore to try prevent a few false wake-ups. */
2212 if (rc == VINF_SUCCESS)
2213 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2214 else if (rc == VERR_TIMEOUT)
2215 {
2216 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2217 rc = VINF_SUCCESS;
2218 }
2219 }
2220 else
2221 {
2222 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2223 if (fDoEarlyWakeUps)
2224 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2225 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2226 rc = VINF_SUCCESS;
2227 }
2228
2229 return rc;
2230}
2231
2232
2233/**
2234 * Halt the EMT thread.
2235 *
2236 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2237 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2238 * @param pGVM The global (ring-0) VM structure.
2239 * @param idCpu The Virtual CPU ID of the calling EMT.
2240 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2241 * @thread EMT(idCpu).
2242 */
2243GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2244{
2245 PGVMM pGVMM;
2246 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2247 if (RT_SUCCESS(rc))
2248 rc = GVMMR0SchedHalt(pGVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2249 return rc;
2250}
2251
2252
2253
2254/**
2255 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2256 * the a sleeping EMT.
2257 *
2258 * @retval VINF_SUCCESS if successfully woken up.
2259 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2260 *
2261 * @param pGVM The global (ring-0) VM structure.
2262 * @param pGVCpu The global (ring-0) VCPU structure.
2263 */
2264DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2265{
2266 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2267
2268 /*
2269 * Signal the semaphore regardless of whether it's current blocked on it.
2270 *
2271 * The reason for this is that there is absolutely no way we can be 100%
2272 * certain that it isn't *about* go to go to sleep on it and just got
2273 * delayed a bit en route. So, we will always signal the semaphore when
2274 * the it is flagged as halted in the VMM.
2275 */
2276/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2277 int rc;
2278 if (pGVCpu->gvmm.s.u64HaltExpire)
2279 {
2280 rc = VINF_SUCCESS;
2281 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2282 }
2283 else
2284 {
2285 rc = VINF_GVM_NOT_BLOCKED;
2286 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2287 }
2288
2289 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2290 AssertRC(rc2);
2291
2292 return rc;
2293}
2294
2295
2296/**
2297 * Wakes up the halted EMT thread so it can service a pending request.
2298 *
2299 * @returns VBox status code.
2300 * @retval VINF_SUCCESS if successfully woken up.
2301 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2302 *
2303 * @param pGVM The global (ring-0) VM structure.
2304 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2305 * @param fTakeUsedLock Take the used lock or not
2306 * @thread Any but EMT(idCpu).
2307 */
2308GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2309{
2310 /*
2311 * Validate input and take the UsedLock.
2312 */
2313 PGVMM pGVMM;
2314 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2315 if (RT_SUCCESS(rc))
2316 {
2317 if (idCpu < pGVM->cCpus)
2318 {
2319 /*
2320 * Do the actual job.
2321 */
2322 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2323
2324 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2325 {
2326 /*
2327 * While we're here, do a round of scheduling.
2328 */
2329 Assert(ASMGetFlags() & X86_EFL_IF);
2330 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2331 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2332 }
2333 }
2334 else
2335 rc = VERR_INVALID_CPU_ID;
2336
2337 if (fTakeUsedLock)
2338 {
2339 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2340 AssertRC(rc2);
2341 }
2342 }
2343
2344 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2345 return rc;
2346}
2347
2348
2349/**
2350 * Wakes up the halted EMT thread so it can service a pending request.
2351 *
2352 * @returns VBox status code.
2353 * @retval VINF_SUCCESS if successfully woken up.
2354 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2355 *
2356 * @param pGVM The global (ring-0) VM structure.
2357 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2358 * @thread Any but EMT(idCpu).
2359 */
2360GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, VMCPUID idCpu)
2361{
2362 return GVMMR0SchedWakeUpEx(pGVM, idCpu, true /* fTakeUsedLock */);
2363}
2364
2365
2366/**
2367 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2368 * parameter and no used locking.
2369 *
2370 * @returns VBox status code.
2371 * @retval VINF_SUCCESS if successfully woken up.
2372 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2373 *
2374 * @param pGVM The global (ring-0) VM structure.
2375 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2376 * @thread Any but EMT(idCpu).
2377 * @deprecated Don't use in new code if possible! Use the GVM variant.
2378 */
2379GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2380{
2381 PGVMM pGVMM;
2382 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2383 if (RT_SUCCESS(rc))
2384 rc = GVMMR0SchedWakeUpEx(pGVM, idCpu, false /*fTakeUsedLock*/);
2385 return rc;
2386}
2387
2388
2389/**
2390 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2391 * the Virtual CPU if it's still busy executing guest code.
2392 *
2393 * @returns VBox status code.
2394 * @retval VINF_SUCCESS if poked successfully.
2395 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2396 *
2397 * @param pGVM The global (ring-0) VM structure.
2398 * @param pVCpu The cross context virtual CPU structure.
2399 */
2400DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPUCC pVCpu)
2401{
2402 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2403
2404 RTCPUID idHostCpu = pVCpu->idHostCpu;
2405 if ( idHostCpu == NIL_RTCPUID
2406 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2407 {
2408 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2409 return VINF_GVM_NOT_BUSY_IN_GC;
2410 }
2411
2412 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2413 RTMpPokeCpu(idHostCpu);
2414 return VINF_SUCCESS;
2415}
2416
2417
2418/**
2419 * Pokes an EMT if it's still busy running guest code.
2420 *
2421 * @returns VBox status code.
2422 * @retval VINF_SUCCESS if poked successfully.
2423 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2424 *
2425 * @param pGVM The global (ring-0) VM structure.
2426 * @param idCpu The ID of the virtual CPU to poke.
2427 * @param fTakeUsedLock Take the used lock or not
2428 */
2429GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2430{
2431 /*
2432 * Validate input and take the UsedLock.
2433 */
2434 PGVMM pGVMM;
2435 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2436 if (RT_SUCCESS(rc))
2437 {
2438 if (idCpu < pGVM->cCpus)
2439 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2440 else
2441 rc = VERR_INVALID_CPU_ID;
2442
2443 if (fTakeUsedLock)
2444 {
2445 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2446 AssertRC(rc2);
2447 }
2448 }
2449
2450 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2451 return rc;
2452}
2453
2454
2455/**
2456 * Pokes an EMT if it's still busy running guest code.
2457 *
2458 * @returns VBox status code.
2459 * @retval VINF_SUCCESS if poked successfully.
2460 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2461 *
2462 * @param pGVM The global (ring-0) VM structure.
2463 * @param idCpu The ID of the virtual CPU to poke.
2464 */
2465GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, VMCPUID idCpu)
2466{
2467 return GVMMR0SchedPokeEx(pGVM, idCpu, true /* fTakeUsedLock */);
2468}
2469
2470
2471/**
2472 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2473 * used locking.
2474 *
2475 * @returns VBox status code.
2476 * @retval VINF_SUCCESS if poked successfully.
2477 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2478 *
2479 * @param pGVM The global (ring-0) VM structure.
2480 * @param idCpu The ID of the virtual CPU to poke.
2481 *
2482 * @deprecated Don't use in new code if possible! Use the GVM variant.
2483 */
2484GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2485{
2486 PGVMM pGVMM;
2487 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2488 if (RT_SUCCESS(rc))
2489 {
2490 if (idCpu < pGVM->cCpus)
2491 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2492 else
2493 rc = VERR_INVALID_CPU_ID;
2494 }
2495 return rc;
2496}
2497
2498
2499/**
2500 * Wakes up a set of halted EMT threads so they can service pending request.
2501 *
2502 * @returns VBox status code, no informational stuff.
2503 *
2504 * @param pGVM The global (ring-0) VM structure.
2505 * @param pSleepSet The set of sleepers to wake up.
2506 * @param pPokeSet The set of CPUs to poke.
2507 */
2508GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2509{
2510 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2511 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2512 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2513
2514 /*
2515 * Validate input and take the UsedLock.
2516 */
2517 PGVMM pGVMM;
2518 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /* fTakeUsedLock */);
2519 if (RT_SUCCESS(rc))
2520 {
2521 rc = VINF_SUCCESS;
2522 VMCPUID idCpu = pGVM->cCpus;
2523 while (idCpu-- > 0)
2524 {
2525 /* Don't try poke or wake up ourselves. */
2526 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2527 continue;
2528
2529 /* just ignore errors for now. */
2530 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2531 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2532 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2533 gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2534 }
2535
2536 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2537 AssertRC(rc2);
2538 }
2539
2540 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2541 return rc;
2542}
2543
2544
2545/**
2546 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2547 *
2548 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2549 * @param pGVM The global (ring-0) VM structure.
2550 * @param pReq Pointer to the request packet.
2551 */
2552GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2553{
2554 /*
2555 * Validate input and pass it on.
2556 */
2557 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2558 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2559
2560 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &pReq->SleepSet, &pReq->PokeSet);
2561}
2562
2563
2564
2565/**
2566 * Poll the schedule to see if someone else should get a chance to run.
2567 *
2568 * This is a bit hackish and will not work too well if the machine is
2569 * under heavy load from non-VM processes.
2570 *
2571 * @returns VINF_SUCCESS if not yielded.
2572 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2573 * @param pGVM The global (ring-0) VM structure.
2574 * @param idCpu The Virtual CPU ID of the calling EMT.
2575 * @param fYield Whether to yield or not.
2576 * This is for when we're spinning in the halt loop.
2577 * @thread EMT(idCpu).
2578 */
2579GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, VMCPUID idCpu, bool fYield)
2580{
2581 /*
2582 * Validate input.
2583 */
2584 PGVMM pGVMM;
2585 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2586 if (RT_SUCCESS(rc))
2587 {
2588 /*
2589 * We currently only implement helping doing wakeups (fYield = false), so don't
2590 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2591 */
2592 if (!fYield && pGVMM->fDoEarlyWakeUps)
2593 {
2594 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2595 pGVM->gvmm.s.StatsSched.cPollCalls++;
2596
2597 Assert(ASMGetFlags() & X86_EFL_IF);
2598 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2599
2600 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2601
2602 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2603 }
2604 /*
2605 * Not quite sure what we could do here...
2606 */
2607 else if (fYield)
2608 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2609 else
2610 rc = VINF_SUCCESS;
2611 }
2612
2613 LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
2614 return rc;
2615}
2616
2617
#ifdef GVMM_SCHED_WITH_PPT
/**
 * Timer callback for the periodic preemption timer.
 *
 * Every cTicksHistoriziationInterval ticks the currently desired frequency is
 * pushed into the history ring, and the timer is reprogrammed to (or stopped
 * according to) the maximum frequency found in that history.  The per-CPU
 * spinlock is always released before any RTTimer API is called.
 *
 * @param   pTimer  The timer handle.
 * @param   pvUser  Pointer to the per cpu structure.
 * @param   iTick   The current tick.
 */
static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
    NOREF(pTimer); NOREF(iTick);

    /*
     * Bail out if the per-CPU structure has been invalidated (termination).
     */
    if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
        return;

    RTSpinlockAcquire(pCpu->Ppt.hSpinlock);

    /*
     * Not yet time for house keeping?
     */
    if (++pCpu->Ppt.iTickHistorization < pCpu->Ppt.cTicksHistoriziationInterval)
    {
        RTSpinlockRelease(pCpu->Ppt.hSpinlock);
        return;
    }

    /*
     * Historicize the desired frequency and start a new interval.
     */
    uint32_t const idxHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
    pCpu->Ppt.aHzHistory[idxHistory] = pCpu->Ppt.uDesiredHz;
    pCpu->Ppt.iTickHistorization     = 0;
    pCpu->Ppt.uDesiredHz             = 0;

    /*
     * Find the maximum frequency in the history.
     */
    uint32_t uHistMaxHz = 0;
    for (uint32_t idx = 0; idx < RT_ELEMENTS(pCpu->Ppt.aHzHistory); idx++)
    {
        uint32_t const uHz = pCpu->Ppt.aHzHistory[idx];
        if (uHz > uHistMaxHz)
            uHistMaxHz = uHz;
    }

    /*
     * Already running at that frequency?  Then there is nothing to change.
     */
    if (uHistMaxHz == pCpu->Ppt.uTimerHz)
    {
        RTSpinlockRelease(pCpu->Ppt.hSpinlock);
        return;
    }

    /*
     * Non-zero maximum: reprogram the timer.
     */
    if (uHistMaxHz)
    {
        pCpu->Ppt.cChanges++;
        pCpu->Ppt.iTickHistorization = 0;
        pCpu->Ppt.uTimerHz           = uHistMaxHz;
        uint32_t const cNsInterval   = RT_NS_1SEC / uHistMaxHz;
        pCpu->Ppt.cNsInterval        = cNsInterval;
        pCpu->Ppt.cTicksHistoriziationInterval = cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
                                               ?   (  GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
                                                    + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
                                                 / cNsInterval
                                               : 1;
        RTSpinlockRelease(pCpu->Ppt.hSpinlock);

        /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
        RTTimerChangeInterval(pTimer, cNsInterval);
        return;
    }

    /*
     * Nobody wants the timer any longer: stop it.
     */
    pCpu->Ppt.fStarted    = false;
    pCpu->Ppt.uTimerHz    = 0;
    pCpu->Ppt.cNsInterval = 0;
    RTSpinlockRelease(pCpu->Ppt.hSpinlock);

    /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
    RTTimerStop(pTimer);
}
#endif /* GVMM_SCHED_WITH_PPT */
2700
2701
2702/**
2703 * Updates the periodic preemption timer for the calling CPU.
2704 *
2705 * The caller must have disabled preemption!
2706 * The caller must check that the host can do high resolution timers.
2707 *
2708 * @param pGVM The global (ring-0) VM structure.
2709 * @param idHostCpu The current host CPU id.
2710 * @param uHz The desired frequency.
2711 */
2712GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PGVM pGVM, RTCPUID idHostCpu, uint32_t uHz)
2713{
2714 NOREF(pGVM);
2715#ifdef GVMM_SCHED_WITH_PPT
2716 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2717 Assert(RTTimerCanDoHighResolution());
2718
2719 /*
2720 * Resolve the per CPU data.
2721 */
2722 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2723 PGVMM pGVMM = g_pGVMM;
2724 if ( !RT_VALID_PTR(pGVMM)
2725 || pGVMM->u32Magic != GVMM_MAGIC)
2726 return;
2727 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2728 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2729 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2730 && pCpu->idCpu == idHostCpu,
2731 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2732
2733 /*
2734 * Check whether we need to do anything about the timer.
2735 * We have to be a little bit careful since we might be race the timer
2736 * callback here.
2737 */
2738 if (uHz > 16384)
2739 uHz = 16384; /** @todo add a query method for this! */
2740 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2741 && uHz >= pCpu->Ppt.uMinHz
2742 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2743 {
2744 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2745
2746 pCpu->Ppt.uDesiredHz = uHz;
2747 uint32_t cNsInterval = 0;
2748 if (!pCpu->Ppt.fStarted)
2749 {
2750 pCpu->Ppt.cStarts++;
2751 pCpu->Ppt.fStarted = true;
2752 pCpu->Ppt.fStarting = true;
2753 pCpu->Ppt.iTickHistorization = 0;
2754 pCpu->Ppt.uTimerHz = uHz;
2755 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2756 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2757 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2758 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2759 / cNsInterval;
2760 else
2761 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2762 }
2763
2764 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2765
2766 if (cNsInterval)
2767 {
2768 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2769 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2770 AssertRC(rc);
2771
2772 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2773 if (RT_FAILURE(rc))
2774 pCpu->Ppt.fStarted = false;
2775 pCpu->Ppt.fStarting = false;
2776 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2777 }
2778 }
2779#else /* !GVMM_SCHED_WITH_PPT */
2780 NOREF(idHostCpu); NOREF(uHz);
2781#endif /* !GVMM_SCHED_WITH_PPT */
2782}
2783
2784
2785/**
2786 * Calls @a pfnCallback for each VM in the system.
2787 *
2788 * This will enumerate the VMs while holding the global VM used list lock in
2789 * shared mode. So, only suitable for simple work. If more expensive work
2790 * needs doing, a different approach must be taken as using this API would
2791 * otherwise block VM creation and destruction.
2792 *
2793 * @returns VBox status code.
2794 * @param pfnCallback The callback function.
2795 * @param pvUser User argument to the callback.
2796 */
2797GVMMR0DECL(int) GVMMR0EnumVMs(PFNGVMMR0ENUMCALLBACK pfnCallback, void *pvUser)
2798{
2799 PGVMM pGVMM;
2800 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2801
2802 int rc = VINF_SUCCESS;
2803 GVMMR0_USED_SHARED_LOCK(pGVMM);
2804 for (unsigned i = pGVMM->iUsedHead, cLoops = 0;
2805 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2806 i = pGVMM->aHandles[i].iNext, cLoops++)
2807 {
2808 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2809 if ( RT_VALID_PTR(pGVM)
2810 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2811 && pGVM->u32Magic == GVM_MAGIC)
2812 {
2813 rc = pfnCallback(pGVM, pvUser);
2814 if (rc != VINF_SUCCESS)
2815 break;
2816 }
2817
2818 AssertBreak(cLoops < RT_ELEMENTS(pGVMM->aHandles) * 4); /* paranoia */
2819 }
2820 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2821 return rc;
2822}
2823
2824
2825/**
2826 * Retrieves the GVMM statistics visible to the caller.
2827 *
2828 * @returns VBox status code.
2829 *
2830 * @param pStats Where to put the statistics.
2831 * @param pSession The current session.
2832 * @param pGVM The GVM to obtain statistics for. Optional.
2833 */
2834GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
2835{
2836 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
2837
2838 /*
2839 * Validate input.
2840 */
2841 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2842 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2843 pStats->cVMs = 0; /* (crash before taking the sem...) */
2844
2845 /*
2846 * Take the lock and get the VM statistics.
2847 */
2848 PGVMM pGVMM;
2849 if (pGVM)
2850 {
2851 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
2852 if (RT_FAILURE(rc))
2853 return rc;
2854 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
2855 }
2856 else
2857 {
2858 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2859 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
2860
2861 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2862 AssertRCReturn(rc, rc);
2863 }
2864
2865 /*
2866 * Enumerate the VMs and add the ones visible to the statistics.
2867 */
2868 pStats->cVMs = 0;
2869 pStats->cEMTs = 0;
2870 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
2871
2872 for (unsigned i = pGVMM->iUsedHead;
2873 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2874 i = pGVMM->aHandles[i].iNext)
2875 {
2876 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
2877 void *pvObj = pGVMM->aHandles[i].pvObj;
2878 if ( RT_VALID_PTR(pvObj)
2879 && RT_VALID_PTR(pOtherGVM)
2880 && pOtherGVM->u32Magic == GVM_MAGIC
2881 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2882 {
2883 pStats->cVMs++;
2884 pStats->cEMTs += pOtherGVM->cCpus;
2885
2886 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
2887 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
2888 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
2889 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
2890 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
2891
2892 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
2893 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
2894 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
2895
2896 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
2897 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
2898
2899 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
2900 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
2901 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
2902 }
2903 }
2904
2905 /*
2906 * Copy out the per host CPU statistics.
2907 */
2908 uint32_t iDstCpu = 0;
2909 uint32_t cSrcCpus = pGVMM->cHostCpus;
2910 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
2911 {
2912 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
2913 {
2914 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
2915 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
2916#ifdef GVMM_SCHED_WITH_PPT
2917 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
2918 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
2919 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
2920 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
2921#else
2922 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
2923 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
2924 pStats->aHostCpus[iDstCpu].cChanges = 0;
2925 pStats->aHostCpus[iDstCpu].cStarts = 0;
2926#endif
2927 iDstCpu++;
2928 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
2929 break;
2930 }
2931 }
2932 pStats->cHostCpus = iDstCpu;
2933
2934 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2935
2936 return VINF_SUCCESS;
2937}
2938
2939
2940/**
2941 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2942 *
2943 * @returns see GVMMR0QueryStatistics.
2944 * @param pGVM The global (ring-0) VM structure. Optional.
2945 * @param pReq Pointer to the request packet.
2946 * @param pSession The current session.
2947 */
2948GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
2949{
2950 /*
2951 * Validate input and pass it on.
2952 */
2953 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2954 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2955 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
2956
2957 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM);
2958}
2959
2960
2961/**
2962 * Resets the specified GVMM statistics.
2963 *
2964 * @returns VBox status code.
2965 *
2966 * @param pStats Which statistics to reset, that is, non-zero fields indicates which to reset.
2967 * @param pSession The current session.
2968 * @param pGVM The GVM to reset statistics for. Optional.
2969 */
2970GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
2971{
2972 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
2973
2974 /*
2975 * Validate input.
2976 */
2977 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2978 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2979
2980 /*
2981 * Take the lock and get the VM statistics.
2982 */
2983 PGVMM pGVMM;
2984 if (pGVM)
2985 {
2986 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
2987 if (RT_FAILURE(rc))
2988 return rc;
2989# define MAYBE_RESET_FIELD(field) \
2990 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2991 MAYBE_RESET_FIELD(cHaltCalls);
2992 MAYBE_RESET_FIELD(cHaltBlocking);
2993 MAYBE_RESET_FIELD(cHaltTimeouts);
2994 MAYBE_RESET_FIELD(cHaltNotBlocking);
2995 MAYBE_RESET_FIELD(cHaltWakeUps);
2996 MAYBE_RESET_FIELD(cWakeUpCalls);
2997 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2998 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2999 MAYBE_RESET_FIELD(cPokeCalls);
3000 MAYBE_RESET_FIELD(cPokeNotBusy);
3001 MAYBE_RESET_FIELD(cPollCalls);
3002 MAYBE_RESET_FIELD(cPollHalts);
3003 MAYBE_RESET_FIELD(cPollWakeUps);
3004# undef MAYBE_RESET_FIELD
3005 }
3006 else
3007 {
3008 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3009
3010 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3011 AssertRCReturn(rc, rc);
3012 }
3013
3014 /*
3015 * Enumerate the VMs and add the ones visible to the statistics.
3016 */
3017 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
3018 {
3019 for (unsigned i = pGVMM->iUsedHead;
3020 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3021 i = pGVMM->aHandles[i].iNext)
3022 {
3023 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3024 void *pvObj = pGVMM->aHandles[i].pvObj;
3025 if ( RT_VALID_PTR(pvObj)
3026 && RT_VALID_PTR(pOtherGVM)
3027 && pOtherGVM->u32Magic == GVM_MAGIC
3028 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3029 {
3030# define MAYBE_RESET_FIELD(field) \
3031 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3032 MAYBE_RESET_FIELD(cHaltCalls);
3033 MAYBE_RESET_FIELD(cHaltBlocking);
3034 MAYBE_RESET_FIELD(cHaltTimeouts);
3035 MAYBE_RESET_FIELD(cHaltNotBlocking);
3036 MAYBE_RESET_FIELD(cHaltWakeUps);
3037 MAYBE_RESET_FIELD(cWakeUpCalls);
3038 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3039 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3040 MAYBE_RESET_FIELD(cPokeCalls);
3041 MAYBE_RESET_FIELD(cPokeNotBusy);
3042 MAYBE_RESET_FIELD(cPollCalls);
3043 MAYBE_RESET_FIELD(cPollHalts);
3044 MAYBE_RESET_FIELD(cPollWakeUps);
3045# undef MAYBE_RESET_FIELD
3046 }
3047 }
3048 }
3049
3050 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3051
3052 return VINF_SUCCESS;
3053}
3054
3055
3056/**
3057 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3058 *
3059 * @returns see GVMMR0ResetStatistics.
3060 * @param pGVM The global (ring-0) VM structure. Optional.
3061 * @param pReq Pointer to the request packet.
3062 * @param pSession The current session.
3063 */
3064GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3065{
3066 /*
3067 * Validate input and pass it on.
3068 */
3069 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3070 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3071 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3072
3073 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM);
3074}
3075
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette