VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 75649

Last change on this file since 75649 was 75646, checked in by vboxsync, 6 years ago

VMM: HLT/MWAIT optimizations for busy guests: don't go back to ring-3 just to call GVMMR0SchedHalt(), do the first call in ring-0. This reduces interrupt latency for some workloads. bugref:9172
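
To make the intent of that change concrete, here is a minimal, purely illustrative sketch of a ring-0 halt path that tries GVMMR0SchedHalt() in ring-0 before falling back to ring-3. The helper name, the timeout handling and the exact GVMMR0SchedHalt() signature are assumptions for illustration only, not taken from this revision.

/* Illustrative sketch only -- not part of GVMMR0.cpp. */
static int vmmR0ExampleHaltInRing0(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint64_t cNsTimeout)
{
    /* Turn the relative timeout into an absolute timestamp (time source assumed). */
    uint64_t const u64ExpireGipTime = RTTimeNanoTS() + cNsTimeout;

    /* The first halt attempt stays in ring-0, avoiding a ring transition on the hot path. */
    int rc = GVMMR0SchedHalt(pGVM, pVM, idCpu, u64ExpireGipTime);
    if (rc == VINF_SUCCESS)
        return VINF_SUCCESS;    /* Woken up (or timed out) without leaving ring-0. */

    /* Otherwise let ring-3 handle the halt the traditional way. */
    return VINF_EM_HALT;
}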

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 108.9 KB
1/* $Id: GVMMR0.cpp 75646 2018-11-21 15:38:10Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2017 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered; this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-CPU
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * The TMCalcHostTimerFrequency() part of this takes the max
42 * TMTimerSetFrequencyHint() value and adjusts by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
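
To make the window arithmetic concrete: with the values defined further down in this file (an 8-entry aHzHistory array and GVMMHOSTCPU_PPT_HIST_INTERVAL_NS = 20 000 000 ns), the history covers 8 * 20 ms = 160 ms. The following is a simplified sketch of how a per-CPU timer frequency could be picked from such a history; it is an illustration of the idea only, not the code of gvmmR0SchedPeriodicPreemptionTimerCallback().

/* Illustrative sketch only -- not part of GVMMR0.cpp. */
static uint32_t exampleCalcPptTimerHz(PGVMMHOSTCPU pCpu)
{
    /* Start with the most recent frequency hint reported by the EMTs... */
    uint32_t uHz = ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz);

    /* ...and take the maximum over the ~160 ms history window. */
    for (unsigned i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
        if (pCpu->Ppt.aHzHistory[i] > uHz)
            uHz = pCpu->Ppt.aHzHistory[i];

    /* Frequencies below the per-CPU minimum are not worth running a timer for. */
    return uHz >= pCpu->Ppt.uMinHz ? uHz : 0;
}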
47
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/gvm.h>
57#include <VBox/vmm/vm.h>
58#include <VBox/vmm/vmcpuset.h>
59#include <VBox/vmm/vmm.h>
60#ifdef VBOX_WITH_NEM_R0
61# include <VBox/vmm/nem.h>
62#endif
63#include <VBox/param.h>
64#include <VBox/err.h>
65
66#include <iprt/asm.h>
67#include <iprt/asm-amd64-x86.h>
68#include <iprt/critsect.h>
69#include <iprt/mem.h>
70#include <iprt/semaphore.h>
71#include <iprt/time.h>
72#include <VBox/log.h>
73#include <iprt/thread.h>
74#include <iprt/process.h>
75#include <iprt/param.h>
76#include <iprt/string.h>
77#include <iprt/assert.h>
78#include <iprt/mem.h>
79#include <iprt/memobj.h>
80#include <iprt/mp.h>
81#include <iprt/cpuset.h>
82#include <iprt/spinlock.h>
83#include <iprt/timer.h>
84
85#include "dtrace/VBoxVMM.h"
86
87
88/*********************************************************************************************************************************
89* Defined Constants And Macros *
90*********************************************************************************************************************************/
91#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
92/** Define this to enable the periodic preemption timer. */
93# define GVMM_SCHED_WITH_PPT
94#endif
95
96
97/** @def GVMM_CHECK_SMAP_SETUP
98 * SMAP check setup. */
99/** @def GVMM_CHECK_SMAP_CHECK
100 * Checks that the AC flag is set if SMAP is enabled. If AC is not set,
101 * it will be logged and @a a_BadExpr is executed. */
102/** @def GVMM_CHECK_SMAP_CHECK2
103 * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will
104 * be logged, written to the VMs assertion text buffer, and @a a_BadExpr is
105 * executed. */
106#if defined(VBOX_STRICT) || 1
107# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
108# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) \
109 do { \
110 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
111 { \
112 RTCCUINTREG fEflCheck = ASMGetFlags(); \
113 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
114 { /* likely */ } \
115 else \
116 { \
117 SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
118 a_BadExpr; \
119 } \
120 } \
121 } while (0)
122# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) \
123 do { \
124 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
125 { \
126 RTCCUINTREG fEflCheck = ASMGetFlags(); \
127 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
128 { /* likely */ } \
129 else \
130 { \
131 SUPR0BadContext((a_pVM) ? (a_pVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
132 a_BadExpr; \
133 } \
134 } \
135 } while (0)
136#else
137# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0
138# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures)
139# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) NOREF(fKernelFeatures)
140#endif
141
142
143
144/*********************************************************************************************************************************
145* Structures and Typedefs *
146*********************************************************************************************************************************/
147
148/**
149 * Global VM handle.
150 */
151typedef struct GVMHANDLE
152{
153 /** The index of the next handle in the list (free or used). (0 is nil.) */
154 uint16_t volatile iNext;
155 /** Our own index / handle value. */
156 uint16_t iSelf;
157 /** The process ID of the handle owner.
158 * This is used for access checks. */
159 RTPROCESS ProcId;
160 /** The pointer to the ring-0 only (aka global) VM structure. */
161 PGVM pGVM;
162 /** The ring-0 mapping of the shared VM instance data. */
163 PVM pVM;
164 /** The virtual machine object. */
165 void *pvObj;
166 /** The session this VM is associated with. */
167 PSUPDRVSESSION pSession;
168 /** The ring-0 handle of the EMT0 thread.
169 * This is used for ownership checks as well as looking up a VM handle by thread
170 * at times like assertions. */
171 RTNATIVETHREAD hEMT0;
172} GVMHANDLE;
173/** Pointer to a global VM handle. */
174typedef GVMHANDLE *PGVMHANDLE;
175
176/** Number of GVM handles (including the NIL handle). */
177#if HC_ARCH_BITS == 64
178# define GVMM_MAX_HANDLES 8192
179#else
180# define GVMM_MAX_HANDLES 128
181#endif
182
183/**
184 * Per host CPU GVMM data.
185 */
186typedef struct GVMMHOSTCPU
187{
188 /** Magic number (GVMMHOSTCPU_MAGIC). */
189 uint32_t volatile u32Magic;
190 /** The CPU ID. */
191 RTCPUID idCpu;
192 /** The CPU set index. */
193 uint32_t idxCpuSet;
194
195#ifdef GVMM_SCHED_WITH_PPT
196 /** Periodic preemption timer data. */
197 struct
198 {
199 /** The handle to the periodic preemption timer. */
200 PRTTIMER pTimer;
201 /** Spinlock protecting the data below. */
202 RTSPINLOCK hSpinlock;
203 /** The smallest Hz that we need to care about. (static) */
204 uint32_t uMinHz;
205 /** The number of ticks between each historization. */
206 uint32_t cTicksHistoriziationInterval;
207 /** The current historization tick (counting up to
208 * cTicksHistoriziationInterval and then resetting). */
209 uint32_t iTickHistorization;
210 /** The current timer interval. This is set to 0 when inactive. */
211 uint32_t cNsInterval;
212 /** The current timer frequency. This is set to 0 when inactive. */
213 uint32_t uTimerHz;
214 /** The current max frequency reported by the EMTs.
215 * This gets historicized and reset by the timer callback. This is
216 * read without holding the spinlock, so needs atomic updating. */
217 uint32_t volatile uDesiredHz;
218 /** Whether the timer was started or not. */
219 bool volatile fStarted;
220 /** Set if we're starting the timer. */
221 bool volatile fStarting;
222 /** The index of the next history entry (mod it). */
223 uint32_t iHzHistory;
224 /** Historicized uDesiredHz values. The array wraps around, new entries
225 * are added at iHzHistory. This is updated approximately every
226 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
227 uint32_t aHzHistory[8];
228 /** Statistics counter for recording the number of interval changes. */
229 uint32_t cChanges;
230 /** Statistics counter for recording the number of timer starts. */
231 uint32_t cStarts;
232 } Ppt;
233#endif /* GVMM_SCHED_WITH_PPT */
234
235} GVMMHOSTCPU;
236/** Pointer to the per host CPU GVMM data. */
237typedef GVMMHOSTCPU *PGVMMHOSTCPU;
238/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
239#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
240 /** The interval one history entry should cover (approximately), given in
241 * nanoseconds. */
242#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
243
244
245/**
246 * The GVMM instance data.
247 */
248typedef struct GVMM
249{
250 /** Eyecatcher / magic. */
251 uint32_t u32Magic;
252 /** The index of the head of the free handle chain. (0 is nil.) */
253 uint16_t volatile iFreeHead;
254 /** The index of the head of the active handle chain. (0 is nil.) */
255 uint16_t volatile iUsedHead;
256 /** The number of VMs. */
257 uint16_t volatile cVMs;
258 /** Alignment padding. */
259 uint16_t u16Reserved;
260 /** The number of EMTs. */
261 uint32_t volatile cEMTs;
262 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
263 uint32_t volatile cHaltedEMTs;
264 /** Mini lock for restricting early wake-ups to one thread. */
265 bool volatile fDoingEarlyWakeUps;
266 bool afPadding[3]; /**< explicit alignment padding. */
267 /** When the next halted or sleeping EMT will wake up.
268 * This is set to 0 when it needs recalculating and to UINT64_MAX when
269 * there are no halted or sleeping EMTs in the GVMM. */
270 uint64_t uNsNextEmtWakeup;
271 /** The lock used to serialize VM creation, destruction and associated events that
272 * isn't performance critical. Owners may acquire the list lock. */
273 RTCRITSECT CreateDestroyLock;
274 /** The lock used to serialize used list updates and accesses.
275 * This indirectly includes scheduling since the scheduler will have to walk the
276 * used list to examine running VMs. Owners may not acquire any other locks. */
277 RTCRITSECTRW UsedLock;
278 /** The handle array.
279 * The size of this array defines the maximum number of currently running VMs.
280 * The first entry is unused as it represents the NIL handle. */
281 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
282
283 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
284 * The number of EMTs that means we no longer consider ourselves alone on a
285 * CPU/Core.
286 */
287 uint32_t cEMTsMeansCompany;
288 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
289 * The minimum sleep time for when we're alone, in nanoseconds.
290 */
291 uint32_t nsMinSleepAlone;
292 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
293 * The minimum sleep time for when we've got company, in nanoseconds.
294 */
295 uint32_t nsMinSleepCompany;
296 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
297 * The limit for the first round of early wake-ups, given in nanoseconds.
298 */
299 uint32_t nsEarlyWakeUp1;
300 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
301 * The limit for the second round of early wake-ups, given in nanoseconds.
302 */
303 uint32_t nsEarlyWakeUp2;
304
305 /** Set if we're doing early wake-ups.
306 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
307 bool volatile fDoEarlyWakeUps;
308
309 /** The number of entries in the host CPU array (aHostCpus). */
310 uint32_t cHostCpus;
311 /** Per host CPU data (variable length). */
312 GVMMHOSTCPU aHostCpus[1];
313} GVMM;
314AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
315AssertCompileMemberAlignment(GVMM, UsedLock, 8);
316AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
317/** Pointer to the GVMM instance data. */
318typedef GVMM *PGVMM;
319
320/** The GVMM::u32Magic value (Charlie Haden). */
321#define GVMM_MAGIC UINT32_C(0x19370806)
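
The handle array doubles as two singly linked chains threaded through GVMHANDLE::iNext: a free chain headed by iFreeHead and a used chain headed by iUsedHead, with index 0 reserved as the nil handle. The sketch below illustrates how GVMMR0CreateVM() (further down) moves a handle from one chain to the other; the relinking must be done while holding the 'used' lock exclusively.

/* Illustrative sketch only -- mirrors the relinking done in GVMMR0CreateVM(). */
static uint16_t exampleAllocHandle(PGVMM pGVMM)
{
    uint16_t const iHandle = pGVMM->iFreeHead;      /* 0 means the free chain is empty. */
    if (iHandle)
    {
        PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
        pGVMM->iFreeHead   = pHandle->iNext;        /* Unlink from the free chain. */
        pHandle->iNext     = pGVMM->iUsedHead;      /* Push onto the used chain. */
        pGVMM->iUsedHead   = iHandle;
        pGVMM->cVMs++;
    }
    return iHandle;                                 /* Caller maps 0 to VERR_GVM_TOO_MANY_VMS. */
}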
322
323
324
325/*********************************************************************************************************************************
326* Global Variables *
327*********************************************************************************************************************************/
328/** Pointer to the GVMM instance data.
329 * (Just my general dislike for global variables.) */
330static PGVMM g_pGVMM = NULL;
331
332/** Macro for obtaining and validating the g_pGVMM pointer.
333 * On failure it will return from the invoking function with the specified return value.
334 *
335 * @param pGVMM The name of the pGVMM variable.
336 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
337 * status codes.
338 */
339#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
340 do { \
341 (pGVMM) = g_pGVMM;\
342 AssertPtrReturn((pGVMM), (rc)); \
343 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
344 } while (0)
345
346/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
347 * On failure it will return from the invoking function.
348 *
349 * @param pGVMM The name of the pGVMM variable.
350 */
351#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
352 do { \
353 (pGVMM) = g_pGVMM;\
354 AssertPtrReturnVoid((pGVMM)); \
355 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
356 } while (0)
357
358
359/*********************************************************************************************************************************
360* Internal Functions *
361*********************************************************************************************************************************/
362static void gvmmR0InitPerVMData(PGVM pGVM);
363static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
364static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock);
365static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM);
366
367#ifdef GVMM_SCHED_WITH_PPT
368static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
369#endif
370
371
372/**
373 * Initializes the GVMM.
374 *
375 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
376 *
377 * @returns VBox status code.
378 */
379GVMMR0DECL(int) GVMMR0Init(void)
380{
381 LogFlow(("GVMMR0Init:\n"));
382
383 /*
384 * Allocate and initialize the instance data.
385 */
386 uint32_t cHostCpus = RTMpGetArraySize();
387 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
388
389 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
390 if (!pGVMM)
391 return VERR_NO_MEMORY;
392 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
393 "GVMM-CreateDestroyLock");
394 if (RT_SUCCESS(rc))
395 {
396 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
397 if (RT_SUCCESS(rc))
398 {
399 pGVMM->u32Magic = GVMM_MAGIC;
400 pGVMM->iUsedHead = 0;
401 pGVMM->iFreeHead = 1;
402
403 /* the nil handle */
404 pGVMM->aHandles[0].iSelf = 0;
405 pGVMM->aHandles[0].iNext = 0;
406
407 /* the tail */
408 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
409 pGVMM->aHandles[i].iSelf = i;
410 pGVMM->aHandles[i].iNext = 0; /* nil */
411
412 /* the rest */
413 while (i-- > 1)
414 {
415 pGVMM->aHandles[i].iSelf = i;
416 pGVMM->aHandles[i].iNext = i + 1;
417 }
418
419 /* The default configuration values. */
420 uint32_t cNsResolution = RTSemEventMultiGetResolution();
421 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted to relative to the cpu count or something... */
422 if (cNsResolution >= 5*RT_NS_100US)
423 {
424 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
425 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
426 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
427 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
428 }
429 else if (cNsResolution > RT_NS_100US)
430 {
431 pGVMM->nsMinSleepAlone = cNsResolution / 2;
432 pGVMM->nsMinSleepCompany = cNsResolution / 4;
433 pGVMM->nsEarlyWakeUp1 = 0;
434 pGVMM->nsEarlyWakeUp2 = 0;
435 }
436 else
437 {
438 pGVMM->nsMinSleepAlone = 2000;
439 pGVMM->nsMinSleepCompany = 2000;
440 pGVMM->nsEarlyWakeUp1 = 0;
441 pGVMM->nsEarlyWakeUp2 = 0;
442 }
443 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
444
445 /* The host CPU data. */
446 pGVMM->cHostCpus = cHostCpus;
447 uint32_t iCpu = cHostCpus;
448 RTCPUSET PossibleSet;
449 RTMpGetSet(&PossibleSet);
450 while (iCpu-- > 0)
451 {
452 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
453#ifdef GVMM_SCHED_WITH_PPT
454 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
455 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
456 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
457 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
458 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
459 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
460 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
461 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
462 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
463 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
464 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
465 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
466#endif
467
468 if (RTCpuSetIsMember(&PossibleSet, iCpu))
469 {
470 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
471 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
472
473#ifdef GVMM_SCHED_WITH_PPT
474 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
475 50*1000*1000 /* whatever */,
476 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
477 gvmmR0SchedPeriodicPreemptionTimerCallback,
478 &pGVMM->aHostCpus[iCpu]);
479 if (RT_SUCCESS(rc))
480 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
481 if (RT_FAILURE(rc))
482 {
483 while (iCpu < cHostCpus)
484 {
485 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
486 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
487 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
488 iCpu++;
489 }
490 break;
491 }
492#endif
493 }
494 else
495 {
496 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
497 pGVMM->aHostCpus[iCpu].u32Magic = 0;
498 }
499 }
500 if (RT_SUCCESS(rc))
501 {
502 g_pGVMM = pGVMM;
503 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
504 return VINF_SUCCESS;
505 }
506
507 /* bail out. */
508 RTCritSectRwDelete(&pGVMM->UsedLock);
509 }
510 RTCritSectDelete(&pGVMM->CreateDestroyLock);
511 }
512
513 RTMemFree(pGVMM);
514 return rc;
515}
516
517
518/**
519 * Terminates the GVMM.
520 *
521 * This is called while owning the loader semaphore (see supdrvLdrFree()).
522 * And unless something is wrong, there should be absolutely no VMs
523 * registered at this point.
524 */
525GVMMR0DECL(void) GVMMR0Term(void)
526{
527 LogFlow(("GVMMR0Term:\n"));
528
529 PGVMM pGVMM = g_pGVMM;
530 g_pGVMM = NULL;
531 if (RT_UNLIKELY(!VALID_PTR(pGVMM)))
532 {
533 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
534 return;
535 }
536
537 /*
538 * First of all, stop all active timers.
539 */
540 uint32_t cActiveTimers = 0;
541 uint32_t iCpu = pGVMM->cHostCpus;
542 while (iCpu-- > 0)
543 {
544 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
545#ifdef GVMM_SCHED_WITH_PPT
546 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
547 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
548 cActiveTimers++;
549#endif
550 }
551 if (cActiveTimers)
552 RTThreadSleep(1); /* fudge */
553
554 /*
555 * Invalidate the instance data and free the resources.
556 */
557 pGVMM->u32Magic = ~GVMM_MAGIC;
558 RTCritSectRwDelete(&pGVMM->UsedLock);
559 RTCritSectDelete(&pGVMM->CreateDestroyLock);
560
561 pGVMM->iFreeHead = 0;
562 if (pGVMM->iUsedHead)
563 {
564 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
565 pGVMM->iUsedHead = 0;
566 }
567
568#ifdef GVMM_SCHED_WITH_PPT
569 iCpu = pGVMM->cHostCpus;
570 while (iCpu-- > 0)
571 {
572 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
573 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
574 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
575 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
576 }
577#endif
578
579 RTMemFree(pGVMM);
580}
581
582
583/**
584 * A quick hack for setting global config values.
585 *
586 * @returns VBox status code.
587 *
588 * @param pSession The session handle. Used for authentication.
589 * @param pszName The variable name.
590 * @param u64Value The new value.
591 */
592GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
593{
594 /*
595 * Validate input.
596 */
597 PGVMM pGVMM;
598 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
599 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
600 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
601
602 /*
603 * String switch time!
604 */
605 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
606 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
607 int rc = VINF_SUCCESS;
608 pszName += sizeof("/GVMM/") - 1;
609 if (!strcmp(pszName, "cEMTsMeansCompany"))
610 {
611 if (u64Value <= UINT32_MAX)
612 pGVMM->cEMTsMeansCompany = u64Value;
613 else
614 rc = VERR_OUT_OF_RANGE;
615 }
616 else if (!strcmp(pszName, "MinSleepAlone"))
617 {
618 if (u64Value <= RT_NS_100MS)
619 pGVMM->nsMinSleepAlone = u64Value;
620 else
621 rc = VERR_OUT_OF_RANGE;
622 }
623 else if (!strcmp(pszName, "MinSleepCompany"))
624 {
625 if (u64Value <= RT_NS_100MS)
626 pGVMM->nsMinSleepCompany = u64Value;
627 else
628 rc = VERR_OUT_OF_RANGE;
629 }
630 else if (!strcmp(pszName, "EarlyWakeUp1"))
631 {
632 if (u64Value <= RT_NS_100MS)
633 {
634 pGVMM->nsEarlyWakeUp1 = u64Value;
635 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
636 }
637 else
638 rc = VERR_OUT_OF_RANGE;
639 }
640 else if (!strcmp(pszName, "EarlyWakeUp2"))
641 {
642 if (u64Value <= RT_NS_100MS)
643 {
644 pGVMM->nsEarlyWakeUp2 = u64Value;
645 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
646 }
647 else
648 rc = VERR_OUT_OF_RANGE;
649 }
650 else
651 rc = VERR_CFGM_VALUE_NOT_FOUND;
652 return rc;
653}
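
Usage example (hypothetical caller, values picked arbitrarily): the name must carry the /GVMM/ prefix, which the function strips before matching, and values above RT_NS_100MS are rejected with VERR_OUT_OF_RANGE.

/* Example only -- not part of GVMMR0.cpp. */
int rc = GVMMR0SetConfig(pSession, "/GVMM/MinSleepAlone", 500000 /* ns */);
if (RT_SUCCESS(rc))
    rc = GVMMR0SetConfig(pSession, "/GVMM/EarlyWakeUp1", 25000 /* ns */);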
654
655
656/**
657 * A quick hack for getting global config values.
658 *
659 * @returns VBox status code.
660 *
661 * @param pSession The session handle. Used for authentication.
662 * @param pszName The variable name.
663 * @param pu64Value Where to return the value.
664 */
665GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
666{
667 /*
668 * Validate input.
669 */
670 PGVMM pGVMM;
671 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
672 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
673 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
674 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
675
676 /*
677 * String switch time!
678 */
679 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
680 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
681 int rc = VINF_SUCCESS;
682 pszName += sizeof("/GVMM/") - 1;
683 if (!strcmp(pszName, "cEMTsMeansCompany"))
684 *pu64Value = pGVMM->cEMTsMeansCompany;
685 else if (!strcmp(pszName, "MinSleepAlone"))
686 *pu64Value = pGVMM->nsMinSleepAlone;
687 else if (!strcmp(pszName, "MinSleepCompany"))
688 *pu64Value = pGVMM->nsMinSleepCompany;
689 else if (!strcmp(pszName, "EarlyWakeUp1"))
690 *pu64Value = pGVMM->nsEarlyWakeUp1;
691 else if (!strcmp(pszName, "EarlyWakeUp2"))
692 *pu64Value = pGVMM->nsEarlyWakeUp2;
693 else
694 rc = VERR_CFGM_VALUE_NOT_FOUND;
695 return rc;
696}
697
698
699/**
700 * Acquire the 'used' lock in shared mode.
701 *
702 * This prevents destruction of the VM while we're in ring-0.
703 *
704 * @returns IPRT status code, see RTSemFastMutexRequest.
705 * @param a_pGVMM The GVMM instance data.
706 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
707 */
708#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
709
710/**
711 * Release the 'used' lock when owning it in shared mode.
712 *
713 * @returns IPRT status code, see RTSemFastMutexRequest.
714 * @param a_pGVMM The GVMM instance data.
715 * @sa GVMMR0_USED_SHARED_LOCK
716 */
717#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
718
719/**
720 * Acquire the 'used' lock in exclusive mode.
721 *
722 * Only use this function when making changes to the used list.
723 *
724 * @returns IPRT status code, see RTSemFastMutexRequest.
725 * @param a_pGVMM The GVMM instance data.
726 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
727 */
728#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
729
730/**
731 * Release the 'used' lock when owning it in exclusive mode.
732 *
733 * @returns IPRT status code, see RTSemFastMutexRelease.
734 * @param a_pGVMM The GVMM instance data.
735 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
736 */
737#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
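
For orientation, a sketch of the intended usage pattern (illustrative only; the real walkers appear later in this file): traverse the used chain under the shared 'used' lock, and take it exclusively only when relinking handles.

/* Illustrative sketch only -- walking the used list in shared mode. */
int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
AssertRC(rc);
for (uint16_t i = pGVMM->iUsedHead; i != 0; i = pGVMM->aHandles[i].iNext)
{
    PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
    if (pCurGVM)
    {
        /* ... inspect the VM, e.g. for scheduling decisions ... */
    }
}
GVMMR0_USED_SHARED_UNLOCK(pGVMM);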
738
739
740/**
741 * Try acquire the 'create & destroy' lock.
742 *
743 * @returns IPRT status code, see RTSemFastMutexRequest.
744 * @param pGVMM The GVMM instance data.
745 */
746DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
747{
748 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
749 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
750 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
751 return rc;
752}
753
754
755/**
756 * Release the 'create & destroy' lock.
757 *
758 * @returns IPRT status code, see RTSemFastMutexRequest.
759 * @param pGVMM The GVMM instance data.
760 */
761DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
762{
763 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
764 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
765 AssertRC(rc);
766 return rc;
767}
768
769
770/**
771 * Request wrapper for the GVMMR0CreateVM API.
772 *
773 * @returns VBox status code.
774 * @param pReq The request buffer.
775 * @param pSession The session handle. The VM will be associated with this.
776 */
777GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
778{
779 /*
780 * Validate the request.
781 */
782 if (!VALID_PTR(pReq))
783 return VERR_INVALID_POINTER;
784 if (pReq->Hdr.cbReq != sizeof(*pReq))
785 return VERR_INVALID_PARAMETER;
786 if (pReq->pSession != pSession)
787 return VERR_INVALID_POINTER;
788
789 /*
790 * Execute it.
791 */
792 PVM pVM;
793 pReq->pVMR0 = NULL;
794 pReq->pVMR3 = NIL_RTR3PTR;
795 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pVM);
796 if (RT_SUCCESS(rc))
797 {
798 pReq->pVMR0 = pVM;
799 pReq->pVMR3 = pVM->pVMR3;
800 }
801 return rc;
802}
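
For context, a ring-3 caller would prepare the request roughly as sketched below before passing it to the ring-0 entry point. The field names match the checks above; the header magic and the dispatch path (VMMR0_DO_GVMM_CREATE_VM via the support driver) are assumptions for illustration.

/* Sketch only -- preparing a GVMMCREATEVMREQ in ring-3. */
GVMMCREATEVMREQ CreateVMReq;
CreateVMReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;   /* Assumed standard request header init. */
CreateVMReq.Hdr.cbReq    = sizeof(CreateVMReq);    /* Checked by GVMMR0CreateVMReq above. */
CreateVMReq.pSession     = pSession;               /* Must be the session the call is made on. */
CreateVMReq.cCpus        = cCpus;
CreateVMReq.pVMR3        = NIL_RTR3PTR;            /* Filled in on success. */
CreateVMReq.pVMR0        = NIL_RTR0PTR;            /* Filled in on success. */
/* ...dispatch to ring-0 (VMMR0_DO_GVMM_CREATE_VM) via the support driver... */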
803
804
805/**
806 * Allocates the VM structure and registers it with GVM.
807 *
808 * The caller will become the VM owner and thereby the EMT.
809 *
810 * @returns VBox status code.
811 * @param pSession The support driver session.
812 * @param cCpus Number of virtual CPUs for the new VM.
813 * @param ppVM Where to store the pointer to the VM structure.
814 *
815 * @thread EMT.
816 */
817GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM)
818{
819 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
820 PGVMM pGVMM;
821 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
822
823 AssertPtrReturn(ppVM, VERR_INVALID_POINTER);
824 *ppVM = NULL;
825
826 if ( cCpus == 0
827 || cCpus > VMM_MAX_CPU_COUNT)
828 return VERR_INVALID_PARAMETER;
829
830 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
831 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
832 RTPROCESS ProcId = RTProcSelf();
833 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
834
835 /*
836 * The whole allocation process is protected by the lock.
837 */
838 int rc = gvmmR0CreateDestroyLock(pGVMM);
839 AssertRCReturn(rc, rc);
840
841 /*
842 * Only one VM per session.
843 */
844 if (SUPR0GetSessionVM(pSession) != NULL)
845 {
846 gvmmR0CreateDestroyUnlock(pGVMM);
847 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
848 return VERR_ALREADY_EXISTS;
849 }
850
851 /*
852 * Allocate a handle first so we don't waste resources unnecessarily.
853 */
854 uint16_t iHandle = pGVMM->iFreeHead;
855 if (iHandle)
856 {
857 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
858
859 /* consistency checks, a bit paranoid as always. */
860 if ( !pHandle->pVM
861 && !pHandle->pGVM
862 && !pHandle->pvObj
863 && pHandle->iSelf == iHandle)
864 {
865 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
866 if (pHandle->pvObj)
867 {
868 /*
869 * Move the handle from the free to used list and perform permission checks.
870 */
871 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
872 AssertRC(rc);
873
874 pGVMM->iFreeHead = pHandle->iNext;
875 pHandle->iNext = pGVMM->iUsedHead;
876 pGVMM->iUsedHead = iHandle;
877 pGVMM->cVMs++;
878
879 pHandle->pVM = NULL;
880 pHandle->pGVM = NULL;
881 pHandle->pSession = pSession;
882 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
883 pHandle->ProcId = NIL_RTPROCESS;
884
885 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
886
887 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
888 if (RT_SUCCESS(rc))
889 {
890 /*
891 * Allocate the global VM structure (GVM) and initialize it.
892 */
893 PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]));
894 if (pGVM)
895 {
896 pGVM->u32Magic = GVM_MAGIC;
897 pGVM->hSelf = iHandle;
898 pGVM->pVM = NULL;
899 pGVM->cCpus = cCpus;
900 pGVM->pSession = pSession;
901
902 gvmmR0InitPerVMData(pGVM);
903 GMMR0InitPerVMData(pGVM);
904
905 /*
906 * Allocate the shared VM structure and associated page array.
907 */
908 const uint32_t cbVM = RT_UOFFSETOF_DYN(VM, aCpus[cCpus]);
909 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
910 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
911 if (RT_SUCCESS(rc))
912 {
913 PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM);
914 memset(pVM, 0, cPages << PAGE_SHIFT);
915 pVM->enmVMState = VMSTATE_CREATING;
916 pVM->pVMR0 = pVM;
917 pVM->pSession = pSession;
918 pVM->hSelf = iHandle;
919 pVM->cbSelf = cbVM;
920 pVM->cCpus = cCpus;
921 pVM->uCpuExecutionCap = 100; /* default is no cap. */
922 pVM->offVMCPU = RT_UOFFSETOF_DYN(VM, aCpus);
923 AssertCompileMemberAlignment(VM, cpum, 64);
924 AssertCompileMemberAlignment(VM, tm, 64);
925 AssertCompileMemberAlignment(VM, aCpus, PAGE_SIZE);
926
927 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
928 if (RT_SUCCESS(rc))
929 {
930 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
931 for (uint32_t iPage = 0; iPage < cPages; iPage++)
932 {
933 paPages[iPage].uReserved = 0;
934 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
935 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
936 }
937
938 /*
939 * Map them into ring-3.
940 */
941 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
942 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
943 if (RT_SUCCESS(rc))
944 {
945 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
946 pVM->pVMR3 = pVMR3;
947 AssertPtr((void *)pVMR3);
948
949 /* Initialize all the VM pointers. */
950 for (VMCPUID i = 0; i < cCpus; i++)
951 {
952 pVM->aCpus[i].idCpu = i;
953 pVM->aCpus[i].pVMR0 = pVM;
954 pVM->aCpus[i].pVMR3 = pVMR3;
955 pVM->aCpus[i].idHostCpu = NIL_RTCPUID;
956 pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
957 }
958
959 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
960 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
961 NIL_RTR0PROCESS);
962 if (RT_SUCCESS(rc))
963 {
964 pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
965 AssertPtr((void *)pVM->paVMPagesR3);
966
967 /* complete the handle - take the UsedLock sem just to be careful. */
968 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
969 AssertRC(rc);
970
971 pHandle->pVM = pVM;
972 pHandle->pGVM = pGVM;
973 pHandle->hEMT0 = hEMT0;
974 pHandle->ProcId = ProcId;
975 pGVM->pVM = pVM;
976 pGVM->pVMR3 = pVMR3;
977 pGVM->aCpus[0].hEMT = hEMT0;
978 pVM->aCpus[0].hNativeThreadR0 = hEMT0;
979 pGVMM->cEMTs += cCpus;
980
981 for (VMCPUID i = 0; i < cCpus; i++)
982 {
983 pGVM->aCpus[i].pVCpu = &pVM->aCpus[i];
984 pGVM->aCpus[i].pVM = pVM;
985 }
986
987 /* Associate it with the session and create the context hook for EMT0. */
988 rc = SUPR0SetSessionVM(pSession, pGVM, pVM);
989 if (RT_SUCCESS(rc))
990 {
991 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[0]);
992 if (RT_SUCCESS(rc))
993 {
994 /*
995 * Done!
996 */
997 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pVM, ProcId, (void *)hEMT0, cCpus);
998
999 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1000 gvmmR0CreateDestroyUnlock(pGVMM);
1001
1002 CPUMR0RegisterVCpuThread(&pVM->aCpus[0]);
1003
1004 *ppVM = pVM;
1005 Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVMR3, pGVM, iHandle));
1006 return VINF_SUCCESS;
1007 }
1008
1009 SUPR0SetSessionVM(pSession, NULL, NULL);
1010 }
1011 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1012 }
1013
1014 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1015 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1016 }
1017 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
1018 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1019 }
1020 RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
1021 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1022 }
1023 }
1024 }
1025 /* else: The user wasn't permitted to create this VM. */
1026
1027 /*
1028 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1029 * object reference here. A little extra mess because of non-recursive lock.
1030 */
1031 void *pvObj = pHandle->pvObj;
1032 pHandle->pvObj = NULL;
1033 gvmmR0CreateDestroyUnlock(pGVMM);
1034
1035 SUPR0ObjRelease(pvObj, pSession);
1036
1037 SUPR0Printf("GVMMR0CreateVM: failed, rc=%d\n", rc);
1038 return rc;
1039 }
1040
1041 rc = VERR_NO_MEMORY;
1042 }
1043 else
1044 rc = VERR_GVMM_IPE_1;
1045 }
1046 else
1047 rc = VERR_GVM_TOO_MANY_VMS;
1048
1049 gvmmR0CreateDestroyUnlock(pGVMM);
1050 return rc;
1051}
1052
1053
1054/**
1055 * Initializes the per VM data belonging to GVMM.
1056 *
1057 * @param pGVM Pointer to the global VM structure.
1058 */
1059static void gvmmR0InitPerVMData(PGVM pGVM)
1060{
1061 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1062 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1063 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1064 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1065 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1066 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1067 pGVM->gvmm.s.fDoneVMMR0Init = false;
1068 pGVM->gvmm.s.fDoneVMMR0Term = false;
1069
1070 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1071 {
1072 pGVM->aCpus[i].idCpu = i;
1073 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1074 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1075 pGVM->aCpus[i].pGVM = pGVM;
1076 pGVM->aCpus[i].pVCpu = NULL;
1077 pGVM->aCpus[i].pVM = NULL;
1078 }
1079}
1080
1081
1082/**
1083 * Does the VM initialization.
1084 *
1085 * @returns VBox status code.
1086 * @param pGVM The global (ring-0) VM structure.
1087 */
1088GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1089{
1090 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1091
1092 int rc = VERR_INTERNAL_ERROR_3;
1093 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1094 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1095 {
1096 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1097 {
1098 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1099 if (RT_FAILURE(rc))
1100 {
1101 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1102 break;
1103 }
1104 }
1105 }
1106 else
1107 rc = VERR_WRONG_ORDER;
1108
1109 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1110 return rc;
1111}
1112
1113
1114/**
1115 * Indicates that we're done with the ring-0 initialization
1116 * of the VM.
1117 *
1118 * @param pGVM The global (ring-0) VM structure.
1119 * @thread EMT(0)
1120 */
1121GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1122{
1123 /* Set the indicator. */
1124 pGVM->gvmm.s.fDoneVMMR0Init = true;
1125}
1126
1127
1128/**
1129 * Indicates that we're doing the ring-0 termination of the VM.
1130 *
1131 * @returns true if termination hasn't been done already, false if it has.
1132 * @param pGVM Pointer to the global VM structure. Optional.
1133 * @thread EMT(0) or session cleanup thread.
1134 */
1135GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1136{
1137 /* Validate the VM structure, state and handle. */
1138 AssertPtrReturn(pGVM, false);
1139
1140 /* Set the indicator. */
1141 if (pGVM->gvmm.s.fDoneVMMR0Term)
1142 return false;
1143 pGVM->gvmm.s.fDoneVMMR0Term = true;
1144 return true;
1145}
1146
1147
1148/**
1149 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1150 *
1151 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1152 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1153 * would've been nice if the caller was actually the EMT thread or that we somehow
1154 * could've associated the calling thread with the VM up front.
1155 *
1156 * @returns VBox status code.
1157 * @param pGVM The global (ring-0) VM structure.
1158 * @param pVM The cross context VM structure.
1159 *
1160 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1161 */
1162GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM, PVM pVM)
1163{
1164 LogFlow(("GVMMR0DestroyVM: pGVM=%p pVM=%p\n", pGVM, pVM));
1165 PGVMM pGVMM;
1166 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1167
1168 /*
1169 * Validate the VM structure, state and caller.
1170 */
1171 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1172 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1173 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1174 AssertReturn(pGVM->pVM == pVM, VERR_INVALID_POINTER);
1175 AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState),
1176 VERR_WRONG_ORDER);
1177
1178 uint32_t hGVM = pGVM->hSelf;
1179 ASMCompilerBarrier();
1180 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1181 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1182
1183 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1184 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1185
1186 RTPROCESS ProcId = RTProcSelf();
1187 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1188 AssertReturn( ( pHandle->hEMT0 == hSelf
1189 && pHandle->ProcId == ProcId)
1190 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1191
1192 /*
1193 * Lookup the handle and destroy the object.
1194 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1195 * object, we take some precautions against racing callers just in case...
1196 */
1197 int rc = gvmmR0CreateDestroyLock(pGVMM);
1198 AssertRC(rc);
1199
1200 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1201 if ( pHandle->pVM == pVM
1202 && ( ( pHandle->hEMT0 == hSelf
1203 && pHandle->ProcId == ProcId)
1204 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1205 && VALID_PTR(pHandle->pvObj)
1206 && VALID_PTR(pHandle->pSession)
1207 && VALID_PTR(pHandle->pGVM)
1208 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1209 {
1210 /* Check that other EMTs have deregistered. */
1211 uint32_t cNotDeregistered = 0;
1212 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1213 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */
1214 if (cNotDeregistered == 0)
1215 {
1216 /* Grab the object pointer. */
1217 void *pvObj = pHandle->pvObj;
1218 pHandle->pvObj = NULL;
1219 gvmmR0CreateDestroyUnlock(pGVMM);
1220
1221 SUPR0ObjRelease(pvObj, pHandle->pSession);
1222 }
1223 else
1224 {
1225 gvmmR0CreateDestroyUnlock(pGVMM);
1226 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1227 }
1228 }
1229 else
1230 {
1231 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n",
1232 pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf);
1233 gvmmR0CreateDestroyUnlock(pGVMM);
1234 rc = VERR_GVMM_IPE_2;
1235 }
1236
1237 return rc;
1238}
1239
1240
1241/**
1242 * Performs VM cleanup task as part of object destruction.
1243 *
1244 * @param pGVM The GVM pointer.
1245 */
1246static void gvmmR0CleanupVM(PGVM pGVM)
1247{
1248 if ( pGVM->gvmm.s.fDoneVMMR0Init
1249 && !pGVM->gvmm.s.fDoneVMMR0Term)
1250 {
1251 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1252 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM)
1253 {
1254 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1255 VMMR0TermVM(pGVM, pGVM->pVM, NIL_VMCPUID);
1256 }
1257 else
1258 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM));
1259 }
1260
1261 GMMR0CleanupVM(pGVM);
1262#ifdef VBOX_WITH_NEM_R0
1263 NEMR0CleanupVM(pGVM);
1264#endif
1265
1266 AssertCompile((uintptr_t)NIL_RTTHREADCTXHOOK == 0); /* Depends on zero initialized memory working for NIL at the moment. */
1267 for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
1268 {
1269 /** @todo Can we busy wait here for all thread-context hooks to be
1270 * deregistered before releasing (destroying) it? Only until we find a
1271 * solution for not deregistering hooks every time we're leaving HMR0
1272 * context. */
1273 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->pVM->aCpus[idCpu]);
1274 }
1275}
1276
1277
1278/**
1279 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1280 *
1281 * pvUser1 is the GVM instance pointer.
1282 * pvUser2 is the handle pointer.
1283 */
1284static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1285{
1286 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1287
1288 NOREF(pvObj);
1289
1290 /*
1291 * Some quick, paranoid, input validation.
1292 */
1293 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1294 AssertPtr(pHandle);
1295 PGVMM pGVMM = (PGVMM)pvUser1;
1296 Assert(pGVMM == g_pGVMM);
1297 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1298 if ( !iHandle
1299 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1300 || iHandle != pHandle->iSelf)
1301 {
1302 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1303 return;
1304 }
1305
1306 int rc = gvmmR0CreateDestroyLock(pGVMM);
1307 AssertRC(rc);
1308 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1309 AssertRC(rc);
1310
1311 /*
1312 * This is a tad slow but a doubly linked list is too much hassle.
1313 */
1314 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1315 {
1316 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1317 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1318 gvmmR0CreateDestroyUnlock(pGVMM);
1319 return;
1320 }
1321
1322 if (pGVMM->iUsedHead == iHandle)
1323 pGVMM->iUsedHead = pHandle->iNext;
1324 else
1325 {
1326 uint16_t iPrev = pGVMM->iUsedHead;
1327 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1328 while (iPrev)
1329 {
1330 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1331 {
1332 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1333 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1334 gvmmR0CreateDestroyUnlock(pGVMM);
1335 return;
1336 }
1337 if (RT_UNLIKELY(c-- <= 0))
1338 {
1339 iPrev = 0;
1340 break;
1341 }
1342
1343 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1344 break;
1345 iPrev = pGVMM->aHandles[iPrev].iNext;
1346 }
1347 if (!iPrev)
1348 {
1349 SUPR0Printf("GVM: can't find the previous handle of %d!\n", pHandle->iSelf);
1350 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1351 gvmmR0CreateDestroyUnlock(pGVMM);
1352 return;
1353 }
1354
1355 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1356 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1357 }
1358 pHandle->iNext = 0;
1359 pGVMM->cVMs--;
1360
1361 /*
1362 * Do the global cleanup round.
1363 */
1364 PGVM pGVM = pHandle->pGVM;
1365 if ( VALID_PTR(pGVM)
1366 && pGVM->u32Magic == GVM_MAGIC)
1367 {
1368 pGVMM->cEMTs -= pGVM->cCpus;
1369
1370 if (pGVM->pSession)
1371 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1372
1373 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1374
1375 gvmmR0CleanupVM(pGVM);
1376
1377 /*
1378 * Do the GVMM cleanup - must be done last.
1379 */
1380 /* The VM and VM pages mappings/allocations. */
1381 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1382 {
1383 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1384 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1385 }
1386
1387 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1388 {
1389 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1390 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1391 }
1392
1393 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1394 {
1395 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1396 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1397 }
1398
1399 if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ)
1400 {
1401 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc);
1402 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1403 }
1404
1405 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1406 {
1407 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1408 {
1409 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1410 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1411 }
1412 }
1413
1414 /* the GVM structure itself. */
1415 pGVM->u32Magic |= UINT32_C(0x80000000);
1416 RTMemFree(pGVM);
1417
1418 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1419 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1420 AssertRC(rc);
1421 }
1422 /* else: GVMMR0CreateVM cleanup. */
1423
1424 /*
1425 * Free the handle.
1426 */
1427 pHandle->iNext = pGVMM->iFreeHead;
1428 pGVMM->iFreeHead = iHandle;
1429 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1430 ASMAtomicWriteNullPtr(&pHandle->pVM);
1431 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1432 ASMAtomicWriteNullPtr(&pHandle->pSession);
1433 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1434 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1435
1436 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1437 gvmmR0CreateDestroyUnlock(pGVMM);
1438 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1439}
1440
1441
1442/**
1443 * Registers the calling thread as the EMT of a Virtual CPU.
1444 *
1445 * Note that VCPU 0 is automatically registered during VM creation.
1446 *
1447 * @returns VBox status code
1448 * @param pGVM The global (ring-0) VM structure.
1449 * @param pVM The cross context VM structure.
1450 * @param idCpu VCPU id to register the current thread as.
1451 */
1452GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1453{
1454 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1455
1456 /*
1457 * Validate the VM structure, state and handle.
1458 */
1459 PGVMM pGVMM;
1460 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /* fTakeUsedLock */); /** @todo take lock here. */
1461 if (RT_SUCCESS(rc))
1462 {
1463 if (idCpu < pGVM->cCpus)
1464 {
1465 /* Check that the EMT isn't already assigned to a thread. */
1466 if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
1467 {
1468 Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1469
1470 /* A thread may only be one EMT. */
1471 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1472 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1473 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1474 if (RT_SUCCESS(rc))
1475 {
1476 /*
1477 * Do the assignment, then try setup the hook. Undo if that fails.
1478 */
1479 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1480
1481 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[idCpu]);
1482 if (RT_SUCCESS(rc))
1483 CPUMR0RegisterVCpuThread(&pVM->aCpus[idCpu]);
1484 else
1485 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1486 }
1487 }
1488 else
1489 rc = VERR_ACCESS_DENIED;
1490 }
1491 else
1492 rc = VERR_INVALID_CPU_ID;
1493 }
1494 return rc;
1495}
1496
1497
1498/**
1499 * Deregisters the calling thread as the EMT of a Virtual CPU.
1500 *
1501 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1502 *
1503 * @returns VBox status code
1504 * @param pGVM The global (ring-0) VM structure.
1505 * @param pVM The cross context VM structure.
1506 * @param idCpu VCPU id of the calling EMT to deregister.
1507 */
1508GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1509{
1510 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1511
1512 /*
1513 * Validate the VM structure, state and handle.
1514 */
1515 PGVMM pGVMM;
1516 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
1517 if (RT_SUCCESS(rc))
1518 {
1519 /*
1520 * Take the destruction lock and recheck the handle state to
1521 * prevent racing GVMMR0DestroyVM.
1522 */
1523 gvmmR0CreateDestroyLock(pGVMM);
1524 uint32_t hSelf = pGVM->hSelf;
1525 ASMCompilerBarrier();
1526 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1527 && pGVMM->aHandles[hSelf].pvObj != NULL
1528 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1529 {
1530 /*
1531 * Do per-EMT cleanups.
1532 */
1533 VMMR0ThreadCtxHookDestroyForEmt(&pVM->aCpus[idCpu]);
1534
1535 /*
1536 * Invalidate hEMT. We don't use NIL here as that would allow
1537 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1538 */
1539 AssertCompile(~(RTNATIVETHREAD)1 != NIL_RTNATIVETHREAD);
1540 pGVM->aCpus[idCpu].hEMT = ~(RTNATIVETHREAD)1;
1541 pVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1542 }
1543
1544 gvmmR0CreateDestroyUnlock(pGVMM);
1545 }
1546 return rc;
1547}
1548
1549
1550/**
1551 * Lookup a GVM structure by its handle.
1552 *
1553 * @returns The GVM pointer on success, NULL on failure.
1554 * @param hGVM The global VM handle. Asserts on bad handle.
1555 */
1556GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1557{
1558 PGVMM pGVMM;
1559 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1560
1561 /*
1562 * Validate.
1563 */
1564 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1565 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1566
1567 /*
1568 * Look it up.
1569 */
1570 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1571 AssertPtrReturn(pHandle->pVM, NULL);
1572 AssertPtrReturn(pHandle->pvObj, NULL);
1573 PGVM pGVM = pHandle->pGVM;
1574 AssertPtrReturn(pGVM, NULL);
1575 AssertReturn(pGVM->pVM == pHandle->pVM, NULL);
1576
1577 return pHandle->pGVM;
1578}
1579
1580
1581/**
1582 * Lookup a GVM structure by the shared VM structure.
1583 *
1584 * The calling thread must be in the same process as the VM. All current lookups
1585 * are by threads inside the same process, so this will not be an issue.
1586 *
1587 * @returns VBox status code.
1588 * @param pVM The cross context VM structure.
1589 * @param ppGVM Where to store the GVM pointer.
1590 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1591 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1592 * shared mode when requested.
1593 *
1594 * Be very careful if not taking the lock as it's
1595 * possible that the VM will disappear then!
1596 *
1597 * @remark This will not assert on an invalid pVM but will try to return silently.
1598 */
1599static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1600{
1601 RTPROCESS ProcId = RTProcSelf();
1602 PGVMM pGVMM;
1603 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1604
1605 /*
1606 * Validate.
1607 */
1608 if (RT_UNLIKELY( !VALID_PTR(pVM)
1609 || ((uintptr_t)pVM & PAGE_OFFSET_MASK)))
1610 return VERR_INVALID_POINTER;
1611 if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING
1612 || pVM->enmVMState >= VMSTATE_TERMINATED))
1613 return VERR_INVALID_POINTER;
1614
1615 uint16_t hGVM = pVM->hSelf;
1616 ASMCompilerBarrier();
1617 if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE
1618 || hGVM >= RT_ELEMENTS(pGVMM->aHandles)))
1619 return VERR_INVALID_HANDLE;
1620
1621 /*
1622 * Look it up.
1623 */
1624 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1625 PGVM pGVM;
1626 if (fTakeUsedLock)
1627 {
1628 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1629 AssertRCReturn(rc, rc);
1630
1631 pGVM = pHandle->pGVM;
1632 if (RT_UNLIKELY( pHandle->pVM != pVM
1633 || pHandle->ProcId != ProcId
1634 || !VALID_PTR(pHandle->pvObj)
1635 || !VALID_PTR(pGVM)
1636 || pGVM->pVM != pVM))
1637 {
1638 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1639 return VERR_INVALID_HANDLE;
1640 }
1641 }
1642 else
1643 {
1644 if (RT_UNLIKELY(pHandle->pVM != pVM))
1645 return VERR_INVALID_HANDLE;
1646 if (RT_UNLIKELY(pHandle->ProcId != ProcId))
1647 return VERR_INVALID_HANDLE;
1648 if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj)))
1649 return VERR_INVALID_HANDLE;
1650
1651 pGVM = pHandle->pGVM;
1652 if (RT_UNLIKELY(!VALID_PTR(pGVM)))
1653 return VERR_INVALID_HANDLE;
1654 if (RT_UNLIKELY(pGVM->pVM != pVM))
1655 return VERR_INVALID_HANDLE;
1656 }
1657
1658 *ppGVM = pGVM;
1659 *ppGVMM = pGVMM;
1660 return VINF_SUCCESS;
1661}
1662
1663
1664/**
1665 * Fast look up a GVM structure by the cross context VM structure.
1666 *
1667 * This is mainly used as a glue function, so performance is important.
1668 *
1669 * @returns GVM on success, NULL on failure.
1670 * @param pVM The cross context VM structure. ASSUMES to be
1671 * reasonably valid, so we can do fewer checks than in
1672 * gvmmR0ByVM.
1673 *
1674 * @note Do not use this on pVM structures from userland!
1675 */
1676GVMMR0DECL(PGVM) GVMMR0FastGetGVMByVM(PVM pVM)
1677{
1678 AssertPtr(pVM);
1679 Assert(!((uintptr_t)pVM & PAGE_OFFSET_MASK));
1680
1681 PGVMM pGVMM;
1682 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1683
1684 /*
1685 * Validate.
1686 */
1687 uint16_t hGVM = pVM->hSelf;
1688 ASMCompilerBarrier();
1689 AssertReturn(hGVM != NIL_GVM_HANDLE && hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1690
1691 /*
1692 * Look it up and check pVM against the value in the handle and GVM structures.
1693 */
1694 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1695 AssertReturn(pHandle->pVM == pVM, NULL);
1696
1697 PGVM pGVM = pHandle->pGVM;
1698 AssertPtrReturn(pGVM, NULL);
1699 AssertReturn(pGVM->pVM == pVM, NULL);
1700
1701 return pGVM;
1702}
1703
1704
1705/**
1706 * Check that the given GVM and VM structures match up.
1707 *
1708 * The calling thread must be in the same process as the VM. All current lookups
1709 * are by threads inside the same process, so this will not be an issue.
1710 *
1711 * @returns VBox status code.
1712 * @param pGVM The global (ring-0) VM structure.
1713 * @param pVM The cross context VM structure.
1714 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1715 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1716 * shared mode when requested.
1717 *
1718 * Be very careful if not taking the lock as it's
1719 * possible that the VM will disappear then!
1720 *
1721 * @remark This will not assert on an invalid pVM but will try to return silently.
1722 */
1723static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1724{
1725 /*
1726 * Check the pointers.
1727 */
1728 int rc;
1729 if (RT_LIKELY(RT_VALID_PTR(pGVM)))
1730 {
1731 if (RT_LIKELY( RT_VALID_PTR(pVM)
1732 && ((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0))
1733 {
1734 if (RT_LIKELY(pGVM->pVM == pVM))
1735 {
1736 /*
1737 * Get the pGVMM instance and check the VM handle.
1738 */
1739 PGVMM pGVMM;
1740 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1741
1742 uint16_t hGVM = pGVM->hSelf;
1743 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1744 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1745 {
1746 RTPROCESS const pidSelf = RTProcSelf();
1747 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1748 if (fTakeUsedLock)
1749 {
1750 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1751 AssertRCReturn(rc, rc);
1752 }
1753
1754 if (RT_LIKELY( pHandle->pGVM == pGVM
1755 && pHandle->pVM == pVM
1756 && pHandle->ProcId == pidSelf
1757 && RT_VALID_PTR(pHandle->pvObj)))
1758 {
1759 /*
1760 * Some more VM data consistency checks.
1761 */
1762 if (RT_LIKELY( pVM->cCpus == pGVM->cCpus
1763 && pVM->hSelf == hGVM
1764 && pVM->enmVMState >= VMSTATE_CREATING
1765 && pVM->enmVMState <= VMSTATE_TERMINATED
1766 && pVM->pVMR0 == pVM))
1767 {
1768 *ppGVMM = pGVMM;
1769 return VINF_SUCCESS;
1770 }
1771 }
1772
1773 if (fTakeUsedLock)
1774 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1775 }
1776 }
1777 rc = VERR_INVALID_VM_HANDLE;
1778 }
1779 else
1780 rc = VERR_INVALID_POINTER;
1781 }
1782 else
1783 rc = VERR_INVALID_POINTER;
1784 return rc;
1785}
1786
1787
1788/**
1789 * Check that the given GVM and VM structures match up.
1790 *
1791 * The calling thread must be in the same process as the VM. All current lookups
1792 * are by threads inside the same process, so this will not be an issue.
1793 *
1794 * @returns VBox status code.
1795 * @param pGVM The global (ring-0) VM structure.
1796 * @param pVM The cross context VM structure.
1797 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
1798 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1799 * @thread EMT
1800 *
1801 * @remarks This will assert in all failure paths.
1802 */
1803static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM)
1804{
1805 /*
1806 * Check the pointers.
1807 */
1808 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1809
1810 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1811 AssertReturn(((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
1812 AssertReturn(pGVM->pVM == pVM, VERR_INVALID_VM_HANDLE);
1813
1814
1815 /*
1816 * Get the pGVMM instance and check the VM handle.
1817 */
1818 PGVMM pGVMM;
1819 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1820
1821 uint16_t hGVM = pGVM->hSelf;
1822 ASMCompilerBarrier();
1823 AssertReturn( hGVM != NIL_GVM_HANDLE
1824 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1825
1826 RTPROCESS const pidSelf = RTProcSelf();
1827 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1828 AssertReturn( pHandle->pGVM == pGVM
1829 && pHandle->pVM == pVM
1830 && pHandle->ProcId == pidSelf
1831 && RT_VALID_PTR(pHandle->pvObj),
1832 VERR_INVALID_HANDLE);
1833
1834 /*
1835 * Check the EMT claim.
1836 */
1837 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
1838 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1839 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1840
1841 /*
1842 * Some more VM data consistency checks.
1843 */
1844 AssertReturn(pVM->cCpus == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
1845 AssertReturn(pVM->hSelf == hGVM, VERR_INCONSISTENT_VM_HANDLE);
1846 AssertReturn(pVM->pVMR0 == pVM, VERR_INCONSISTENT_VM_HANDLE);
1847 AssertReturn( pVM->enmVMState >= VMSTATE_CREATING
1848 && pVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
1849
1850 *ppGVMM = pGVMM;
1851 return VINF_SUCCESS;
1852}
1853
1854
1855/**
1856 * Validates a GVM/VM pair.
1857 *
1858 * @returns VBox status code.
1859 * @param pGVM The global (ring-0) VM structure.
1860 * @param pVM The cross context VM structure.
1861 */
1862GVMMR0DECL(int) GVMMR0ValidateGVMandVM(PGVM pGVM, PVM pVM)
1863{
1864 PGVMM pGVMM;
1865 return gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /*fTakeUsedLock*/);
1866}
1867
1868
1869
1870/**
1871 * Validates a GVM/VM/EMT combo.
1872 *
1873 * @returns VBox status code.
1874 * @param pGVM The global (ring-0) VM structure.
1875 * @param pVM The cross context VM structure.
1876 * @param idCpu The Virtual CPU ID of the calling EMT.
1877 * @thread EMT(idCpu)
1878 */
1879GVMMR0DECL(int) GVMMR0ValidateGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1880{
1881 PGVMM pGVMM;
1882 return gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
1883}
1884
1885
1886/**
1887 * Looks up the VM belonging to the specified EMT thread.
1888 *
1889 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1890 * unnecessary kernel panics when the EMT thread hits an assertion. The
1891 * caller may or may not be an EMT thread.
1892 *
1893 * @returns Pointer to the VM on success, NULL on failure.
1894 * @param hEMT The native thread handle of the EMT.
1895 * NIL_RTNATIVETHREAD means the current thread
1896 */
1897GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1898{
1899 /*
1900 * No Assertions here as we're usually called in a AssertMsgN or
1901 * RTAssert* context.
1902 */
1903 PGVMM pGVMM = g_pGVMM;
1904 if ( !VALID_PTR(pGVMM)
1905 || pGVMM->u32Magic != GVMM_MAGIC)
1906 return NULL;
1907
1908 if (hEMT == NIL_RTNATIVETHREAD)
1909 hEMT = RTThreadNativeSelf();
1910 RTPROCESS ProcId = RTProcSelf();
1911
1912 /*
1913 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1914 */
1915/** @todo introduce some pid hash table here, please. */
1916 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1917 {
1918 if ( pGVMM->aHandles[i].iSelf == i
1919 && pGVMM->aHandles[i].ProcId == ProcId
1920 && VALID_PTR(pGVMM->aHandles[i].pvObj)
1921 && VALID_PTR(pGVMM->aHandles[i].pVM)
1922 && VALID_PTR(pGVMM->aHandles[i].pGVM))
1923 {
1924 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1925 return pGVMM->aHandles[i].pVM;
1926
1927 /* This is fairly safe with the current process per VM approach. */
1928 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1929 VMCPUID const cCpus = pGVM->cCpus;
1930 ASMCompilerBarrier();
1931 if ( cCpus < 1
1932 || cCpus > VMM_MAX_CPU_COUNT)
1933 continue;
1934 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1935 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1936 return pGVMM->aHandles[i].pVM;
1937 }
1938 }
1939 return NULL;
1940}
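
/*
 * Illustrative sketch (not part of the original file): how assertion or logging
 * code might use GVMMR0GetVMByEMT() to tag a message with the VM owning the
 * current thread. The helper name is an assumption made purely for illustration.
 */
#if 0 /* example only */
static void vmmR0ExampleAssertionHelper(void)
{
    PVM pVM = GVMMR0GetVMByEMT(NIL_RTNATIVETHREAD); /* NIL means the current thread */
    if (pVM)
        SUPR0Printf("Assertion hit on an EMT of VM %p (hSelf=%#x)\n", pVM, pVM->hSelf);
    else
        SUPR0Printf("Assertion hit on a thread that is not a registered EMT\n");
}
#endif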
1941
1942
1943/**
1944 * Looks up the GVMCPU belonging to the specified EMT thread.
1945 *
1946 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1947 * unnecessary kernel panics when the EMT thread hits an assertion. The
1948 * caller may or may not be an EMT thread.
1949 *
1950 * @returns Pointer to the GVMCPU on success, NULL on failure.
1951 * @param hEMT The native thread handle of the EMT.
1952 * NIL_RTNATIVETHREAD means the current thread
1953 */
1954GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
1955{
1956 /*
1957 * No Assertions here as we're usually called in a AssertMsgN,
1958 * RTAssert*, Log and LogRel contexts.
1959 */
1960 PGVMM pGVMM = g_pGVMM;
1961 if ( !VALID_PTR(pGVMM)
1962 || pGVMM->u32Magic != GVMM_MAGIC)
1963 return NULL;
1964
1965 if (hEMT == NIL_RTNATIVETHREAD)
1966 hEMT = RTThreadNativeSelf();
1967 RTPROCESS ProcId = RTProcSelf();
1968
1969 /*
1970 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1971 */
1972/** @todo introduce some pid hash table here, please. */
1973 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1974 {
1975 if ( pGVMM->aHandles[i].iSelf == i
1976 && pGVMM->aHandles[i].ProcId == ProcId
1977 && VALID_PTR(pGVMM->aHandles[i].pvObj)
1978 && VALID_PTR(pGVMM->aHandles[i].pVM)
1979 && VALID_PTR(pGVMM->aHandles[i].pGVM))
1980 {
1981 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1982 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1983 return &pGVM->aCpus[0];
1984
1985 /* This is fairly safe with the current process per VM approach. */
1986 VMCPUID const cCpus = pGVM->cCpus;
1987 ASMCompilerBarrier();
1988 ASMCompilerBarrier();
1989 if ( cCpus < 1
1990 || cCpus > VMM_MAX_CPU_COUNT)
1991 continue;
1992 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1993 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1994 return &pGVM->aCpus[idCpu];
1995 }
1996 }
1997 return NULL;
1998}
1999
2000
2001/**
2002 * This will wake up expired and soon-to-be expired VMs.
2003 *
2004 * @returns Number of VMs that have been woken up.
2005 * @param pGVMM Pointer to the GVMM instance data.
2006 * @param u64Now The current time.
2007 */
2008static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
2009{
2010 /*
2011 * Skip this if it has been disabled because of high resolution wake-ups or by
2012 * the user.
2013 */
2014 if (!pGVMM->fDoEarlyWakeUps)
2015 return 0;
2016
2017/** @todo Rewrite this algorithm. See performance defect XYZ. */
2018
2019 /*
2020 * A cheap optimization to stop wasting so much time here on big setups.
2021 */
2022 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
2023 if ( pGVMM->cHaltedEMTs == 0
2024 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
2025 return 0;
2026
2027 /*
2028 * Only one thread doing this at a time.
2029 */
2030 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
2031 return 0;
2032
2033 /*
2034 * The first pass will wake up VMs which have actually expired
2035 * and look for VMs that should be woken up in the 2nd and 3rd passes.
2036 */
2037 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
2038 uint64_t u64Min = UINT64_MAX;
2039 unsigned cWoken = 0;
2040 unsigned cHalted = 0;
2041 unsigned cTodo2nd = 0;
2042 unsigned cTodo3rd = 0;
2043 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2044 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2045 i = pGVMM->aHandles[i].iNext)
2046 {
2047 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2048 if ( VALID_PTR(pCurGVM)
2049 && pCurGVM->u32Magic == GVM_MAGIC)
2050 {
2051 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2052 {
2053 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2054 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2055 if (u64)
2056 {
2057 if (u64 <= u64Now)
2058 {
2059 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2060 {
2061 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2062 AssertRC(rc);
2063 cWoken++;
2064 }
2065 }
2066 else
2067 {
2068 cHalted++;
2069 if (u64 <= uNsEarlyWakeUp1)
2070 cTodo2nd++;
2071 else if (u64 <= uNsEarlyWakeUp2)
2072 cTodo3rd++;
2073 else if (u64 < u64Min)
2074                            u64Min = u64; /* remember the earliest future expiry for the next-wakeup hint */
2075 }
2076 }
2077 }
2078 }
2079 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2080 }
2081
2082 if (cTodo2nd)
2083 {
2084 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2085 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2086 i = pGVMM->aHandles[i].iNext)
2087 {
2088 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2089 if ( VALID_PTR(pCurGVM)
2090 && pCurGVM->u32Magic == GVM_MAGIC)
2091 {
2092 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2093 {
2094 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2095 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2096 if ( u64
2097 && u64 <= uNsEarlyWakeUp1)
2098 {
2099 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2100 {
2101 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2102 AssertRC(rc);
2103 cWoken++;
2104 }
2105 }
2106 }
2107 }
2108 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2109 }
2110 }
2111
2112 if (cTodo3rd)
2113 {
2114 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2115 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2116 i = pGVMM->aHandles[i].iNext)
2117 {
2118 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2119 if ( VALID_PTR(pCurGVM)
2120 && pCurGVM->u32Magic == GVM_MAGIC)
2121 {
2122 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2123 {
2124 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2125 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2126 if ( u64
2127 && u64 <= uNsEarlyWakeUp2)
2128 {
2129 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2130 {
2131 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2132 AssertRC(rc);
2133 cWoken++;
2134 }
2135 }
2136 }
2137 }
2138 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2139 }
2140 }
2141
2142 /*
2143 * Set the minimum value.
2144 */
2145 pGVMM->uNsNextEmtWakeup = u64Min;
2146
2147 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2148 return cWoken;
2149}
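
/*
 * Worked example (illustrative, not part of the original file): assuming, purely
 * for illustration, nsEarlyWakeUp1 = 25000 ns and nsEarlyWakeUp2 = 50000 ns, an
 * EMT whose u64HaltExpire has already passed is woken in the first pass; one
 * expiring within 25 us of u64Now is woken in the second pass; one expiring
 * within 50 us is woken in the third pass; anything later only contributes to
 * the uNsNextEmtWakeup minimum used by the cheap early-out at the top of this
 * function.
 */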
2150
2151
2152/**
2153 * Halt the EMT thread.
2154 *
2155 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2156 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2157 * @param pGVM The global (ring-0) VM structure.
2158 * @param pVM The cross context VM structure.
2159 * @param pCurGVCpu         The global (ring-0) virtual CPU structure of the calling EMT.
2160 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2161 * @thread EMT(pCurGVCpu).
2162 */
2163GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PVM pVM, PGVMCPU pCurGVCpu, uint64_t u64ExpireGipTime)
2164{
2165 LogFlow(("GVMMR0SchedHalt: pGVM=%p pVM=%p pCurGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2166 pGVM, pVM, pCurGVCpu, pCurGVCpu->idCpu, u64ExpireGipTime));
2167 GVMM_CHECK_SMAP_SETUP();
2168 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2169
2170 PGVMM pGVMM;
2171 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2172
2173 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2174 Assert(!pCurGVCpu->gvmm.s.u64HaltExpire);
2175
2176 /*
2177 * If we're doing early wake-ups, we must take the UsedList lock before we
2178 * start querying the current time.
2179 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2180 */
2181 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2182 if (fDoEarlyWakeUps)
2183 {
2184 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2185 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2186 }
2187
2188 pCurGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
2189
2190    /* GIP hack: We might be frequently sleeping for short intervals where the
2191 difference between GIP and system time matters on systems with high resolution
2192 system time. So, convert the input from GIP to System time in that case. */
2193 Assert(ASMGetFlags() & X86_EFL_IF);
2194 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2195 const uint64_t u64NowGip = RTTimeNanoTS();
2196 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2197
2198 if (fDoEarlyWakeUps)
2199 {
2200 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2201 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2202 }
2203
2204 /*
2205 * Go to sleep if we must...
2206 * Cap the sleep time to 1 second to be on the safe side.
2207 */
2208 int rc;
2209 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2210 if ( u64NowGip < u64ExpireGipTime
2211 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2212 ? pGVMM->nsMinSleepCompany
2213 : pGVMM->nsMinSleepAlone))
2214 {
2215 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2216 if (cNsInterval > RT_NS_1SEC)
2217 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2218 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2219 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2220 if (fDoEarlyWakeUps)
2221 {
2222 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2223 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2224 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2225 }
2226 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2227
2228 rc = RTSemEventMultiWaitEx(pCurGVCpu->gvmm.s.HaltEventMulti,
2229 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2230 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2231 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2232
2233 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0);
2234 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2235
2236        /* Reset the semaphore to try to prevent a few false wake-ups. */
2237 if (rc == VINF_SUCCESS)
2238 {
2239 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
2240 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2241 }
2242 else if (rc == VERR_TIMEOUT)
2243 {
2244 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2245 rc = VINF_SUCCESS;
2246 }
2247 }
2248 else
2249 {
2250 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2251 if (fDoEarlyWakeUps)
2252 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2253 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2254 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
2255 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2256 rc = VINF_SUCCESS;
2257 }
2258
2259 return rc;
2260}
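
/*
 * Illustrative sketch (not part of the original file): how a hypothetical ring-0
 * caller could perform the first halt attempt directly in ring-0 instead of
 * returning to ring-3, in the spirit of the HLT/MWAIT change this revision
 * belongs to. The helper name and the relative-to-absolute time conversion shown
 * here are assumptions made purely for illustration.
 */
#if 0 /* example only */
static int vmmR0ExampleHaltInRing0(PGVM pGVM, PVM pVM, PGVMCPU pGVCpu, uint64_t cNsToHalt)
{
    /* Convert the relative halt period into an absolute GIP timestamp. */
    uint64_t const u64ExpireGipTime = RTTimeNanoTS() + cNsToHalt;

    /* Block right here in ring-0; a timeout comes back as VINF_SUCCESS. */
    int rc = GVMMR0SchedHalt(pGVM, pVM, pGVCpu, u64ExpireGipTime);

    /* VERR_INTERRUPTED means a signal is pending and ring-3 must deal with it. */
    return rc;
}
#endif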
2261
2262
2263/**
2264 * Halt the EMT thread.
2265 *
2266 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2267 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2268 * @param pGVM The global (ring-0) VM structure.
2269 * @param pVM The cross context VM structure.
2270 * @param idCpu The Virtual CPU ID of the calling EMT.
2271 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2272 * @thread EMT(idCpu).
2273 */
2274GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2275{
2276 GVMM_CHECK_SMAP_SETUP();
2277 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2278 PGVMM pGVMM;
2279 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2280 if (RT_SUCCESS(rc))
2281 {
2282 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2283 rc = GVMMR0SchedHalt(pGVM, pVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2284 }
2285 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2286 return rc;
2287}
2288
2289
2290
2291/**
2292 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2293 * a sleeping EMT.
2294 *
2295 * @retval VINF_SUCCESS if successfully woken up.
2296 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2297 *
2298 * @param pGVM The global (ring-0) VM structure.
2299 * @param pGVCpu The global (ring-0) VCPU structure.
2300 */
2301DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2302{
2303 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2304
2305 /*
2306 * Signal the semaphore regardless of whether it's currently blocked on it.
2307 *
2308 * The reason for this is that there is absolutely no way we can be 100%
2309 * certain that it isn't *about* to go to sleep on it and just got
2310 * delayed a bit en route. So, we will always signal the semaphore when
2311 * it is flagged as halted in the VMM.
2312 */
2313/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2314 int rc;
2315 if (pGVCpu->gvmm.s.u64HaltExpire)
2316 {
2317 rc = VINF_SUCCESS;
2318 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2319 }
2320 else
2321 {
2322 rc = VINF_GVM_NOT_BLOCKED;
2323 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2324 }
2325
2326 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2327 AssertRC(rc2);
2328
2329 return rc;
2330}
2331
2332
2333/**
2334 * Wakes up the halted EMT thread so it can service a pending request.
2335 *
2336 * @returns VBox status code.
2337 * @retval VINF_SUCCESS if successfully woken up.
2338 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2339 *
2340 * @param pGVM The global (ring-0) VM structure.
2341 * @param pVM The cross context VM structure.
2342 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2343 * @param fTakeUsedLock Take the used lock or not
2344 * @thread Any but EMT(idCpu).
2345 */
2346GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
2347{
2348 GVMM_CHECK_SMAP_SETUP();
2349 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2350
2351 /*
2352 * Validate input and take the UsedLock.
2353 */
2354 PGVMM pGVMM;
2355 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
2356 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2357 if (RT_SUCCESS(rc))
2358 {
2359 if (idCpu < pGVM->cCpus)
2360 {
2361 /*
2362 * Do the actual job.
2363 */
2364 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2365 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2366
2367 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2368 {
2369 /*
2370 * While we're here, do a round of scheduling.
2371 */
2372 Assert(ASMGetFlags() & X86_EFL_IF);
2373 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2374 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2375 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2376 }
2377 }
2378 else
2379 rc = VERR_INVALID_CPU_ID;
2380
2381 if (fTakeUsedLock)
2382 {
2383 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2384 AssertRC(rc2);
2385 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2386 }
2387 }
2388
2389 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2390 return rc;
2391}
2392
2393
2394/**
2395 * Wakes up the halted EMT thread so it can service a pending request.
2396 *
2397 * @returns VBox status code.
2398 * @retval VINF_SUCCESS if successfully woken up.
2399 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2400 *
2401 * @param pGVM The global (ring-0) VM structure.
2402 * @param pVM The cross context VM structure.
2403 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2404 * @thread Any but EMT(idCpu).
2405 */
2406GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2407{
2408 return GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
2409}
2410
2411
2412/**
2413 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2414 * parameter and no used locking.
2415 *
2416 * @returns VBox status code.
2417 * @retval VINF_SUCCESS if successfully woken up.
2418 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2419 *
2420 * @param pVM The cross context VM structure.
2421 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2422 * @thread Any but EMT(idCpu).
2423 * @deprecated Don't use in new code if possible! Use the GVM variant.
2424 */
2425GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PVM pVM, VMCPUID idCpu)
2426{
2427 GVMM_CHECK_SMAP_SETUP();
2428 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2429 PGVM pGVM;
2430 PGVMM pGVMM;
2431 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
2432 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2433 if (RT_SUCCESS(rc))
2434 rc = GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, false /*fTakeUsedLock*/);
2435 return rc;
2436}
2437
2438
2439/**
2440 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2441 * the Virtual CPU if it's still busy executing guest code.
2442 *
2443 * @returns VBox status code.
2444 * @retval VINF_SUCCESS if poked successfully.
2445 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2446 *
2447 * @param pGVM The global (ring-0) VM structure.
2448 * @param pVCpu The cross context virtual CPU structure.
2449 */
2450DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPU pVCpu)
2451{
2452 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2453
2454 RTCPUID idHostCpu = pVCpu->idHostCpu;
2455 if ( idHostCpu == NIL_RTCPUID
2456 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2457 {
2458 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2459 return VINF_GVM_NOT_BUSY_IN_GC;
2460 }
2461
2462 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2463 RTMpPokeCpu(idHostCpu);
2464 return VINF_SUCCESS;
2465}
2466
2467
2468/**
2469 * Pokes an EMT if it's still busy running guest code.
2470 *
2471 * @returns VBox status code.
2472 * @retval VINF_SUCCESS if poked successfully.
2473 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2474 *
2475 * @param pGVM The global (ring-0) VM structure.
2476 * @param pVM The cross context VM structure.
2477 * @param idCpu The ID of the virtual CPU to poke.
2478 * @param fTakeUsedLock Take the used lock or not
2479 */
2480GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
2481{
2482 /*
2483 * Validate input and take the UsedLock.
2484 */
2485 PGVMM pGVMM;
2486 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
2487 if (RT_SUCCESS(rc))
2488 {
2489 if (idCpu < pGVM->cCpus)
2490 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2491 else
2492 rc = VERR_INVALID_CPU_ID;
2493
2494 if (fTakeUsedLock)
2495 {
2496 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2497 AssertRC(rc2);
2498 }
2499 }
2500
2501    LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2502 return rc;
2503}
2504
2505
2506/**
2507 * Pokes an EMT if it's still busy running guest code.
2508 *
2509 * @returns VBox status code.
2510 * @retval VINF_SUCCESS if poked successfully.
2511 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2512 *
2513 * @param pGVM The global (ring-0) VM structure.
2514 * @param pVM The cross context VM structure.
2515 * @param idCpu The ID of the virtual CPU to poke.
2516 */
2517GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2518{
2519 return GVMMR0SchedPokeEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
2520}
2521
2522
2523/**
2524 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2525 * used locking.
2526 *
2527 * @returns VBox status code.
2528 * @retval VINF_SUCCESS if poked successfully.
2529 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2530 *
2531 * @param pVM The cross context VM structure.
2532 * @param idCpu The ID of the virtual CPU to poke.
2533 *
2534 * @deprecated Don't use in new code if possible! Use the GVM variant.
2535 */
2536GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PVM pVM, VMCPUID idCpu)
2537{
2538 PGVM pGVM;
2539 PGVMM pGVMM;
2540 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
2541 if (RT_SUCCESS(rc))
2542 {
2543 if (idCpu < pGVM->cCpus)
2544 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2545 else
2546 rc = VERR_INVALID_CPU_ID;
2547 }
2548 return rc;
2549}
2550
2551
2552/**
2553 * Wakes up a set of halted EMT threads so they can service pending requests.
2554 *
2555 * @returns VBox status code, no informational stuff.
2556 *
2557 * @param pGVM The global (ring-0) VM structure.
2558 * @param pVM The cross context VM structure.
2559 * @param pSleepSet The set of sleepers to wake up.
2560 * @param pPokeSet The set of CPUs to poke.
2561 */
2562GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PVM pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2563{
2564 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2565 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2566 GVMM_CHECK_SMAP_SETUP();
2567 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2568 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2569
2570 /*
2571 * Validate input and take the UsedLock.
2572 */
2573 PGVMM pGVMM;
2574 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /* fTakeUsedLock */);
2575 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2576 if (RT_SUCCESS(rc))
2577 {
2578 rc = VINF_SUCCESS;
2579 VMCPUID idCpu = pGVM->cCpus;
2580 while (idCpu-- > 0)
2581 {
2582            /* Don't try to poke or wake up ourselves. */
2583 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2584 continue;
2585
2586 /* just ignore errors for now. */
2587 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2588 {
2589 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2590 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2591 }
2592 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2593 {
2594 gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2595 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2596 }
2597 }
2598
2599 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2600 AssertRC(rc2);
2601 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2602 }
2603
2604 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2605 return rc;
2606}
2607
2608
2609/**
2610 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2611 *
2612 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2613 * @param pGVM The global (ring-0) VM structure.
2614 * @param pVM The cross context VM structure.
2615 * @param pReq Pointer to the request packet.
2616 */
2617GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PVM pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2618{
2619 /*
2620 * Validate input and pass it on.
2621 */
2622 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2623 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2624
2625 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, pVM, &pReq->SleepSet, &pReq->PokeSet);
2626}
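
/*
 * Illustrative sketch (not part of the original file): how a caller might fill in
 * the GVMMSCHEDWAKEUPANDPOKECPUSREQ packet consumed by the wrapper above. The
 * field names follow the checks done by the wrapper; the helper name and the
 * VMCPUSET_* usage are assumptions made purely for illustration.
 */
#if 0 /* example only */
static int vmmR0ExampleWakeAndPoke(PGVM pGVM, PVM pVM, VMCPUID idSleeping, VMCPUID idBusy)
{
    GVMMSCHEDWAKEUPANDPOKECPUSREQ Req;
    Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
    Req.Hdr.cbReq    = sizeof(Req);
    VMCPUSET_EMPTY(&Req.SleepSet);
    VMCPUSET_EMPTY(&Req.PokeSet);
    VMCPUSET_ADD(&Req.SleepSet, idSleeping);    /* EMT believed to be halted */
    VMCPUSET_ADD(&Req.PokeSet,  idBusy);        /* EMT believed to be executing guest code */
    return GVMMR0SchedWakeUpAndPokeCpusReq(pGVM, pVM, &Req);
}
#endif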
2627
2628
2629
2630/**
2631 * Poll the schedule to see if someone else should get a chance to run.
2632 *
2633 * This is a bit hackish and will not work too well if the machine is
2634 * under heavy load from non-VM processes.
2635 *
2636 * @returns VINF_SUCCESS if not yielded.
2637 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2638 * @param pGVM The global (ring-0) VM structure.
2639 * @param pVM The cross context VM structure.
2640 * @param idCpu The Virtual CPU ID of the calling EMT.
2641 * @param fYield Whether to yield or not.
2642 * This is for when we're spinning in the halt loop.
2643 * @thread EMT(idCpu).
2644 */
2645GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fYield)
2646{
2647 /*
2648 * Validate input.
2649 */
2650 PGVMM pGVMM;
2651 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2652 if (RT_SUCCESS(rc))
2653 {
2654 /*
2655 * We currently only implement helping doing wakeups (fYield = false), so don't
2656 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2657 */
2658 if (!fYield && pGVMM->fDoEarlyWakeUps)
2659 {
2660 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2661 pGVM->gvmm.s.StatsSched.cPollCalls++;
2662
2663 Assert(ASMGetFlags() & X86_EFL_IF);
2664 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2665
2666 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2667
2668 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2669 }
2670 /*
2671 * Not quite sure what we could do here...
2672 */
2673 else if (fYield)
2674 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2675 else
2676 rc = VINF_SUCCESS;
2677 }
2678
2679    LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
2680 return rc;
2681}
2682
2683
2684#ifdef GVMM_SCHED_WITH_PPT
2685/**
2686 * Timer callback for the periodic preemption timer.
2687 *
2688 * @param pTimer The timer handle.
2689 * @param pvUser Pointer to the per cpu structure.
2690 * @param iTick The current tick.
2691 */
2692static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2693{
2694 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2695 NOREF(pTimer); NOREF(iTick);
2696
2697 /*
2698 * Termination check
2699 */
2700 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2701 return;
2702
2703 /*
2704 * Do the house keeping.
2705 */
2706 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2707
2708 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
2709 {
2710 /*
2711 * Historicize the max frequency.
2712 */
2713 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
2714 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
2715 pCpu->Ppt.iTickHistorization = 0;
2716 pCpu->Ppt.uDesiredHz = 0;
2717
2718 /*
2719         * Check if the current timer frequency needs to be changed.
2720 */
2721 uint32_t uHistMaxHz = 0;
2722 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
2723 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
2724 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
2725 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
2726 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2727 else if (uHistMaxHz)
2728 {
2729 /*
2730 * Reprogram it.
2731 */
2732 pCpu->Ppt.cChanges++;
2733 pCpu->Ppt.iTickHistorization = 0;
2734 pCpu->Ppt.uTimerHz = uHistMaxHz;
2735 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
2736 pCpu->Ppt.cNsInterval = cNsInterval;
2737 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2738 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2739 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2740 / cNsInterval;
2741 else
2742 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2743 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2744
2745 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
2746 RTTimerChangeInterval(pTimer, cNsInterval);
2747 }
2748 else
2749 {
2750 /*
2751 * Stop it.
2752 */
2753 pCpu->Ppt.fStarted = false;
2754 pCpu->Ppt.uTimerHz = 0;
2755 pCpu->Ppt.cNsInterval = 0;
2756 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2757
2758 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
2759 RTTimerStop(pTimer);
2760 }
2761 }
2762 else
2763 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2764}
2765#endif /* GVMM_SCHED_WITH_PPT */
2766
2767
2768/**
2769 * Updates the periodic preemption timer for the calling CPU.
2770 *
2771 * The caller must have disabled preemption!
2772 * The caller must check that the host can do high resolution timers.
2773 *
2774 * @param pVM The cross context VM structure.
2775 * @param idHostCpu The current host CPU id.
2776 * @param uHz The desired frequency.
2777 */
2778GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz)
2779{
2780 NOREF(pVM);
2781#ifdef GVMM_SCHED_WITH_PPT
2782 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2783 Assert(RTTimerCanDoHighResolution());
2784
2785 /*
2786 * Resolve the per CPU data.
2787 */
2788 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2789 PGVMM pGVMM = g_pGVMM;
2790 if ( !VALID_PTR(pGVMM)
2791 || pGVMM->u32Magic != GVMM_MAGIC)
2792 return;
2793 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2794 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2795 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2796 && pCpu->idCpu == idHostCpu,
2797                          ("u32Magic=%#x idCpu=%d idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2798
2799 /*
2800 * Check whether we need to do anything about the timer.
2801     * We have to be a little bit careful since we might be racing the timer
2802 * callback here.
2803 */
2804 if (uHz > 16384)
2805 uHz = 16384; /** @todo add a query method for this! */
2806 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2807 && uHz >= pCpu->Ppt.uMinHz
2808 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2809 {
2810 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2811
2812 pCpu->Ppt.uDesiredHz = uHz;
2813 uint32_t cNsInterval = 0;
2814 if (!pCpu->Ppt.fStarted)
2815 {
2816 pCpu->Ppt.cStarts++;
2817 pCpu->Ppt.fStarted = true;
2818 pCpu->Ppt.fStarting = true;
2819 pCpu->Ppt.iTickHistorization = 0;
2820 pCpu->Ppt.uTimerHz = uHz;
2821 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2822 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2823 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2824 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2825 / cNsInterval;
2826 else
2827 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2828 }
2829
2830 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2831
2832 if (cNsInterval)
2833 {
2834 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2835 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2836 AssertRC(rc);
2837
2838 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2839 if (RT_FAILURE(rc))
2840 pCpu->Ppt.fStarted = false;
2841 pCpu->Ppt.fStarting = false;
2842 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2843 }
2844 }
2845#else /* !GVMM_SCHED_WITH_PPT */
2846 NOREF(idHostCpu); NOREF(uHz);
2847#endif /* !GVMM_SCHED_WITH_PPT */
2848}
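
/*
 * Worked example (illustrative, not part of the original file): a desired
 * frequency of uHz = 2000 gives cNsInterval = RT_NS_1SEC / 2000 = 500000 ns.
 * If, purely for illustration, GVMMHOSTCPU_PPT_HIST_INTERVAL_NS were 20000000 ns
 * (20 ms), the historization tick count would be
 * (20000000 + 10000000 - 1) / 500000 = 59 timer ticks per history slot.
 * Frequencies above 16384 Hz are clamped before any of this happens.
 */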
2849
2850
2851/**
2852 * Retrieves the GVMM statistics visible to the caller.
2853 *
2854 * @returns VBox status code.
2855 *
2856 * @param pStats Where to put the statistics.
2857 * @param pSession The current session.
2858 * @param pGVM The GVM to obtain statistics for. Optional.
2859 * @param pVM The VM structure corresponding to @a pGVM.
2860 */
2861GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
2862{
2863 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
2864
2865 /*
2866 * Validate input.
2867 */
2868 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2869 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2870 pStats->cVMs = 0; /* (crash before taking the sem...) */
2871
2872 /*
2873 * Take the lock and get the VM statistics.
2874 */
2875 PGVMM pGVMM;
2876 if (pGVM)
2877 {
2878 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
2879 if (RT_FAILURE(rc))
2880 return rc;
2881 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
2882 }
2883 else
2884 {
2885 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2886 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
2887
2888 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2889 AssertRCReturn(rc, rc);
2890 }
2891
2892 /*
2893 * Enumerate the VMs and add the ones visible to the statistics.
2894 */
2895 pStats->cVMs = 0;
2896 pStats->cEMTs = 0;
2897 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
2898
2899 for (unsigned i = pGVMM->iUsedHead;
2900 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2901 i = pGVMM->aHandles[i].iNext)
2902 {
2903 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
2904 void *pvObj = pGVMM->aHandles[i].pvObj;
2905 if ( VALID_PTR(pvObj)
2906 && VALID_PTR(pOtherGVM)
2907 && pOtherGVM->u32Magic == GVM_MAGIC
2908 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2909 {
2910 pStats->cVMs++;
2911 pStats->cEMTs += pOtherGVM->cCpus;
2912
2913 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
2914 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
2915 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
2916 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
2917 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
2918
2919 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
2920 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
2921 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
2922
2923 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
2924 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
2925
2926 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
2927 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
2928 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
2929 }
2930 }
2931
2932 /*
2933 * Copy out the per host CPU statistics.
2934 */
2935 uint32_t iDstCpu = 0;
2936 uint32_t cSrcCpus = pGVMM->cHostCpus;
2937 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
2938 {
2939 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
2940 {
2941 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
2942 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
2943#ifdef GVMM_SCHED_WITH_PPT
2944 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
2945 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
2946 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
2947 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
2948#else
2949 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
2950 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
2951 pStats->aHostCpus[iDstCpu].cChanges = 0;
2952 pStats->aHostCpus[iDstCpu].cStarts = 0;
2953#endif
2954 iDstCpu++;
2955 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
2956 break;
2957 }
2958 }
2959 pStats->cHostCpus = iDstCpu;
2960
2961 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2962
2963 return VINF_SUCCESS;
2964}
2965
2966
2967/**
2968 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2969 *
2970 * @returns see GVMMR0QueryStatistics.
2971 * @param pGVM The global (ring-0) VM structure. Optional.
2972 * @param pVM The cross context VM structure. Optional.
2973 * @param pReq Pointer to the request packet.
2974 * @param pSession The current session.
2975 */
2976GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PVM pVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
2977{
2978 /*
2979 * Validate input and pass it on.
2980 */
2981 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2982 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2983 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
2984
2985 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM, pVM);
2986}
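
/*
 * Illustrative sketch (not part of the original file): how a caller might fill in
 * the GVMMQUERYSTATISTICSSREQ packet consumed by the wrapper above. The field
 * names follow the checks done by the wrapper; the helper name and the use of
 * RT_ZERO/SUPR0Printf are assumptions made purely for illustration.
 */
#if 0 /* example only */
static int vmmR0ExampleQueryStats(PGVM pGVM, PVM pVM, PSUPDRVSESSION pSession)
{
    GVMMQUERYSTATISTICSSREQ Req;
    RT_ZERO(Req);
    Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
    Req.Hdr.cbReq    = sizeof(Req);
    Req.pSession     = pSession;                /* must match the calling session */
    int rc = GVMMR0QueryStatisticsReq(pGVM, pVM, &Req, pSession);
    if (RT_SUCCESS(rc))
        SUPR0Printf("GVMM: %u VMs, %u EMTs\n", Req.Stats.cVMs, Req.Stats.cEMTs);
    return rc;
}
#endif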
2987
2988
2989/**
2990 * Resets the specified GVMM statistics.
2991 *
2992 * @returns VBox status code.
2993 *
2994 * @param pStats Which statistics to reset, that is, non-zero fields indicates which to reset.
2995 * @param pSession The current session.
2996 * @param pGVM The GVM to reset statistics for. Optional.
2997 * @param pVM The VM structure corresponding to @a pGVM.
2998 */
2999GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
3000{
3001 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
3002
3003 /*
3004 * Validate input.
3005 */
3006 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3007 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3008
3009 /*
3010 * Take the lock and get the VM statistics.
3011 */
3012 PGVMM pGVMM;
3013 if (pGVM)
3014 {
3015 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
3016 if (RT_FAILURE(rc))
3017 return rc;
3018# define MAYBE_RESET_FIELD(field) \
3019 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3020 MAYBE_RESET_FIELD(cHaltCalls);
3021 MAYBE_RESET_FIELD(cHaltBlocking);
3022 MAYBE_RESET_FIELD(cHaltTimeouts);
3023 MAYBE_RESET_FIELD(cHaltNotBlocking);
3024 MAYBE_RESET_FIELD(cHaltWakeUps);
3025 MAYBE_RESET_FIELD(cWakeUpCalls);
3026 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3027 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3028 MAYBE_RESET_FIELD(cPokeCalls);
3029 MAYBE_RESET_FIELD(cPokeNotBusy);
3030 MAYBE_RESET_FIELD(cPollCalls);
3031 MAYBE_RESET_FIELD(cPollHalts);
3032 MAYBE_RESET_FIELD(cPollWakeUps);
3033# undef MAYBE_RESET_FIELD
3034 }
3035 else
3036 {
3037 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3038
3039 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3040 AssertRCReturn(rc, rc);
3041 }
3042
3043 /*
3044 * Enumerate the VMs and add the ones visible to the statistics.
3045 */
3046 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
3047 {
3048 for (unsigned i = pGVMM->iUsedHead;
3049 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3050 i = pGVMM->aHandles[i].iNext)
3051 {
3052 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3053 void *pvObj = pGVMM->aHandles[i].pvObj;
3054 if ( VALID_PTR(pvObj)
3055 && VALID_PTR(pOtherGVM)
3056 && pOtherGVM->u32Magic == GVM_MAGIC
3057 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3058 {
3059# define MAYBE_RESET_FIELD(field) \
3060 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3061 MAYBE_RESET_FIELD(cHaltCalls);
3062 MAYBE_RESET_FIELD(cHaltBlocking);
3063 MAYBE_RESET_FIELD(cHaltTimeouts);
3064 MAYBE_RESET_FIELD(cHaltNotBlocking);
3065 MAYBE_RESET_FIELD(cHaltWakeUps);
3066 MAYBE_RESET_FIELD(cWakeUpCalls);
3067 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3068 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3069 MAYBE_RESET_FIELD(cPokeCalls);
3070 MAYBE_RESET_FIELD(cPokeNotBusy);
3071 MAYBE_RESET_FIELD(cPollCalls);
3072 MAYBE_RESET_FIELD(cPollHalts);
3073 MAYBE_RESET_FIELD(cPollWakeUps);
3074# undef MAYBE_RESET_FIELD
3075 }
3076 }
3077 }
3078
3079 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3080
3081 return VINF_SUCCESS;
3082}
3083
3084
3085/**
3086 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3087 *
3088 * @returns see GVMMR0ResetStatistics.
3089 * @param pGVM The global (ring-0) VM structure. Optional.
3090 * @param pVM The cross context VM structure. Optional.
3091 * @param pReq Pointer to the request packet.
3092 * @param pSession The current session.
3093 */
3094GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PVM pVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3095{
3096 /*
3097 * Validate input and pass it on.
3098 */
3099 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3100 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3101 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3102
3103 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM, pVM);
3104}
3105