VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GMMR0.cpp@29575

Last change on this file since 29575 was 29575, checked in by vboxsync, 15 years ago

The ring-3 chunk cache can be out of sync; don't fail.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 142.4 KB
1/* $Id: GMMR0.cpp 29575 2010-05-17 16:28:36Z vboxsync $ */
2/** @file
3 * GMM - Global Memory Manager.
4 */
5
6/*
7 * Copyright (C) 2007 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gmm GMM - The Global Memory Manager
20 *
21 * As the name indicates, this component is responsible for global memory
22 * management. Currently only guest RAM is allocated from the GMM, but this
23 * may change to include shadow page tables and other bits later.
24 *
25 * Guest RAM is managed as individual pages, but allocated from the host OS
26 * in chunks for reasons of portability / efficiency. To minimize the memory
27 * footprint all tracking structure must be as small as possible without
28 * unnecessary performance penalties.
29 *
30 * The allocation chunks have a fixed size, defined at compile time by
31 * the #GMM_CHUNK_SIZE \#define.
32 *
33 * Each chunk is given a unique ID. Each page also has a unique ID. The
34 * relationship between the two IDs is:
35 * @code
36 * GMM_CHUNK_SHIFT = log2(GMM_CHUNK_SIZE / PAGE_SIZE);
37 * idPage = (idChunk << GMM_CHUNK_SHIFT) | iPage;
38 * @endcode
39 * Where iPage is the index of the page within the chunk. This ID scheme
40 * permits efficient chunk and page lookup, but it relies on the chunk size
41 * being set at compile time. The chunks are organized in an AVL tree with their
42 * IDs being the keys.
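 *
 * Going the other way is just as cheap (a sketch using the same constants
 * as in the formula above):
 * @code
 *      idChunk = idPage >> GMM_CHUNK_SHIFT;
 *      iPage   = idPage & ((1 << GMM_CHUNK_SHIFT) - 1);
 * @endcode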
43 *
44 * The physical address of each page in an allocation chunk is maintained by
45 * the #RTR0MEMOBJ and obtained using #RTR0MemObjGetPagePhysAddr. There is no
46 * need to duplicate this information (it would cost 8 bytes per page if we did).
47 *
48 * So what do we need to track per page? Most importantly we need to know
49 * which state the page is in:
50 * - Private - Allocated for (eventually) backing one particular VM page.
51 * - Shared - Read-only page that is used by one or more VMs and treated
52 * as COW by PGM.
53 * - Free - Not used by anyone.
54 *
55 * For the page replacement operations (sharing, defragmenting and freeing)
56 * to be somewhat efficient, private pages need to be associated with a
57 * particular page in a particular VM.
58 *
59 * Tracking the usage of shared pages is impractical and expensive, so we'll
60 * settle for a reference counting system instead.
61 *
62 * Free pages will be chained on LIFOs.
63 *
64 * On 64-bit systems we will use a 64-bit bitfield per page, while on 32-bit
65 * systems a 32-bit bitfield will have to suffice because of address space
66 * limitations. The #GMMPAGE structure shows the details.
67 *
68 *
69 * @section sec_gmm_alloc_strat Page Allocation Strategy
70 *
71 * The strategy for allocating pages has to take fragmentation and shared
72 * pages into account, or we may end up with 2000 chunks with only
73 * a few pages in each. Shared pages cannot easily be reallocated because
74 * of the inaccurate usage accounting (see above). Private pages can be
75 * reallocated by a defragmentation thread in the same manner that sharing
76 * is done.
77 *
78 * The first approach is to manage the free pages in two sets depending on
79 * whether they are mainly for the allocation of shared or private pages.
80 * In the initial implementation there will be almost no possibility for
81 * mixing shared and private pages in the same chunk (only if we're really
82 * stressed on memory), but when we implement forking of VMs and have to
83 * deal with lots of COW pages it'll start getting kind of interesting.
84 *
85 * The sets are lists of chunks with approximately the same number of
86 * free pages. Say the chunk size is 1MB, meaning 256 pages, and a set
87 * consists of 16 lists. So, the first list will contain the chunks with
88 * 1-16 free pages, the second covers 17-32, and so on. The chunks will be
89 * moved between the lists as pages are freed up or allocated.
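 *
 * A sketch of the list bucketing used further down (see
 * GMM_CHUNK_FREE_SET_SHIFT):
 * @code
 *      iList = (cFree - 1) >> GMM_CHUNK_FREE_SET_SHIFT;
 * @endcode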
90 *
91 *
92 * @section sec_gmm_costs Costs
93 *
94 * The per-page cost in kernel space is 32 bits plus whatever the RTR0MEMOBJ
95 * entails. In addition there is the chunk cost of approximately
96 * (sizeof(RTR0MEMOBJ) + sizeof(GMMCHUNK)) / 2^GMM_CHUNK_SHIFT bytes per page.
97 *
98 * On Windows the per-page #RTR0MEMOBJ cost is 32 bits on 32-bit Windows and
99 * 64 bits on 64-bit Windows (a PFN_NUMBER in the MDL), i.e. 64 bits per page.
100 * The cost on Linux is identical, but there it's because of sizeof(struct page *).
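 *
 * As a rough worked example (assuming 2 MB chunks, 4 KB pages, i.e. 512 pages
 * per chunk, and a GMMCHUNK of roughly 4 KB): the chunk overhead comes to about
 * 4096 / 512 = 8 bytes per page on top of the GMMPAGE entry and the RTR0MEMOBJ
 * cost.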
101 *
102 *
103 * @section sec_gmm_legacy Legacy Mode for Non-Tier-1 Platforms
104 *
105 * In legacy mode the page source is locked user pages rather than
106 * #RTR0MemObjAllocPhysNC, which means that a page can only be allocated
107 * by the VM that locked it. We will make no attempt at implementing
108 * page sharing on these systems, just do enough to make it all work.
109 *
110 *
111 * @subsection sub_gmm_locking Serializing
112 *
113 * One simple fast mutex will be employed in the initial implementation, not
114 * two as mentioned in @ref subsec_pgmPhys_Serializing.
115 *
116 * @see @ref subsec_pgmPhys_Serializing
117 *
118 *
119 * @section sec_gmm_overcommit Memory Over-Commitment Management
120 *
121 * The GVM will have to do the system wide memory over-commitment
122 * management. My current ideas are:
123 * - Per-VM over-commit policy that indicates how much to initially commit
124 * to it and what to do in an out-of-memory situation.
125 * - Prevent overtaxing the host.
126 *
127 * There are some challenges here; the main ones are configurability and
128 * security. Should we for instance permit anyone to request 100% memory
129 * commitment? Who should be allowed to do runtime adjustments of the
130 * config, and how do we prevent these settings from being lost when the last
131 * VM process exits? The solution is probably to have an optional root
132 * daemon that will keep VMMR0.r0 in memory and enable the security measures.
133 *
134 *
135 *
136 * @section sec_gmm_numa NUMA
137 *
138 * NUMA considerations will be designed and implemented a bit later.
139 *
140 * The preliminary guess is that we will have to try to allocate memory as
141 * close as possible to the CPUs the VM is executed on (EMT and additional CPU
142 * threads), which means it's mostly about allocation and sharing policies.
143 * Both the scheduler and the allocator interface will have to supply some NUMA
144 * info, and we'll need a way to calculate access costs.
145 *
146 */
147
148
149/*******************************************************************************
150* Header Files *
151*******************************************************************************/
152#define LOG_GROUP LOG_GROUP_GMM
153#include <VBox/vm.h>
154#include <VBox/gmm.h>
155#include "GMMR0Internal.h"
156#include <VBox/gvm.h>
157#include <VBox/pgm.h>
158#include <VBox/log.h>
159#include <VBox/param.h>
160#include <VBox/err.h>
161#include <iprt/asm.h>
162#include <iprt/avl.h>
163#include <iprt/mem.h>
164#include <iprt/memobj.h>
165#include <iprt/semaphore.h>
166#include <iprt/string.h>
167
168
169/*******************************************************************************
170* Structures and Typedefs *
171*******************************************************************************/
172/** Pointer to set of free chunks. */
173typedef struct GMMCHUNKFREESET *PGMMCHUNKFREESET;
174
175/** Pointer to a GMM allocation chunk. */
176typedef struct GMMCHUNK *PGMMCHUNK;
177
178/**
179 * The per-page tracking structure employed by the GMM.
180 *
181 * On 32-bit hosts some trickery is necessary to compress all
182 * the information into 32 bits. When the fSharedFree member is set,
183 * the 30th bit decides whether it's a free page or not.
184 *
185 * Because of the different layout on 32-bit and 64-bit hosts, macros
186 * are used to get and set some of the data.
187 */
188typedef union GMMPAGE
189{
190#if HC_ARCH_BITS == 64
191 /** Unsigned integer view. */
192 uint64_t u;
193
194 /** The common view. */
195 struct GMMPAGECOMMON
196 {
197 uint32_t uStuff1 : 32;
198 uint32_t uStuff2 : 30;
199 /** The page state. */
200 uint32_t u2State : 2;
201 } Common;
202
203 /** The view of a private page. */
204 struct GMMPAGEPRIVATE
205 {
206 /** The guest page frame number. (Max addressable: 2 ^ 44 - 16) */
207 uint32_t pfn;
208 /** The GVM handle. (64K VMs) */
209 uint32_t hGVM : 16;
210 /** Reserved. */
211 uint32_t u16Reserved : 14;
212 /** The page state. */
213 uint32_t u2State : 2;
214 } Private;
215
216 /** The view of a shared page. */
217 struct GMMPAGESHARED
218 {
219 /** The host page frame number. (Max addressable: 2 ^ 44 - 16) */
220 uint32_t pfn;
221 /** The reference count (64K VMs). */
222 uint32_t cRefs : 16;
223 /** Reserved. Checksum or something? Two hGVMs for forking? */
224 uint32_t u14Reserved : 14;
225 /** The page state. */
226 uint32_t u2State : 2;
227 } Shared;
228
229 /** The view of a free page. */
230 struct GMMPAGEFREE
231 {
232 /** The index of the next page in the free list. UINT16_MAX is NIL. */
233 uint16_t iNext;
234 /** Reserved. Checksum or something? */
235 uint16_t u16Reserved0;
236 /** Reserved. Checksum or something? */
237 uint32_t u30Reserved1 : 30;
238 /** The page state. */
239 uint32_t u2State : 2;
240 } Free;
241
242#else /* 32-bit */
243 /** Unsigned integer view. */
244 uint32_t u;
245
246 /** The common view. */
247 struct GMMPAGECOMMON
248 {
249 uint32_t uStuff : 30;
250 /** The page state. */
251 uint32_t u2State : 2;
252 } Common;
253
254 /** The view of a private page. */
255 struct GMMPAGEPRIVATE
256 {
257 /** The guest page frame number. (Max addressable: 2 ^ 36) */
258 uint32_t pfn : 24;
259 /** The GVM handle. (127 VMs) */
260 uint32_t hGVM : 7;
261 /** The top page state bit, MBZ. */
262 uint32_t fZero : 1;
263 } Private;
264
265 /** The view of a shared page. */
266 struct GMMPAGESHARED
267 {
268 /** The reference count. */
269 uint32_t cRefs : 30;
270 /** The page state. */
271 uint32_t u2State : 2;
272 } Shared;
273
274 /** The view of a free page. */
275 struct GMMPAGEFREE
276 {
277 /** The index of the next page in the free list. UINT16_MAX is NIL. */
278 uint32_t iNext : 16;
279 /** Reserved. Checksum or something? */
280 uint32_t u14Reserved : 14;
281 /** The page state. */
282 uint32_t u2State : 2;
283 } Free;
284#endif
285} GMMPAGE;
286AssertCompileSize(GMMPAGE, sizeof(RTHCUINTPTR));
287/** Pointer to a GMMPAGE. */
288typedef GMMPAGE *PGMMPAGE;
289
290
291/** @name The Page States.
292 * @{ */
293/** A private page. */
294#define GMM_PAGE_STATE_PRIVATE 0
295/** A private page - alternative value used on the 32-bit implementation.
296 * This will never be used on 64-bit hosts. */
297#define GMM_PAGE_STATE_PRIVATE_32 1
298/** A shared page. */
299#define GMM_PAGE_STATE_SHARED 2
300/** A free page. */
301#define GMM_PAGE_STATE_FREE 3
302/** @} */
303
304
305/** @def GMM_PAGE_IS_PRIVATE
306 *
307 * @returns true if private, false if not.
308 * @param pPage The GMM page.
309 */
310#if HC_ARCH_BITS == 64
311# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_PRIVATE )
312#else
313# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Private.fZero == 0 )
314#endif
315
316/** @def GMM_PAGE_IS_SHARED
317 *
318 * @returns true if shared, false if not.
319 * @param pPage The GMM page.
320 */
321#define GMM_PAGE_IS_SHARED(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_SHARED )
322
323/** @def GMM_PAGE_IS_FREE
324 *
325 * @returns true if free, false if not.
326 * @param pPage The GMM page.
327 */
328#define GMM_PAGE_IS_FREE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_FREE )
329
330/** @def GMM_PAGE_PFN_LAST
331 * The last valid guest pfn range.
332 * @remark Some of the values outside the range have special meaning,
333 * see GMM_PAGE_PFN_UNSHAREABLE.
334 */
335#if HC_ARCH_BITS == 64
336# define GMM_PAGE_PFN_LAST UINT32_C(0xfffffff0)
337#else
338# define GMM_PAGE_PFN_LAST UINT32_C(0x00fffff0)
339#endif
340AssertCompile(GMM_PAGE_PFN_LAST == (GMM_GCPHYS_LAST >> PAGE_SHIFT));
341
342/** @def GMM_PAGE_PFN_UNSHAREABLE
343 * Indicates that this page isn't used for normal guest memory and thus isn't shareable.
344 */
345#if HC_ARCH_BITS == 64
346# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0xfffffff1)
347#else
348# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0x00fffff1)
349#endif
350AssertCompile(GMM_PAGE_PFN_UNSHAREABLE == (GMM_GCPHYS_UNSHAREABLE >> PAGE_SHIFT));
351
352
353/**
354 * A GMM allocation chunk ring-3 mapping record.
355 *
356 * This should really be associated with a session and not a VM, but
357 * it's simpler to associate it with a VM and clean up when the VM object
358 * is destroyed.
359 */
360typedef struct GMMCHUNKMAP
361{
362 /** The mapping object. */
363 RTR0MEMOBJ MapObj;
364 /** The VM owning the mapping. */
365 PGVM pGVM;
366} GMMCHUNKMAP;
367/** Pointer to a GMM allocation chunk mapping. */
368typedef struct GMMCHUNKMAP *PGMMCHUNKMAP;
369
370typedef enum GMMCHUNKTYPE
371{
372 GMMCHUNKTYPE_INVALID = 0,
373 GMMCHUNKTYPE_NON_CONTINUOUS = 1, /* 4 kb pages */
374 GMMCHUNKTYPE_CONTINUOUS = 2, /* one 2 MB continuous physical range. */
375 GMMCHUNKTYPE_32BIT_HACK = 0x7fffffff
376} GMMCHUNKTYPE;
377
378
379/**
380 * A GMM allocation chunk.
381 */
382typedef struct GMMCHUNK
383{
384 /** The AVL node core.
385 * The Key is the chunk ID. */
386 AVLU32NODECORE Core;
387 /** The memory object.
388 * Either from RTR0MemObjAllocPhysNC or RTR0MemObjLockUser depending on
389 * what the host can dish up. */
390 RTR0MEMOBJ MemObj;
391 /** Pointer to the next chunk in the free list. */
392 PGMMCHUNK pFreeNext;
393 /** Pointer to the previous chunk in the free list. */
394 PGMMCHUNK pFreePrev;
395 /** Pointer to the free set this chunk belongs to. NULL for
396 * chunks with no free pages. */
397 PGMMCHUNKFREESET pSet;
398 /** Pointer to an array of mappings. */
399 PGMMCHUNKMAP paMappings;
400 /** The number of mappings. */
401 uint16_t cMappings;
402 /** The head of the list of free pages. UINT16_MAX is the NIL value. */
403 uint16_t iFreeHead;
404 /** The number of free pages. */
405 uint16_t cFree;
406 /** The GVM handle of the VM that first allocated pages from this chunk, this
407 * is used as a preference when there are several chunks to choose from.
408 * When in bound memory mode this isn't a preference any longer. */
409 uint16_t hGVM;
410 /** The number of private pages. */
411 uint16_t cPrivate;
412 /** The number of shared pages. */
413 uint16_t cShared;
414 /** Chunk type */
415 GMMCHUNKTYPE enmType;
416 /** The pages. */
417 GMMPAGE aPages[GMM_CHUNK_SIZE >> PAGE_SHIFT];
418} GMMCHUNK;
419
420
421/**
422 * An allocation chunk TLB entry.
423 */
424typedef struct GMMCHUNKTLBE
425{
426 /** The chunk id. */
427 uint32_t idChunk;
428 /** Pointer to the chunk. */
429 PGMMCHUNK pChunk;
430} GMMCHUNKTLBE;
431/** Pointer to an allocation chunk TLB entry. */
432typedef GMMCHUNKTLBE *PGMMCHUNKTLBE;
433
434
435/** The number of entries in the allocation chunk TLB. */
436#define GMM_CHUNKTLB_ENTRIES 32
437/** Gets the TLB entry index for the given Chunk ID. */
438#define GMM_CHUNKTLB_IDX(idChunk) ( (idChunk) & (GMM_CHUNKTLB_ENTRIES - 1) )
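/* A worked example: with 32 entries, chunk ID 0x123 hashes to TLB entry 0x123 & 31 = 3. */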
439
440/**
441 * An allocation chunk TLB.
442 */
443typedef struct GMMCHUNKTLB
444{
445 /** The TLB entries. */
446 GMMCHUNKTLBE aEntries[GMM_CHUNKTLB_ENTRIES];
447} GMMCHUNKTLB;
448/** Pointer to an allocation chunk TLB. */
449typedef GMMCHUNKTLB *PGMMCHUNKTLB;
450
451
452/** The GMMCHUNK::cFree shift count. */
453#define GMM_CHUNK_FREE_SET_SHIFT 4
454/** The GMMCHUNK::cFree mask for use when considering relinking a chunk. */
455#define GMM_CHUNK_FREE_SET_MASK 15
456/** The number of lists in a set. */
457#define GMM_CHUNK_FREE_SET_LISTS (GMM_CHUNK_NUM_PAGES >> GMM_CHUNK_FREE_SET_SHIFT)
458
459/**
460 * A set of free chunks.
461 */
462typedef struct GMMCHUNKFREESET
463{
464 /** The number of free pages in the set. */
465 uint64_t cFreePages;
466 /** Chunks ordered by increasing number of free pages. */
467 PGMMCHUNK apLists[GMM_CHUNK_FREE_SET_LISTS];
468} GMMCHUNKFREESET;
469
470
471/**
472 * The GMM instance data.
473 */
474typedef struct GMM
475{
476 /** Magic / eye catcher. GMM_MAGIC */
477 uint32_t u32Magic;
478 /** The fast mutex protecting the GMM.
479 * More fine grained locking can be implemented later if necessary. */
480 RTSEMFASTMUTEX Mtx;
481 /** The chunk tree. */
482 PAVLU32NODECORE pChunks;
483 /** The chunk TLB. */
484 GMMCHUNKTLB ChunkTLB;
485 /** The private free set. */
486 GMMCHUNKFREESET Private;
487 /** The shared free set. */
488 GMMCHUNKFREESET Shared;
489
490 /** Shared module tree (global). */
491 /** @todo separate trees for distinctly different guest OSes. */
492 PAVLGCPTRNODECORE pGlobalSharedModuleTree;
493
494 /** The maximum number of pages we're allowed to allocate.
495 * @gcfgm 64-bit GMM/MaxPages Direct.
496 * @gcfgm 32-bit GMM/PctPages Relative to the number of host pages. */
497 uint64_t cMaxPages;
498 /** The number of pages that have been reserved.
499 * The deal is that cReservedPages - cOverCommittedPages <= cMaxPages. */
500 uint64_t cReservedPages;
501 /** The number of pages that we have over-committed in reservations. */
502 uint64_t cOverCommittedPages;
503 /** The number of actually allocated (committed if you like) pages. */
504 uint64_t cAllocatedPages;
505 /** The number of pages that are shared. A subset of cAllocatedPages. */
506 uint64_t cSharedPages;
507 /** The number of shared pages that have been left behind by
508 * VMs not doing proper cleanups. */
509 uint64_t cLeftBehindSharedPages;
510 /** The number of allocation chunks.
511 * (The number of pages we've allocated from the host can be derived from this.) */
512 uint32_t cChunks;
513 /** The number of current ballooned pages. */
514 uint64_t cBalloonedPages;
515
516 /** The legacy allocation mode indicator.
517 * This is determined at initialization time. */
518 bool fLegacyAllocationMode;
519 /** The bound memory mode indicator.
520 * When set, the memory will be bound to a specific VM and never
521 * shared. This is always set if fLegacyAllocationMode is set.
522 * (Also determined at initialization time.) */
523 bool fBoundMemoryMode;
524 /** The number of registered VMs. */
525 uint16_t cRegisteredVMs;
526
527 /** The previous allocated Chunk ID.
528 * Used as a hint to avoid scanning the whole bitmap. */
529 uint32_t idChunkPrev;
530 /** Chunk ID allocation bitmap.
531 * Bits of allocated IDs are set, free ones are clear.
532 * The NIL id (0) is marked allocated. */
533 uint32_t bmChunkId[(GMM_CHUNKID_LAST + 1 + 31) / 32];
534} GMM;
535/** Pointer to the GMM instance. */
536typedef GMM *PGMM;
537
538/** The value of GMM::u32Magic (Katsuhiro Otomo). */
539#define GMM_MAGIC 0x19540414
540
541
542/*******************************************************************************
543* Global Variables *
544*******************************************************************************/
545/** Pointer to the GMM instance data. */
546static PGMM g_pGMM = NULL;
547
548/** Macro for obtaining and validating the g_pGMM pointer.
549 * On failure it will return from the invoking function with the specified return value.
550 *
551 * @param pGMM The name of the pGMM variable.
552 * @param rc The return value on failure. Use VERR_INTERNAL_ERROR for
553 * VBox status codes.
554 */
555#define GMM_GET_VALID_INSTANCE(pGMM, rc) \
556 do { \
557 (pGMM) = g_pGMM; \
558 AssertPtrReturn((pGMM), (rc)); \
559 AssertMsgReturn((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic), (rc)); \
560 } while (0)
561
562/** Macro for obtaining and validating the g_pGMM pointer, void function variant.
563 * On failure it will return from the invoking function.
564 *
565 * @param pGMM The name of the pGMM variable.
566 */
567#define GMM_GET_VALID_INSTANCE_VOID(pGMM) \
568 do { \
569 (pGMM) = g_pGMM; \
570 AssertPtrReturnVoid((pGMM)); \
571 AssertMsgReturnVoid((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic)); \
572 } while (0)
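/* Typical usage of the instance getters (a sketch mirroring the ring-0 entry
   points later in this file):
        PGMM pGMM;
        GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
 */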
573
574
575/** @def GMM_CHECK_SANITY_UPON_ENTERING
576 * Checks the sanity of the GMM instance data before making changes.
577 *
578 * This macro is a stub by default and must be enabled manually in the code.
579 *
580 * @returns true if sane, false if not.
581 * @param pGMM The name of the pGMM variable.
582 */
583#if defined(VBOX_STRICT) && 0
584# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
585#else
586# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM) (true)
587#endif
588
589/** @def GMM_CHECK_SANITY_UPON_LEAVING
590 * Checks the sanity of the GMM instance data after making changes.
591 *
592 * This macro is a stub by default and must be enabled manually in the code.
593 *
594 * @returns true if sane, false if not.
595 * @param pGMM The name of the pGMM variable.
596 */
597#if defined(VBOX_STRICT) && 0
598# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
599#else
600# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM) (true)
601#endif
602
603/** @def GMM_CHECK_SANITY_IN_LOOPS
604 * Checks the sanity of the GMM instance in the allocation loops.
605 *
606 * This macro is a stub by default and must be enabled manually in the code.
607 *
608 * @returns true if sane, false if not.
609 * @param pGMM The name of the pGMM variable.
610 */
611#if defined(VBOX_STRICT) && 0
612# define GMM_CHECK_SANITY_IN_LOOPS(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
613#else
614# define GMM_CHECK_SANITY_IN_LOOPS(pGMM) (true)
615#endif
616
617
618/*******************************************************************************
619* Internal Functions *
620*******************************************************************************/
621static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM);
622static DECLCALLBACK(int) gmmR0CleanupVMScanChunk(PAVLU32NODECORE pNode, void *pvGMM);
623static DECLCALLBACK(int) gmmR0CleanupSharedModule(PAVLGCPTRNODECORE pNode, void *pvGVM);
624/*static*/ DECLCALLBACK(int) gmmR0CleanupVMDestroyChunk(PAVLU32NODECORE pNode, void *pvGVM);
625DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet);
626DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk);
627static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo);
628static void gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
629static void gmmR0FreeSharedPage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage);
630static int gmmR0UnmapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
631
632
633
634/**
635 * Initializes the GMM component.
636 *
637 * This is called when the VMMR0.r0 module is loaded and protected by the
638 * loader semaphore.
639 *
640 * @returns VBox status code.
641 */
642GMMR0DECL(int) GMMR0Init(void)
643{
644 LogFlow(("GMMInit:\n"));
645
646 /*
647 * Allocate the instance data and the lock(s).
648 */
649 PGMM pGMM = (PGMM)RTMemAllocZ(sizeof(*pGMM));
650 if (!pGMM)
651 return VERR_NO_MEMORY;
652 pGMM->u32Magic = GMM_MAGIC;
653 for (unsigned i = 0; i < RT_ELEMENTS(pGMM->ChunkTLB.aEntries); i++)
654 pGMM->ChunkTLB.aEntries[i].idChunk = NIL_GMM_CHUNKID;
655 ASMBitSet(&pGMM->bmChunkId[0], NIL_GMM_CHUNKID);
656
657 int rc = RTSemFastMutexCreate(&pGMM->Mtx);
658 if (RT_SUCCESS(rc))
659 {
660 /*
661 * Check and see if RTR0MemObjAllocPhysNC works.
662 */
663#if 0 /* later, see #3170. */
664 RTR0MEMOBJ MemObj;
665 rc = RTR0MemObjAllocPhysNC(&MemObj, _64K, NIL_RTHCPHYS);
666 if (RT_SUCCESS(rc))
667 {
668 rc = RTR0MemObjFree(MemObj, true);
669 AssertRC(rc);
670 }
671 else if (rc == VERR_NOT_SUPPORTED)
672 pGMM->fLegacyAllocationMode = pGMM->fBoundMemoryMode = true;
673 else
674 SUPR0Printf("GMMR0Init: RTR0MemObjAllocPhysNC(,64K,Any) -> %d!\n", rc);
675#else
676# if defined(RT_OS_WINDOWS) || defined(RT_OS_SOLARIS) || defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
677 pGMM->fLegacyAllocationMode = false;
678# if ARCH_BITS == 32
679 /* Don't reuse possibly partial chunks because of the virtual address space limitation. */
680 pGMM->fBoundMemoryMode = true;
681# else
682 pGMM->fBoundMemoryMode = false;
683# endif
684# else
685 pGMM->fLegacyAllocationMode = true;
686 pGMM->fBoundMemoryMode = true;
687# endif
688#endif
689
690 /*
691 * Query system page count and guess a reasonable cMaxPages value.
692 */
693 pGMM->cMaxPages = UINT32_MAX; /** @todo IPRT function for querying RAM size and such. */
694
695 g_pGMM = pGMM;
696 LogFlow(("GMMInit: pGMM=%p fLegacyAllocationMode=%RTbool fBoundMemoryMode=%RTbool\n", pGMM, pGMM->fLegacyAllocationMode, pGMM->fBoundMemoryMode));
697 return VINF_SUCCESS;
698 }
699
700 RTMemFree(pGMM);
701 SUPR0Printf("GMMR0Init: failed! rc=%d\n", rc);
702 return rc;
703}
704
705
706/**
707 * Terminates the GMM component.
708 */
709GMMR0DECL(void) GMMR0Term(void)
710{
711 LogFlow(("GMMTerm:\n"));
712
713 /*
714 * Take care / be paranoid...
715 */
716 PGMM pGMM = g_pGMM;
717 if (!VALID_PTR(pGMM))
718 return;
719 if (pGMM->u32Magic != GMM_MAGIC)
720 {
721 SUPR0Printf("GMMR0Term: u32Magic=%#x\n", pGMM->u32Magic);
722 return;
723 }
724
725 /*
726 * Undo what init did and free all the resources we've acquired.
727 */
728 /* Destroy the fundamentals. */
729 g_pGMM = NULL;
730 pGMM->u32Magic++;
731 RTSemFastMutexDestroy(pGMM->Mtx);
732 pGMM->Mtx = NIL_RTSEMFASTMUTEX;
733
734 /* free any chunks still hanging around. */
735 RTAvlU32Destroy(&pGMM->pChunks, gmmR0TermDestroyChunk, pGMM);
736
737 /* finally the instance data itself. */
738 RTMemFree(pGMM);
739 LogFlow(("GMMTerm: done\n"));
740}
741
742
743/**
744 * RTAvlU32Destroy callback.
745 *
746 * @returns 0
747 * @param pNode The node to destroy.
748 * @param pvGMM The GMM handle.
749 */
750static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM)
751{
752 PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
753
754 if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
755 SUPR0Printf("GMMR0Term: %p/%#x: cFree=%d cPrivate=%d cShared=%d cMappings=%d\n", pChunk,
756 pChunk->Core.Key, pChunk->cFree, pChunk->cPrivate, pChunk->cShared, pChunk->cMappings);
757
758 int rc = RTR0MemObjFree(pChunk->MemObj, true /* fFreeMappings */);
759 if (RT_FAILURE(rc))
760 {
761 SUPR0Printf("GMMR0Term: %p/%#x: RTRMemObjFree(%p,true) -> %d (cMappings=%d)\n", pChunk,
762 pChunk->Core.Key, pChunk->MemObj, rc, pChunk->cMappings);
763 AssertRC(rc);
764 }
765 pChunk->MemObj = NIL_RTR0MEMOBJ;
766
767 RTMemFree(pChunk->paMappings);
768 pChunk->paMappings = NULL;
769
770 RTMemFree(pChunk);
771 NOREF(pvGMM);
772 return 0;
773}
774
775
776/**
777 * Initializes the per-VM data for the GMM.
778 *
779 * This is called from within the GVMM lock (from GVMMR0CreateVM)
780 * and should only initialize the data members so GMMR0CleanupVM
781 * can deal with them. We reserve no memory or anything here;
782 * that's done later in GMMR0InitVM.
783 *
784 * @param pGVM Pointer to the Global VM structure.
785 */
786GMMR0DECL(void) GMMR0InitPerVMData(PGVM pGVM)
787{
788 AssertCompile(RT_SIZEOFMEMB(GVM,gmm.s) <= RT_SIZEOFMEMB(GVM,gmm.padding));
789
790 pGVM->gmm.s.enmPolicy = GMMOCPOLICY_INVALID;
791 pGVM->gmm.s.enmPriority = GMMPRIORITY_INVALID;
792 pGVM->gmm.s.fMayAllocate = false;
793}
794
795
796/**
797 * Cleans up when a VM is terminating.
798 *
799 * @param pGVM Pointer to the Global VM structure.
800 */
801GMMR0DECL(void) GMMR0CleanupVM(PGVM pGVM)
802{
803 LogFlow(("GMMR0CleanupVM: pGVM=%p:{.pVM=%p, .hSelf=%#x}\n", pGVM, pGVM->pVM, pGVM->hSelf));
804
805 PGMM pGMM;
806 GMM_GET_VALID_INSTANCE_VOID(pGMM);
807
808 int rc = RTSemFastMutexRequest(pGMM->Mtx);
809 AssertRC(rc);
810 GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
811
812#ifdef VBOX_WITH_PAGE_SHARING
813 /* Clean up all registered shared modules. */
814 RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, pGVM);
815#endif
816
817 /*
818 * The policy is 'INVALID' until the initial reservation
819 * request has been serviced.
820 */
821 if ( pGVM->gmm.s.enmPolicy > GMMOCPOLICY_INVALID
822 && pGVM->gmm.s.enmPolicy < GMMOCPOLICY_END)
823 {
824 /*
825 * If it's the last VM around, we can skip walking all the chunks looking
826 * for the pages owned by this VM and instead flush the whole shebang.
827 *
828 * This takes care of the eventuality that a VM has left shared page
829 * references behind (shouldn't happen of course, but you never know).
830 */
831 Assert(pGMM->cRegisteredVMs);
832 pGMM->cRegisteredVMs--;
833#if 0 /* disabled so it won't hide bugs. */
834 if (!pGMM->cRegisteredVMs)
835 {
836 RTAvlU32Destroy(&pGMM->pChunks, gmmR0CleanupVMDestroyChunk, pGMM);
837
838 for (unsigned i = 0; i < RT_ELEMENTS(pGMM->ChunkTLB.aEntries); i++)
839 {
840 pGMM->ChunkTLB.aEntries[i].idChunk = NIL_GMM_CHUNKID;
841 pGMM->ChunkTLB.aEntries[i].pChunk = NULL;
842 }
843
844 memset(&pGMM->Private, 0, sizeof(pGMM->Private));
845 memset(&pGMM->Shared, 0, sizeof(pGMM->Shared));
846
847 memset(&pGMM->bmChunkId[0], 0, sizeof(pGMM->bmChunkId));
848 ASMBitSet(&pGMM->bmChunkId[0], NIL_GMM_CHUNKID);
849
850 pGMM->cReservedPages = 0;
851 pGMM->cOverCommittedPages = 0;
852 pGMM->cAllocatedPages = 0;
853 pGMM->cSharedPages = 0;
854 pGMM->cLeftBehindSharedPages = 0;
855 pGMM->cChunks = 0;
856 pGMM->cBalloonedPages = 0;
857 }
858 else
859#endif
860 {
861 /*
862 * Walk the entire pool looking for pages that belong to this VM
863 * and left-over mappings. (This'll only catch private pages; shared
864 * pages will be 'left behind'.)
865 */
866 uint64_t cPrivatePages = pGVM->gmm.s.cPrivatePages; /* save */
867 RTAvlU32DoWithAll(&pGMM->pChunks, true /* fFromLeft */, gmmR0CleanupVMScanChunk, pGVM);
868 if (pGVM->gmm.s.cPrivatePages)
869 SUPR0Printf("GMMR0CleanupVM: hGVM=%#x has %#x private pages that cannot be found!\n", pGVM->hSelf, pGVM->gmm.s.cPrivatePages);
870 pGMM->cAllocatedPages -= cPrivatePages;
871
872 /* free empty chunks. */
873 if (cPrivatePages)
874 {
875 PGMMCHUNK pCur = pGMM->Private.apLists[RT_ELEMENTS(pGMM->Private.apLists) - 1];
876 while (pCur)
877 {
878 PGMMCHUNK pNext = pCur->pFreeNext;
879 if ( pCur->cFree == GMM_CHUNK_NUM_PAGES
880 && ( !pGMM->fBoundMemoryMode
881 || pCur->hGVM == pGVM->hSelf))
882 gmmR0FreeChunk(pGMM, pGVM, pCur);
883 pCur = pNext;
884 }
885 }
886
887 /* account for shared pages that weren't freed. */
888 if (pGVM->gmm.s.cSharedPages)
889 {
890 Assert(pGMM->cSharedPages >= pGVM->gmm.s.cSharedPages);
891 SUPR0Printf("GMMR0CleanupVM: hGVM=%#x left %#x shared pages behind!\n", pGVM->hSelf, pGVM->gmm.s.cSharedPages);
892 pGMM->cLeftBehindSharedPages += pGVM->gmm.s.cSharedPages;
893 }
894
895 /*
896 * Update the over-commitment management statistics.
897 */
898 pGMM->cReservedPages -= pGVM->gmm.s.Reserved.cBasePages
899 + pGVM->gmm.s.Reserved.cFixedPages
900 + pGVM->gmm.s.Reserved.cShadowPages;
901 switch (pGVM->gmm.s.enmPolicy)
902 {
903 case GMMOCPOLICY_NO_OC:
904 break;
905 default:
906 /** @todo Update GMM->cOverCommittedPages */
907 break;
908 }
909 }
910 }
911
912 /* zap the GVM data. */
913 pGVM->gmm.s.enmPolicy = GMMOCPOLICY_INVALID;
914 pGVM->gmm.s.enmPriority = GMMPRIORITY_INVALID;
915 pGVM->gmm.s.fMayAllocate = false;
916
917 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
918 RTSemFastMutexRelease(pGMM->Mtx);
919
920 LogFlow(("GMMR0CleanupVM: returns\n"));
921}
922
923
924/**
925 * RTAvlU32DoWithAll callback.
926 *
927 * @returns 0
928 * @param pNode The node to search.
929 * @param pvGVM Pointer to the shared VM structure.
930 */
931static DECLCALLBACK(int) gmmR0CleanupVMScanChunk(PAVLU32NODECORE pNode, void *pvGVM)
932{
933 PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
934 PGVM pGVM = (PGVM)pvGVM;
935
936 /*
937 * Look for pages belonging to the VM.
938 * (Perform some internal checks while we're scanning.)
939 */
940#ifndef VBOX_STRICT
941 if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
942#endif
943 {
944 unsigned cPrivate = 0;
945 unsigned cShared = 0;
946 unsigned cFree = 0;
947
948 gmmR0UnlinkChunk(pChunk); /* avoiding cFreePages updates. */
949
950 uint16_t hGVM = pGVM->hSelf;
951 unsigned iPage = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
952 while (iPage-- > 0)
953 if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
954 {
955 if (pChunk->aPages[iPage].Private.hGVM == hGVM)
956 {
957 /*
958 * Free the page.
959 *
960 * The reason for not using gmmR0FreePrivatePage here is that we
961 * must *not* cause the chunk to be freed from under us - we're in
962 * an AVL tree walk here.
963 */
964 pChunk->aPages[iPage].u = 0;
965 pChunk->aPages[iPage].Free.iNext = pChunk->iFreeHead;
966 pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
967 pChunk->iFreeHead = iPage;
968 pChunk->cPrivate--;
969 pChunk->cFree++;
970 pGVM->gmm.s.cPrivatePages--;
971 cFree++;
972 }
973 else
974 cPrivate++;
975 }
976 else if (GMM_PAGE_IS_FREE(&pChunk->aPages[iPage]))
977 cFree++;
978 else
979 cShared++;
980
981 gmmR0LinkChunk(pChunk, pChunk->cShared ? &g_pGMM->Shared : &g_pGMM->Private);
982
983 /*
984 * Did it add up?
985 */
986 if (RT_UNLIKELY( pChunk->cFree != cFree
987 || pChunk->cPrivate != cPrivate
988 || pChunk->cShared != cShared))
989 {
990 SUPR0Printf("gmmR0CleanupVMScanChunk: Chunk %p/%#x has bogus stats - free=%d/%d private=%d/%d shared=%d/%d\n",
991 pChunk, pChunk->Core.Key, pChunk->cFree, cFree, pChunk->cPrivate, cPrivate, pChunk->cShared, cShared);
992 pChunk->cFree = cFree;
993 pChunk->cPrivate = cPrivate;
994 pChunk->cShared = cShared;
995 }
996 }
997
998 /*
999 * Look for the mapping belonging to the terminating VM.
1000 */
1001 for (unsigned i = 0; i < pChunk->cMappings; i++)
1002 if (pChunk->paMappings[i].pGVM == pGVM)
1003 {
1004 RTR0MEMOBJ MemObj = pChunk->paMappings[i].MapObj;
1005
1006 pChunk->cMappings--;
1007 if (i < pChunk->cMappings)
1008 pChunk->paMappings[i] = pChunk->paMappings[pChunk->cMappings];
1009 pChunk->paMappings[pChunk->cMappings].pGVM = NULL;
1010 pChunk->paMappings[pChunk->cMappings].MapObj = NIL_RTR0MEMOBJ;
1011
1012 int rc = RTR0MemObjFree(MemObj, false /* fFreeMappings (NA) */);
1013 if (RT_FAILURE(rc))
1014 {
1015 SUPR0Printf("gmmR0CleanupVMScanChunk: %p/%#x: mapping #%x: RTRMemObjFree(%p,false) -> %d \n",
1016 pChunk, pChunk->Core.Key, i, MemObj, rc);
1017 AssertRC(rc);
1018 }
1019 break;
1020 }
1021
1022 /*
1023 * If not in bound memory mode, we should reset the hGVM field
1024 * if it has our handle in it.
1025 */
1026 if (pChunk->hGVM == pGVM->hSelf)
1027 {
1028 if (!g_pGMM->fBoundMemoryMode)
1029 pChunk->hGVM = NIL_GVM_HANDLE;
1030 else if (pChunk->cFree != GMM_CHUNK_NUM_PAGES)
1031 {
1032 SUPR0Printf("gmmR0CleanupVMScanChunk: %p/%#x: cFree=%#x - it should be 0 in bound mode!\n",
1033 pChunk, pChunk->Core.Key, pChunk->cFree);
1034 AssertMsgFailed(("%p/%#x: cFree=%#x - it should be 0 in bound mode!\n", pChunk, pChunk->Core.Key, pChunk->cFree));
1035
1036 gmmR0UnlinkChunk(pChunk);
1037 pChunk->cFree = GMM_CHUNK_NUM_PAGES;
1038 gmmR0LinkChunk(pChunk, pChunk->cShared ? &g_pGMM->Shared : &g_pGMM->Private);
1039 }
1040 }
1041
1042 return 0;
1043}
1044
1045
1046/**
1047 * RTAvlU32Destroy callback for GMMR0CleanupVM.
1048 *
1049 * @returns 0
1050 * @param pNode The node (allocation chunk) to destroy.
1051 * @param pvGVM Pointer to the shared VM structure.
1052 */
1053/*static*/ DECLCALLBACK(int) gmmR0CleanupVMDestroyChunk(PAVLU32NODECORE pNode, void *pvGVM)
1054{
1055 PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
1056 PGVM pGVM = (PGVM)pvGVM;
1057
1058 for (unsigned i = 0; i < pChunk->cMappings; i++)
1059 {
1060 if (pChunk->paMappings[i].pGVM != pGVM)
1061 SUPR0Printf("gmmR0CleanupVMDestroyChunk: %p/%#x: mapping #%x: pGVM=%p expected %p\n", pChunk,
1062 pChunk->Core.Key, i, pChunk->paMappings[i].pGVM, pGVM);
1063 int rc = RTR0MemObjFree(pChunk->paMappings[i].MapObj, false /* fFreeMappings (NA) */);
1064 if (RT_FAILURE(rc))
1065 {
1066 SUPR0Printf("gmmR0CleanupVMDestroyChunk: %p/%#x: mapping #%x: RTRMemObjFree(%p,false) -> %d \n", pChunk,
1067 pChunk->Core.Key, i, pChunk->paMappings[i].MapObj, rc);
1068 AssertRC(rc);
1069 }
1070 }
1071
1072 int rc = RTR0MemObjFree(pChunk->MemObj, true /* fFreeMappings */);
1073 if (RT_FAILURE(rc))
1074 {
1075 SUPR0Printf("gmmR0CleanupVMDestroyChunk: %p/%#x: RTRMemObjFree(%p,true) -> %d (cMappings=%d)\n", pChunk,
1076 pChunk->Core.Key, pChunk->MemObj, rc, pChunk->cMappings);
1077 AssertRC(rc);
1078 }
1079 pChunk->MemObj = NIL_RTR0MEMOBJ;
1080
1081 RTMemFree(pChunk->paMappings);
1082 pChunk->paMappings = NULL;
1083
1084 RTMemFree(pChunk);
1085 return 0;
1086}
1087
1088
1089/**
1090 * The initial resource reservations.
1091 *
1092 * This will make memory reservations according to policy and priority. If there aren't
1093 * sufficient resources available to sustain the VM this function will fail and all
1094 * future allocation requests will fail as well.
1095 *
1096 * These are just the initial reservations made very very early during the VM creation
1097 * process and will be adjusted later in the GMMR0UpdateReservation call after the
1098 * ring-3 init has completed.
1099 *
1100 * @returns VBox status code.
1101 * @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1102 * @retval VERR_GMM_
1103 *
1104 * @param pVM Pointer to the shared VM structure.
1105 * @param idCpu VCPU id
1106 * @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1107 * This does not include MMIO2 and similar.
1108 * @param cShadowPages The number of pages that may be allocated for shadow paging structures.
1109 * @param cFixedPages The number of pages that may be allocated for fixed objects like the
1110 * hyper heap, MMIO2 and similar.
1111 * @param enmPolicy The OC policy to use on this VM.
1112 * @param enmPriority The priority in an out-of-memory situation.
1113 *
1114 * @thread The creator thread / EMT.
1115 */
1116GMMR0DECL(int) GMMR0InitialReservation(PVM pVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages,
1117 GMMOCPOLICY enmPolicy, GMMPRIORITY enmPriority)
1118{
1119 LogFlow(("GMMR0InitialReservation: pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x enmPolicy=%d enmPriority=%d\n",
1120 pVM, cBasePages, cShadowPages, cFixedPages, enmPolicy, enmPriority));
1121
1122 /*
1123 * Validate, get basics and take the semaphore.
1124 */
1125 PGMM pGMM;
1126 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1127 PGVM pGVM;
1128 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
1129 if (RT_FAILURE(rc))
1130 return rc;
1131
1132 AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1133 AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1134 AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1135 AssertReturn(enmPolicy > GMMOCPOLICY_INVALID && enmPolicy < GMMOCPOLICY_END, VERR_INVALID_PARAMETER);
1136 AssertReturn(enmPriority > GMMPRIORITY_INVALID && enmPriority < GMMPRIORITY_END, VERR_INVALID_PARAMETER);
1137
1138 rc = RTSemFastMutexRequest(pGMM->Mtx);
1139 AssertRC(rc);
1140 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1141 {
1142 if ( !pGVM->gmm.s.Reserved.cBasePages
1143 && !pGVM->gmm.s.Reserved.cFixedPages
1144 && !pGVM->gmm.s.Reserved.cShadowPages)
1145 {
1146 /*
1147 * Check if we can accommodate this.
1148 */
1149 /* ... later ... */
1150 if (RT_SUCCESS(rc))
1151 {
1152 /*
1153 * Update the records.
1154 */
1155 pGVM->gmm.s.Reserved.cBasePages = cBasePages;
1156 pGVM->gmm.s.Reserved.cFixedPages = cFixedPages;
1157 pGVM->gmm.s.Reserved.cShadowPages = cShadowPages;
1158 pGVM->gmm.s.enmPolicy = enmPolicy;
1159 pGVM->gmm.s.enmPriority = enmPriority;
1160 pGVM->gmm.s.fMayAllocate = true;
1161
1162 pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1163 pGMM->cRegisteredVMs++;
1164 }
1165 }
1166 else
1167 rc = VERR_WRONG_ORDER;
1168 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1169 }
1170 else
1171 rc = VERR_INTERNAL_ERROR_5;
1172 RTSemFastMutexRelease(pGMM->Mtx);
1173 LogFlow(("GMMR0InitialReservation: returns %Rrc\n", rc));
1174 return rc;
1175}
1176
1177
1178/**
1179 * VMMR0 request wrapper for GMMR0InitialReservation.
1180 *
1181 * @returns see GMMR0InitialReservation.
1182 * @param pVM Pointer to the shared VM structure.
1183 * @param idCpu VCPU id
1184 * @param pReq The request packet.
1185 */
1186GMMR0DECL(int) GMMR0InitialReservationReq(PVM pVM, VMCPUID idCpu, PGMMINITIALRESERVATIONREQ pReq)
1187{
1188 /*
1189 * Validate input and pass it on.
1190 */
1191 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1192 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1193 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
1194
1195 return GMMR0InitialReservation(pVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages, pReq->enmPolicy, pReq->enmPriority);
1196}
1197
1198
1199/**
1200 * This updates the memory reservation with the additional MMIO2 and ROM pages.
1201 *
1202 * @returns VBox status code.
1203 * @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1204 *
1205 * @param pVM Pointer to the shared VM structure.
1206 * @param idCpu VCPU id
1207 * @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1208 * This does not include MMIO2 and similar.
1209 * @param cShadowPages The number of pages that may be allocated for shadow paging structures.
1210 * @param cFixedPages The number of pages that may be allocated for fixed objects like the
1211 * hyper heap, MMIO2 and similar.
1212 *
1213 * @thread EMT.
1214 */
1215GMMR0DECL(int) GMMR0UpdateReservation(PVM pVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages)
1216{
1217 LogFlow(("GMMR0UpdateReservation: pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x\n",
1218 pVM, cBasePages, cShadowPages, cFixedPages));
1219
1220 /*
1221 * Validate, get basics and take the semaphore.
1222 */
1223 PGMM pGMM;
1224 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1225 PGVM pGVM;
1226 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
1227 if (RT_FAILURE(rc))
1228 return rc;
1229
1230 AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1231 AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1232 AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1233
1234 rc = RTSemFastMutexRequest(pGMM->Mtx);
1235 AssertRC(rc);
1236 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1237 {
1238 if ( pGVM->gmm.s.Reserved.cBasePages
1239 && pGVM->gmm.s.Reserved.cFixedPages
1240 && pGVM->gmm.s.Reserved.cShadowPages)
1241 {
1242 /*
1243 * Check if we can accommodate this.
1244 */
1245 /* ... later ... */
1246 if (RT_SUCCESS(rc))
1247 {
1248 /*
1249 * Update the records.
1250 */
1251 pGMM->cReservedPages -= pGVM->gmm.s.Reserved.cBasePages
1252 + pGVM->gmm.s.Reserved.cFixedPages
1253 + pGVM->gmm.s.Reserved.cShadowPages;
1254 pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1255
1256 pGVM->gmm.s.Reserved.cBasePages = cBasePages;
1257 pGVM->gmm.s.Reserved.cFixedPages = cFixedPages;
1258 pGVM->gmm.s.Reserved.cShadowPages = cShadowPages;
1259 }
1260 }
1261 else
1262 rc = VERR_WRONG_ORDER;
1263 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1264 }
1265 else
1266 rc = VERR_INTERNAL_ERROR_5;
1267 RTSemFastMutexRelease(pGMM->Mtx);
1268 LogFlow(("GMMR0UpdateReservation: returns %Rrc\n", rc));
1269 return rc;
1270}
1271
1272
1273/**
1274 * VMMR0 request wrapper for GMMR0UpdateReservation.
1275 *
1276 * @returns see GMMR0UpdateReservation.
1277 * @param pVM Pointer to the shared VM structure.
1278 * @param idCpu VCPU id
1279 * @param pReq The request packet.
1280 */
1281GMMR0DECL(int) GMMR0UpdateReservationReq(PVM pVM, VMCPUID idCpu, PGMMUPDATERESERVATIONREQ pReq)
1282{
1283 /*
1284 * Validate input and pass it on.
1285 */
1286 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1287 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1288 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
1289
1290 return GMMR0UpdateReservation(pVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages);
1291}
1292
1293
1294/**
1295 * Performs sanity checks on a free set.
1296 *
1297 * @returns Error count.
1298 *
1299 * @param pGMM Pointer to the GMM instance.
1300 * @param pSet Pointer to the set.
1301 * @param pszSetName The set name.
1302 * @param pszFunction The function from which it was called.
1303 * @param uLineNo The line number.
1304 */
1305static uint32_t gmmR0SanityCheckSet(PGMM pGMM, PGMMCHUNKFREESET pSet, const char *pszSetName,
1306 const char *pszFunction, unsigned uLineNo)
1307{
1308 uint32_t cErrors = 0;
1309
1310 /*
1311 * Count the free pages in all the chunks and match it against pSet->cFreePages.
1312 */
1313 uint32_t cPages = 0;
1314 for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1315 {
1316 for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1317 {
1318 /** @todo check that the chunk is hashed into the right set. */
1319 cPages += pCur->cFree;
1320 }
1321 }
1322 if (RT_UNLIKELY(cPages != pSet->cFreePages))
1323 {
1324 SUPR0Printf("GMM insanity: found %#x pages in the %s set, expected %#x. (%s, line %u)\n",
1325 cPages, pszSetName, pSet->cFreePages, pszFunction, uLineNo);
1326 cErrors++;
1327 }
1328
1329 return cErrors;
1330}
1331
1332
1333/**
1334 * Performs some sanity checks on the GMM while owning the lock.
1335 *
1336 * @returns Error count.
1337 *
1338 * @param pGMM Pointer to the GMM instance.
1339 * @param pszFunction The function from which it is called.
1340 * @param uLineNo The line number.
1341 */
1342static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo)
1343{
1344 uint32_t cErrors = 0;
1345
1346 cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->Private, "private", pszFunction, uLineNo);
1347 cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->Shared, "shared", pszFunction, uLineNo);
1348 /** @todo add more sanity checks. */
1349
1350 return cErrors;
1351}
1352
1353
1354/**
1355 * Looks up a chunk in the tree and fills in the TLB entry for it.
1356 *
1357 * This is not expected to fail and will bitch if it does.
1358 *
1359 * @returns Pointer to the allocation chunk, NULL if not found.
1360 * @param pGMM Pointer to the GMM instance.
1361 * @param idChunk The ID of the chunk to find.
1362 * @param pTlbe Pointer to the TLB entry.
1363 */
1364static PGMMCHUNK gmmR0GetChunkSlow(PGMM pGMM, uint32_t idChunk, PGMMCHUNKTLBE pTlbe)
1365{
1366 PGMMCHUNK pChunk = (PGMMCHUNK)RTAvlU32Get(&pGMM->pChunks, idChunk);
1367 AssertMsgReturn(pChunk, ("Chunk %#x not found!\n", idChunk), NULL);
1368 pTlbe->idChunk = idChunk;
1369 pTlbe->pChunk = pChunk;
1370 return pChunk;
1371}
1372
1373
1374/**
1375 * Finds an allocation chunk.
1376 *
1377 * This is not expected to fail and will bitch if it does.
1378 *
1379 * @returns Pointer to the allocation chunk, NULL if not found.
1380 * @param pGMM Pointer to the GMM instance.
1381 * @param idChunk The ID of the chunk to find.
1382 */
1383DECLINLINE(PGMMCHUNK) gmmR0GetChunk(PGMM pGMM, uint32_t idChunk)
1384{
1385 /*
1386 * Do a TLB lookup, branch if not in the TLB.
1387 */
1388 PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(idChunk)];
1389 if ( pTlbe->idChunk != idChunk
1390 || !pTlbe->pChunk)
1391 return gmmR0GetChunkSlow(pGMM, idChunk, pTlbe);
1392 return pTlbe->pChunk;
1393}
1394
1395
1396/**
1397 * Finds a page.
1398 *
1399 * This is not expected to fail and will bitch if it does.
1400 *
1401 * @returns Pointer to the page, NULL if not found.
1402 * @param pGMM Pointer to the GMM instance.
1403 * @param idPage The ID of the page to find.
1404 */
1405DECLINLINE(PGMMPAGE) gmmR0GetPage(PGMM pGMM, uint32_t idPage)
1406{
1407 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
1408 if (RT_LIKELY(pChunk))
1409 return &pChunk->aPages[idPage & GMM_PAGEID_IDX_MASK];
1410 return NULL;
1411}
1412
1413
1414/**
1415 * Unlinks the chunk from the free list it's currently on (if any).
1416 *
1417 * @param pChunk The allocation chunk.
1418 */
1419DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk)
1420{
1421 PGMMCHUNKFREESET pSet = pChunk->pSet;
1422 if (RT_LIKELY(pSet))
1423 {
1424 pSet->cFreePages -= pChunk->cFree;
1425
1426 PGMMCHUNK pPrev = pChunk->pFreePrev;
1427 PGMMCHUNK pNext = pChunk->pFreeNext;
1428 if (pPrev)
1429 pPrev->pFreeNext = pNext;
1430 else
1431 pSet->apLists[(pChunk->cFree - 1) >> GMM_CHUNK_FREE_SET_SHIFT] = pNext;
1432 if (pNext)
1433 pNext->pFreePrev = pPrev;
1434
1435 pChunk->pSet = NULL;
1436 pChunk->pFreeNext = NULL;
1437 pChunk->pFreePrev = NULL;
1438 }
1439 else
1440 {
1441 Assert(!pChunk->pFreeNext);
1442 Assert(!pChunk->pFreePrev);
1443 Assert(!pChunk->cFree);
1444 }
1445}
1446
1447
1448/**
1449 * Links the chunk onto the appropriate free list in the specified free set.
1450 *
1451 * If the chunk has no free entries, it's not linked into any list.
1452 *
1453 * @param pChunk The allocation chunk.
1454 * @param pSet The free set.
1455 */
1456DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet)
1457{
1458 Assert(!pChunk->pSet);
1459 Assert(!pChunk->pFreeNext);
1460 Assert(!pChunk->pFreePrev);
1461
1462 if (pChunk->cFree > 0)
1463 {
1464 pChunk->pSet = pSet;
1465 pChunk->pFreePrev = NULL;
1466 unsigned iList = (pChunk->cFree - 1) >> GMM_CHUNK_FREE_SET_SHIFT;
1467 pChunk->pFreeNext = pSet->apLists[iList];
1468 if (pChunk->pFreeNext)
1469 pChunk->pFreeNext->pFreePrev = pChunk;
1470 pSet->apLists[iList] = pChunk;
1471
1472 pSet->cFreePages += pChunk->cFree;
1473 }
1474}
1475
1476
1477/**
1478 * Frees a Chunk ID.
1479 *
1480 * @param pGMM Pointer to the GMM instance.
1481 * @param idChunk The Chunk ID to free.
1482 */
1483static void gmmR0FreeChunkId(PGMM pGMM, uint32_t idChunk)
1484{
1485 AssertReturnVoid(idChunk != NIL_GMM_CHUNKID);
1486 AssertMsg(ASMBitTest(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk));
1487 ASMAtomicBitClear(&pGMM->bmChunkId[0], idChunk);
1488}
1489
1490
1491/**
1492 * Allocates a new Chunk ID.
1493 *
1494 * @returns The Chunk ID.
1495 * @param pGMM Pointer to the GMM instance.
1496 */
1497static uint32_t gmmR0AllocateChunkId(PGMM pGMM)
1498{
1499 AssertCompile(!((GMM_CHUNKID_LAST + 1) & 31)); /* must be a multiple of 32 */
1500 AssertCompile(NIL_GMM_CHUNKID == 0);
1501
1502 /*
1503 * Try the next sequential one.
1504 */
1505 int32_t idChunk = ++pGMM->idChunkPrev;
1506#if 0 /* test the fallback first */
1507 if ( idChunk <= GMM_CHUNKID_LAST
1508 && idChunk > NIL_GMM_CHUNKID
1509 && !ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk))
1510 return idChunk;
1511#endif
1512
1513 /*
1514 * Scan sequentially from the last one.
1515 */
1516 if ( (uint32_t)idChunk < GMM_CHUNKID_LAST
1517 && idChunk > NIL_GMM_CHUNKID)
1518 {
1519 idChunk = ASMBitNextClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1, idChunk);
1520 if (idChunk > NIL_GMM_CHUNKID)
1521 {
1522 AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
1523 return pGMM->idChunkPrev = idChunk;
1524 }
1525 }
1526
1527 /*
1528 * Ok, scan from the start.
1529 * We're not racing anyone, so there is no need to expect failures or have restart loops.
1530 */
1531 idChunk = ASMBitFirstClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1);
1532 AssertMsgReturn(idChunk > NIL_GMM_CHUNKID, ("%#x\n", idChunk), NIL_GVM_HANDLE);
1533 AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
1534
1535 return pGMM->idChunkPrev = idChunk;
1536}
1537
1538
1539/**
1540 * Registers a new chunk of memory.
1541 *
1542 * This is called by both gmmR0AllocateOneChunk and GMMR0SeedChunk. The caller
1543 * must own the global lock.
1544 *
1545 * @returns VBox status code.
1546 * @param pGMM Pointer to the GMM instance.
1547 * @param pSet Pointer to the set.
1548 * @param MemObj The memory object for the chunk.
1549 * @param hGVM The affinity of the chunk. NIL_GVM_HANDLE for no
1550 * affinity.
1551 * @param enmChunkType Chunk type (continuous or non-continuous)
1552 * @param ppChunk Chunk address (out)
1553 */
1554static int gmmR0RegisterChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, RTR0MEMOBJ MemObj, uint16_t hGVM, GMMCHUNKTYPE enmChunkType, PGMMCHUNK *ppChunk = NULL)
1555{
1556 Assert(hGVM != NIL_GVM_HANDLE || pGMM->fBoundMemoryMode);
1557
1558 int rc;
1559 PGMMCHUNK pChunk = (PGMMCHUNK)RTMemAllocZ(sizeof(*pChunk));
1560 if (pChunk)
1561 {
1562 /*
1563 * Initialize it.
1564 */
1565 pChunk->MemObj = MemObj;
1566 pChunk->cFree = GMM_CHUNK_NUM_PAGES;
1567 pChunk->hGVM = hGVM;
1568 pChunk->iFreeHead = 0;
1569 pChunk->enmType = enmChunkType;
1570 for (unsigned iPage = 0; iPage < RT_ELEMENTS(pChunk->aPages) - 1; iPage++)
1571 {
1572 pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
1573 pChunk->aPages[iPage].Free.iNext = iPage + 1;
1574 }
1575 pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.u2State = GMM_PAGE_STATE_FREE;
1576 pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.iNext = UINT16_MAX;
1577
1578 /*
1579 * Allocate a Chunk ID and insert it into the tree.
1580 * This has to be done behind the mutex of course.
1581 */
1582 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1583 {
1584 pChunk->Core.Key = gmmR0AllocateChunkId(pGMM);
1585 if ( pChunk->Core.Key != NIL_GMM_CHUNKID
1586 && pChunk->Core.Key <= GMM_CHUNKID_LAST
1587 && RTAvlU32Insert(&pGMM->pChunks, &pChunk->Core))
1588 {
1589 pGMM->cChunks++;
1590 gmmR0LinkChunk(pChunk, pSet);
1591 LogFlow(("gmmR0RegisterChunk: pChunk=%p id=%#x cChunks=%d\n", pChunk, pChunk->Core.Key, pGMM->cChunks));
1592
1593 if (ppChunk)
1594 *ppChunk = pChunk;
1595
1596 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1597 return VINF_SUCCESS;
1598 }
1599
1600 /* bail out */
1601 rc = VERR_INTERNAL_ERROR;
1602 }
1603 else
1604 rc = VERR_INTERNAL_ERROR_5;
1605
1606 RTMemFree(pChunk);
1607 }
1608 else
1609 rc = VERR_NO_MEMORY;
1610 return rc;
1611}
1612
1613
1614/**
1615 * Allocate one new chunk and add it to the specified free set.
1616 *
1617 * @returns VBox status code.
1618 * @param pGMM Pointer to the GMM instance.
1619 * @param pSet Pointer to the set.
1620 * @param hGVM The affinity of the new chunk.
1621 * @param enmChunkType Chunk type (continuous or non-continuous)
1622 * @param ppChunk Chunk address (out)
1623 *
1624 * @remarks Called without owning the mutex.
1625 */
1626static int gmmR0AllocateOneChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, uint16_t hGVM, GMMCHUNKTYPE enmChunkType, PGMMCHUNK *ppChunk = NULL)
1627{
1628 /*
1629 * Allocate the memory.
1630 */
1631 RTR0MEMOBJ MemObj;
1632 int rc;
1633
1634 AssertCompile(GMM_CHUNK_SIZE == _2M);
1635 AssertReturn(enmChunkType == GMMCHUNKTYPE_NON_CONTINUOUS || enmChunkType == GMMCHUNKTYPE_CONTINUOUS, VERR_INVALID_PARAMETER);
1636
1637 /* Leave the lock temporarily as the allocation might take long. */
1638 RTSemFastMutexRelease(pGMM->Mtx);
1639 if (enmChunkType == GMMCHUNKTYPE_NON_CONTINUOUS)
1640 rc = RTR0MemObjAllocPhysNC(&MemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS);
1641 else
1642 rc = RTR0MemObjAllocPhysEx(&MemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS, GMM_CHUNK_SIZE);
1643
1644 /* Grab the lock again. */
1645 int rc2 = RTSemFastMutexRequest(pGMM->Mtx);
1646 AssertRCReturn(rc2, rc2);
1647
1648 if (RT_SUCCESS(rc))
1649 {
1650 rc = gmmR0RegisterChunk(pGMM, pSet, MemObj, hGVM, enmChunkType, ppChunk);
1651 if (RT_FAILURE(rc))
1652 RTR0MemObjFree(MemObj, false /* fFreeMappings */);
1653 }
1654 /** @todo Check that RTR0MemObjAllocPhysNC always returns VERR_NO_MEMORY on
1655 * allocation failure. */
1656 return rc;
1657}
1658
1659
1660/**
1661 * Attempts to allocate more pages until the requested amount is met.
1662 *
1663 * @returns VBox status code.
1664 * @param pGMM Pointer to the GMM instance data.
1665 * @param pGVM The calling VM.
1666 * @param pSet Pointer to the free set to grow.
1667 * @param cPages The number of pages needed.
1668 *
1669 * @remarks Called owning the mutex, but will leave it temporarily while
1670 * allocating the memory!
1671 */
1672static int gmmR0AllocateMoreChunks(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet, uint32_t cPages)
1673{
1674 Assert(!pGMM->fLegacyAllocationMode);
1675
1676 if (!GMM_CHECK_SANITY_IN_LOOPS(pGMM))
1677 return VERR_INTERNAL_ERROR_4;
1678
1679 if (!pGMM->fBoundMemoryMode)
1680 {
1681 /*
1682 * Try to steal free chunks from the other set first. (Only take 100% free chunks.)
1683 */
1684 PGMMCHUNKFREESET pOtherSet = pSet == &pGMM->Private ? &pGMM->Shared : &pGMM->Private;
1685 while ( pSet->cFreePages < cPages
1686 && pOtherSet->cFreePages >= GMM_CHUNK_NUM_PAGES)
1687 {
1688 PGMMCHUNK pChunk = pOtherSet->apLists[GMM_CHUNK_FREE_SET_LISTS - 1];
1689 while (pChunk && pChunk->cFree != GMM_CHUNK_NUM_PAGES)
1690 pChunk = pChunk->pFreeNext;
1691 if (!pChunk)
1692 break;
1693
1694 gmmR0UnlinkChunk(pChunk);
1695 gmmR0LinkChunk(pChunk, pSet);
1696 }
1697
1698 /*
1699 * If we still need more pages, allocate new chunks.
1700 * Note! We will leave the mutex while doing the allocation.
1701 */
1702 while (pSet->cFreePages < cPages)
1703 {
1704 int rc = gmmR0AllocateOneChunk(pGMM, pSet, pGVM->hSelf, GMMCHUNKTYPE_NON_CONTINUOUS);
1705 if (RT_FAILURE(rc))
1706 return rc;
1707 if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1708 return VERR_INTERNAL_ERROR_5;
1709 }
1710 }
1711 else
1712 {
1713 /*
1714 * The memory is bound to the VM allocating it, so we have to count
1715 * the free pages carefully as well as make sure we brand them with
1716 * our VM handle.
1717 *
1718 * Note! We will leave the mutex while doing the allocation.
1719 */
1720 uint16_t const hGVM = pGVM->hSelf;
1721 for (;;)
1722 {
1723 /* Count and see if we've reached the goal. */
1724 uint32_t cPagesFound = 0;
1725 for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1726 for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1727 if (pCur->hGVM == hGVM)
1728 {
1729 cPagesFound += pCur->cFree;
1730 if (cPagesFound >= cPages)
1731 break;
1732 }
1733 if (cPagesFound >= cPages)
1734 break;
1735
1736 /* Allocate more. */
1737 int rc = gmmR0AllocateOneChunk(pGMM, pSet, hGVM, GMMCHUNKTYPE_NON_CONTINUOUS);
1738 if (RT_FAILURE(rc))
1739 return rc;
1740 if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1741 return VERR_INTERNAL_ERROR_5;
1742 }
1743 }
1744
1745 return VINF_SUCCESS;
1746}
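/* A rough worked example, assuming the usual 4 KB x86 page size: with
 * GMM_CHUNK_SIZE fixed at 2 MB, each chunk supplies 2 MB / 4 KB = 512 free
 * pages, so a request for, say, 1000 more pages that cannot be satisfied by
 * stealing fully free chunks from the other set ends up allocating two new
 * chunks from the host. */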
1747
1748
1749/**
1750 * Allocates one private page.
1751 *
1752 * Worker for gmmR0AllocatePages.
1753 *
1754 * @param pGMM Pointer to the GMM instance data.
1755 * @param hGVM The GVM handle of the VM requesting memory.
1756 * @param pChunk The chunk to allocate it from.
1757 * @param pPageDesc The page descriptor.
1758 */
1759static void gmmR0AllocatePage(PGMM pGMM, uint32_t hGVM, PGMMCHUNK pChunk, PGMMPAGEDESC pPageDesc)
1760{
1761 /* update the chunk stats. */
1762 if (pChunk->hGVM == NIL_GVM_HANDLE)
1763 pChunk->hGVM = hGVM;
1764 Assert(pChunk->cFree);
1765 pChunk->cFree--;
1766 pChunk->cPrivate++;
1767
1768 /* unlink the first free page. */
1769 const uint32_t iPage = pChunk->iFreeHead;
1770 AssertReleaseMsg(iPage < RT_ELEMENTS(pChunk->aPages), ("%d\n", iPage));
1771 PGMMPAGE pPage = &pChunk->aPages[iPage];
1772 Assert(GMM_PAGE_IS_FREE(pPage));
1773 pChunk->iFreeHead = pPage->Free.iNext;
1774 Log3(("A pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x iNext=%#x\n",
1775 pPage, iPage, (pChunk->Core.Key << GMM_CHUNKID_SHIFT) | iPage,
1776 pPage->Common.u2State, pChunk->iFreeHead, pPage->Free.iNext));
1777
1778 /* make the page private. */
1779 pPage->u = 0;
1780 AssertCompile(GMM_PAGE_STATE_PRIVATE == 0);
1781 pPage->Private.hGVM = hGVM;
1782 AssertCompile(NIL_RTHCPHYS >= GMM_GCPHYS_LAST);
1783 AssertCompile(GMM_GCPHYS_UNSHAREABLE >= GMM_GCPHYS_LAST);
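/* The AssertCompiles above document that both sentinel values (NIL_RTHCPHYS
 * and GMM_GCPHYS_UNSHAREABLE) sort at or above GMM_GCPHYS_LAST, which is what
 * lets the single '<=' test below tell a real guest physical address from the
 * unassigned/unshareable cases. */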
1784 if (pPageDesc->HCPhysGCPhys <= GMM_GCPHYS_LAST)
1785 pPage->Private.pfn = pPageDesc->HCPhysGCPhys >> PAGE_SHIFT;
1786 else
1787 pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE; /* unshareable / unassigned - same thing. */
1788
1789 /* update the page descriptor. */
1790 pPageDesc->HCPhysGCPhys = RTR0MemObjGetPagePhysAddr(pChunk->MemObj, iPage);
1791 Assert(pPageDesc->HCPhysGCPhys != NIL_RTHCPHYS);
1792 pPageDesc->idPage = (pChunk->Core.Key << GMM_CHUNKID_SHIFT) | iPage;
1793 pPageDesc->idSharedPage = NIL_GMM_PAGEID;
1794}
1795
1796
1797/**
1798 * Common worker for GMMR0AllocateHandyPages and GMMR0AllocatePages.
1799 *
1800 * @returns VBox status code:
1801 * @retval VINF_SUCCESS on success.
1802 * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk or
1803 * gmmR0AllocateMoreChunks is necessary.
1804 * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
1805 * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
1806 * that is we're trying to allocate more than we've reserved.
1807 *
1808 * @param pGMM Pointer to the GMM instance data.
1809 * @param pGVM Pointer to the shared VM structure.
1810 * @param cPages The number of pages to allocate.
1811 * @param paPages Pointer to the page descriptors.
1812 * See GMMPAGEDESC for details on what is expected on input.
1813 * @param enmAccount The account to charge.
1814 */
1815static int gmmR0AllocatePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
1816{
1817 /*
1818 * Check allocation limits.
1819 */
1820 if (RT_UNLIKELY(pGMM->cAllocatedPages + cPages > pGMM->cMaxPages))
1821 return VERR_GMM_HIT_GLOBAL_LIMIT;
1822
1823 switch (enmAccount)
1824 {
1825 case GMMACCOUNT_BASE:
1826 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cPages > pGVM->gmm.s.Reserved.cBasePages))
1827 {
1828 Log(("gmmR0AllocatePages:Base: Reserved=%#llx Allocated+Ballooned+Requested=%#llx+%#llx+%#x!\n",
1829 pGVM->gmm.s.Reserved.cBasePages, pGVM->gmm.s.Allocated.cBasePages, pGVM->gmm.s.cBalloonedPages, cPages));
1830 return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
1831 }
1832 break;
1833 case GMMACCOUNT_SHADOW:
1834 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cShadowPages + cPages > pGVM->gmm.s.Reserved.cShadowPages))
1835 {
1836 Log(("gmmR0AllocatePages:Shadow: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
1837 pGVM->gmm.s.Reserved.cShadowPages, pGVM->gmm.s.Allocated.cShadowPages, cPages));
1838 return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
1839 }
1840 break;
1841 case GMMACCOUNT_FIXED:
1842 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cFixedPages + cPages > pGVM->gmm.s.Reserved.cFixedPages))
1843 {
1844 Log(("gmmR0AllocatePages:Fixed: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
1845 pGVM->gmm.s.Reserved.cFixedPages, pGVM->gmm.s.Allocated.cFixedPages, cPages));
1846 return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
1847 }
1848 break;
1849 default:
1850 AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
1851 }
1852
1853 /*
1854 * Check if we need to allocate more memory or not. In bound memory mode this
1855 * is a bit of extra work but it's easier to do it upfront than to bail out later.
1856 */
1857 PGMMCHUNKFREESET pSet = &pGMM->Private;
1858 if (pSet->cFreePages < cPages)
1859 return VERR_GMM_SEED_ME;
1860 if (pGMM->fBoundMemoryMode)
1861 {
1862 uint16_t hGVM = pGVM->hSelf;
1863 uint32_t cPagesFound = 0;
1864 for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1865 for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1866 if (pCur->hGVM == hGVM)
1867 {
1868 cPagesFound += pCur->cFree;
1869 if (cPagesFound >= cPages)
1870 break;
1871 }
1872 if (cPagesFound < cPages)
1873 return VERR_GMM_SEED_ME;
1874 }
1875
1876 /*
1877 * Pick the pages.
1878 * Make some effort to keep each VM's private pages within its own chunks.
1879 */
1880 uint16_t hGVM = pGVM->hSelf;
1881 uint32_t iPage = 0;
1882
1883 /* first round, pick from chunks with an affinity to the VM. */
1884 for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists) && iPage < cPages; i++)
1885 {
1886 PGMMCHUNK pCurFree = NULL;
1887 PGMMCHUNK pCur = pSet->apLists[i];
1888 while (pCur && iPage < cPages)
1889 {
1890 PGMMCHUNK pNext = pCur->pFreeNext;
1891
1892 if ( pCur->hGVM == hGVM
1893 && pCur->cFree < GMM_CHUNK_NUM_PAGES)
1894 {
1895 gmmR0UnlinkChunk(pCur);
1896 for (; pCur->cFree && iPage < cPages; iPage++)
1897 gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
1898 gmmR0LinkChunk(pCur, pSet);
1899 }
1900
1901 pCur = pNext;
1902 }
1903 }
1904
1905 if (iPage < cPages)
1906 {
1907 /* second round, pick pages from the 100% empty chunks we just skipped above. */
1908 PGMMCHUNK pCurFree = NULL;
1909 PGMMCHUNK pCur = pSet->apLists[RT_ELEMENTS(pSet->apLists) - 1];
1910 while (pCur && iPage < cPages)
1911 {
1912 PGMMCHUNK pNext = pCur->pFreeNext;
1913
1914 if ( pCur->cFree == GMM_CHUNK_NUM_PAGES
1915 && ( pCur->hGVM == hGVM
1916 || !pGMM->fBoundMemoryMode))
1917 {
1918 gmmR0UnlinkChunk(pCur);
1919 for (; pCur->cFree && iPage < cPages; iPage++)
1920 gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
1921 gmmR0LinkChunk(pCur, pSet);
1922 }
1923
1924 pCur = pNext;
1925 }
1926 }
1927
1928 if ( iPage < cPages
1929 && !pGMM->fBoundMemoryMode)
1930 {
1931 /* third round, disregard affinity. */
1932 unsigned i = RT_ELEMENTS(pSet->apLists);
1933 while (i-- > 0 && iPage < cPages)
1934 {
1935 PGMMCHUNK pCurFree = NULL;
1936 PGMMCHUNK pCur = pSet->apLists[i];
1937 while (pCur && iPage < cPages)
1938 {
1939 PGMMCHUNK pNext = pCur->pFreeNext;
1940
1941 if ( pCur->cFree > GMM_CHUNK_NUM_PAGES / 2
1942 && cPages >= GMM_CHUNK_NUM_PAGES / 2)
1943 pCur->hGVM = hGVM; /* change chunk affinity */
1944
1945 gmmR0UnlinkChunk(pCur);
1946 for (; pCur->cFree && iPage < cPages; iPage++)
1947 gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
1948 gmmR0LinkChunk(pCur, pSet);
1949
1950 pCur = pNext;
1951 }
1952 }
1953 }
1954
1955 /*
1956 * Update the account.
1957 */
1958 switch (enmAccount)
1959 {
1960 case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages += iPage; break;
1961 case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages += iPage; break;
1962 case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages += iPage; break;
1963 default:
1964 AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
1965 }
1966 pGVM->gmm.s.cPrivatePages += iPage;
1967 pGMM->cAllocatedPages += iPage;
1968
1969 AssertMsgReturn(iPage == cPages, ("%u != %u\n", iPage, cPages), VERR_INTERNAL_ERROR);
1970
1971 /*
1972 * Check if we've reached some threshold and should kick one or two VMs and tell
1973 * them to inflate their balloons a bit more... later.
1974 */
1975
1976 return VINF_SUCCESS;
1977}
1978
1979
1980/**
1981 * Updates the previous allocations and allocates more pages.
1982 *
1983 * The handy pages are always taken from the 'base' memory account.
1984 * The allocated pages are not cleared and will contain random garbage.
1985 *
1986 * @returns VBox status code:
1987 * @retval VINF_SUCCESS on success.
1988 * @retval VERR_NOT_OWNER if the caller is not an EMT.
1989 * @retval VERR_GMM_PAGE_NOT_FOUND if one of the pages to update wasn't found.
1990 * @retval VERR_GMM_PAGE_NOT_PRIVATE if one of the pages to update wasn't a
1991 * private page.
1992 * @retval VERR_GMM_PAGE_NOT_SHARED if one of the pages to update wasn't a
1993 * shared page.
1994 * @retval VERR_GMM_NOT_PAGE_OWNER if one of the pages to be updated wasn't
1995 * owned by the VM.
1996 * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
1997 * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
1998 * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
1999 * that is we're trying to allocate more than we've reserved.
2000 *
2001 * @param pVM Pointer to the shared VM structure.
2002 * @param idCpu VCPU id
2003 * @param cPagesToUpdate The number of pages to update (starting from the head).
2004 * @param cPagesToAlloc The number of pages to allocate (starting from the head).
2005 * @param paPages The array of page descriptors.
2006 * See GMMPAGEDESC for details on what is expected on input.
2007 * @thread EMT.
2008 */
2009GMMR0DECL(int) GMMR0AllocateHandyPages(PVM pVM, VMCPUID idCpu, uint32_t cPagesToUpdate, uint32_t cPagesToAlloc, PGMMPAGEDESC paPages)
2010{
2011 LogFlow(("GMMR0AllocateHandyPages: pVM=%p cPagesToUpdate=%#x cPagesToAlloc=%#x paPages=%p\n",
2012 pVM, cPagesToUpdate, cPagesToAlloc, paPages));
2013
2014 /*
2015 * Validate, get basics and take the semaphore.
2016 * (This is a relatively busy path, so make predictions where possible.)
2017 */
2018 PGMM pGMM;
2019 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2020 PGVM pGVM;
2021 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2022 if (RT_FAILURE(rc))
2023 return rc;
2024
2025 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2026 AssertMsgReturn( (cPagesToUpdate && cPagesToUpdate < 1024)
2027 || (cPagesToAlloc && cPagesToAlloc < 1024),
2028 ("cPagesToUpdate=%#x cPagesToAlloc=%#x\n", cPagesToUpdate, cPagesToAlloc),
2029 VERR_INVALID_PARAMETER);
2030
2031 unsigned iPage = 0;
2032 for (; iPage < cPagesToUpdate; iPage++)
2033 {
2034 AssertMsgReturn( ( paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
2035 && !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK))
2036 || paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
2037 || paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE,
2038 ("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys),
2039 VERR_INVALID_PARAMETER);
2040 AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2041 /*|| paPages[iPage].idPage == NIL_GMM_PAGEID*/,
2042 ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2043 AssertMsgReturn( paPages[iPage].idSharedPage <= GMM_PAGEID_LAST
2044 /*|| paPages[iPage].idSharedPage == NIL_GMM_PAGEID*/,
2045 ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2046 }
2047
2048 for (; iPage < cPagesToAlloc; iPage++)
2049 {
2050 AssertMsgReturn(paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS, ("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys), VERR_INVALID_PARAMETER);
2051 AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2052 AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2053 }
2054
2055 rc = RTSemFastMutexRequest(pGMM->Mtx);
2056 AssertRC(rc);
2057 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2058 {
2059
2060 /* No allocations before the initial reservation has been made! */
2061 if (RT_LIKELY( pGVM->gmm.s.Reserved.cBasePages
2062 && pGVM->gmm.s.Reserved.cFixedPages
2063 && pGVM->gmm.s.Reserved.cShadowPages))
2064 {
2065 /*
2066 * Perform the updates.
2067 * Stop on the first error.
2068 */
2069 for (iPage = 0; iPage < cPagesToUpdate; iPage++)
2070 {
2071 if (paPages[iPage].idPage != NIL_GMM_PAGEID)
2072 {
2073 PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idPage);
2074 if (RT_LIKELY(pPage))
2075 {
2076 if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
2077 {
2078 if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
2079 {
2080 AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
2081 if (RT_LIKELY(paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST))
2082 pPage->Private.pfn = paPages[iPage].HCPhysGCPhys >> PAGE_SHIFT;
2083 else if (paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE)
2084 pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE;
2085 /* else: NIL_RTHCPHYS nothing */
2086
2087 paPages[iPage].idPage = NIL_GMM_PAGEID;
2088 paPages[iPage].HCPhysGCPhys = NIL_RTHCPHYS;
2089 }
2090 else
2091 {
2092 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not owner! hGVM=%#x hSelf=%#x\n",
2093 iPage, paPages[iPage].idPage, pPage->Private.hGVM, pGVM->hSelf));
2094 rc = VERR_GMM_NOT_PAGE_OWNER;
2095 break;
2096 }
2097 }
2098 else
2099 {
2100 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not private! %.*Rhxs (type %d)\n", iPage, paPages[iPage].idPage, sizeof(*pPage), pPage, pPage->Common.u2State));
2101 rc = VERR_GMM_PAGE_NOT_PRIVATE;
2102 break;
2103 }
2104 }
2105 else
2106 {
2107 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (private)\n", iPage, paPages[iPage].idPage));
2108 rc = VERR_GMM_PAGE_NOT_FOUND;
2109 break;
2110 }
2111 }
2112
2113 if (paPages[iPage].idSharedPage != NIL_GMM_PAGEID)
2114 {
2115 PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idSharedPage);
2116 if (RT_LIKELY(pPage))
2117 {
2118 if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
2119 {
2120 AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
2121 Assert(pPage->Shared.cRefs);
2122 Assert(pGVM->gmm.s.cSharedPages);
2123 Assert(pGVM->gmm.s.Allocated.cBasePages);
2124
2125 Log(("GMMR0AllocateHandyPages: free shared page %x cRefs=%d\n", paPages[iPage].idSharedPage, pPage->Shared.cRefs));
2126 pGVM->gmm.s.cSharedPages--;
2127 pGVM->gmm.s.Allocated.cBasePages--;
2128 if (!--pPage->Shared.cRefs)
2129 gmmR0FreeSharedPage(pGMM, paPages[iPage].idSharedPage, pPage);
2130
2131 paPages[iPage].idSharedPage = NIL_GMM_PAGEID;
2132 }
2133 else
2134 {
2135 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not shared!\n", iPage, paPages[iPage].idSharedPage));
2136 rc = VERR_GMM_PAGE_NOT_SHARED;
2137 break;
2138 }
2139 }
2140 else
2141 {
2142 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (shared)\n", iPage, paPages[iPage].idSharedPage));
2143 rc = VERR_GMM_PAGE_NOT_FOUND;
2144 break;
2145 }
2146 }
2147 }
2148
2149 /*
2150 * Join paths with GMMR0AllocatePages for the allocation.
2151 * Note! gmmR0AllocateMoreChunks may leave the protection of the mutex!
2152 */
2153 while (RT_SUCCESS(rc))
2154 {
2155 rc = gmmR0AllocatePages(pGMM, pGVM, cPagesToAlloc, paPages, GMMACCOUNT_BASE);
2156 if ( rc != VERR_GMM_SEED_ME
2157 || pGMM->fLegacyAllocationMode)
2158 break;
2159 rc = gmmR0AllocateMoreChunks(pGMM, pGVM, &pGMM->Private, cPagesToAlloc);
2160 }
2161 }
2162 else
2163 rc = VERR_WRONG_ORDER;
2164 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2165 }
2166 else
2167 rc = VERR_INTERNAL_ERROR_5;
2168 RTSemFastMutexRelease(pGMM->Mtx);
2169 LogFlow(("GMMR0AllocateHandyPages: returns %Rrc\n", rc));
2170 return rc;
2171}
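/* A hedged usage sketch (illustrative only, not taken from a real caller): to
 * allocate a batch of fresh handy pages without updating any existing ones,
 * the descriptors are initialized to the NIL values checked above and
 * cPagesToUpdate is passed as zero:
 *
 *    GMMPAGEDESC aPages[32];
 *    for (uint32_t i = 0; i < RT_ELEMENTS(aPages); i++)
 *    {
 *        aPages[i].HCPhysGCPhys = NIL_RTHCPHYS;
 *        aPages[i].idPage       = NIL_GMM_PAGEID;
 *        aPages[i].idSharedPage = NIL_GMM_PAGEID;
 *    }
 *    int rc = GMMR0AllocateHandyPages(pVM, idCpu, 0, RT_ELEMENTS(aPages), &aPages[0]);
 *
 * On success each descriptor carries the new idPage and the host physical
 * address in HCPhysGCPhys; the page contents are undefined. */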
2172
2173
2174/**
2175 * Allocate one or more pages.
2176 *
2177 * This is typically used for ROMs and MMIO2 (VRAM) during VM creation.
2178 * The allocated pages are not cleared and will contain random garbage.
2179 *
2180 * @returns VBox status code:
2181 * @retval VINF_SUCCESS on success.
2182 * @retval VERR_NOT_OWNER if the caller is not an EMT.
2183 * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2184 * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2185 * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2186 * that is we're trying to allocate more than we've reserved.
2187 *
2188 * @param pVM Pointer to the shared VM structure.
2189 * @param idCpu VCPU id
2190 * @param cPages The number of pages to allocate.
2191 * @param paPages Pointer to the page descriptors.
2192 * See GMMPAGEDESC for details on what is expected on input.
2193 * @param enmAccount The account to charge.
2194 *
2195 * @thread EMT.
2196 */
2197GMMR0DECL(int) GMMR0AllocatePages(PVM pVM, VMCPUID idCpu, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
2198{
2199 LogFlow(("GMMR0AllocatePages: pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pVM, cPages, paPages, enmAccount));
2200
2201 /*
2202 * Validate, get basics and take the semaphore.
2203 */
2204 PGMM pGMM;
2205 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2206 PGVM pGVM;
2207 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2208 if (RT_FAILURE(rc))
2209 return rc;
2210
2211 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2212 AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
2213 AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
2214
2215 for (unsigned iPage = 0; iPage < cPages; iPage++)
2216 {
2217 AssertMsgReturn( paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
2218 || paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE
2219 || ( enmAccount == GMMACCOUNT_BASE
2220 && paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
2221 && !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK)),
2222 ("#%#x: %RHp enmAccount=%d\n", iPage, paPages[iPage].HCPhysGCPhys, enmAccount),
2223 VERR_INVALID_PARAMETER);
2224 AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2225 AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2226 }
2227
2228 rc = RTSemFastMutexRequest(pGMM->Mtx);
2229 AssertRC(rc);
2230 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2231 {
2232
2233 /* No allocations before the initial reservation has been made! */
2234 if (RT_LIKELY( pGVM->gmm.s.Reserved.cBasePages
2235 && pGVM->gmm.s.Reserved.cFixedPages
2236 && pGVM->gmm.s.Reserved.cShadowPages))
2237 {
2238 /*
2239 * gmmR0AllocatePages seed loop.
2240 * Note! gmmR0AllocateMoreChunks may leave the protection of the mutex!
2241 */
2242 while (RT_SUCCESS(rc))
2243 {
2244 rc = gmmR0AllocatePages(pGMM, pGVM, cPages, paPages, enmAccount);
2245 if ( rc != VERR_GMM_SEED_ME
2246 || pGMM->fLegacyAllocationMode)
2247 break;
2248 rc = gmmR0AllocateMoreChunks(pGMM, pGVM, &pGMM->Private, cPages);
2249 }
2250 }
2251 else
2252 rc = VERR_WRONG_ORDER;
2253 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2254 }
2255 else
2256 rc = VERR_INTERNAL_ERROR_5;
2257 RTSemFastMutexRelease(pGMM->Mtx);
2258 LogFlow(("GMMR0AllocatePages: returns %Rrc\n", rc));
2259 return rc;
2260}
2261
2262
2263/**
2264 * VMMR0 request wrapper for GMMR0AllocatePages.
2265 *
2266 * @returns see GMMR0AllocatePages.
2267 * @param pVM Pointer to the shared VM structure.
2268 * @param idCpu VCPU id
2269 * @param pReq The request packet.
2270 */
2271GMMR0DECL(int) GMMR0AllocatePagesReq(PVM pVM, VMCPUID idCpu, PGMMALLOCATEPAGESREQ pReq)
2272{
2273 /*
2274 * Validate input and pass it on.
2275 */
2276 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2277 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2278 AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0]),
2279 ("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0])),
2280 VERR_INVALID_PARAMETER);
2281 AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[pReq->cPages]),
2282 ("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[pReq->cPages])),
2283 VERR_INVALID_PARAMETER);
2284
2285 return GMMR0AllocatePages(pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
2286}
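/* A hedged sketch of how the variable-size request is laid out (illustrative
 * only; the VMMR0 request header magic and the actual dispatch path are
 * omitted). The two cbReq checks above require exactly this sizing:
 *
 *    uint32_t             cPages = 16;
 *    uint32_t             cbReq  = RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[cPages]);
 *    PGMMALLOCATEPAGESREQ pReq   = (PGMMALLOCATEPAGESREQ)RTMemAllocZ(cbReq);
 *    pReq->Hdr.cbReq  = cbReq;
 *    pReq->cPages     = cPages;
 *    pReq->enmAccount = GMMACCOUNT_BASE;
 *    for (uint32_t i = 0; i < cPages; i++)
 *    {
 *        pReq->aPages[i].HCPhysGCPhys = NIL_RTHCPHYS;
 *        pReq->aPages[i].idPage       = NIL_GMM_PAGEID;
 *        pReq->aPages[i].idSharedPage = NIL_GMM_PAGEID;
 *    }
 */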
2287
2288/**
2289 * Allocate a large page to represent guest RAM
2290 *
2291 * The allocated pages are not cleared and will contain random garbage.
2292 *
2293 * @returns VBox status code:
2294 * @retval VINF_SUCCESS on success.
2295 * @retval VERR_NOT_OWNER if the caller is not an EMT.
2296 * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2297 * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2298 * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2299 * that is we're trying to allocate more than we've reserved.
2300 * @returns see GMMR0AllocatePages.
2301 * @param pVM Pointer to the shared VM structure.
2302 * @param idCpu VCPU id
2303 * @param cbPage Large page size (must be GMM_CHUNK_SIZE).
 * @param pIdPage Where to return the page ID of the large page.
 * @param pHCPhys Where to return the host physical address of the large page.
2304 */
2305GMMR0DECL(int) GMMR0AllocateLargePage(PVM pVM, VMCPUID idCpu, uint32_t cbPage, uint32_t *pIdPage, RTHCPHYS *pHCPhys)
2306{
2307 LogFlow(("GMMR0AllocateLargePage: pVM=%p cbPage=%x\n", pVM, cbPage));
2308
2309 AssertReturn(cbPage == GMM_CHUNK_SIZE, VERR_INVALID_PARAMETER);
2310 AssertPtrReturn(pIdPage, VERR_INVALID_PARAMETER);
2311 AssertPtrReturn(pHCPhys, VERR_INVALID_PARAMETER);
2312
2313 /*
2314 * Validate, get basics and take the semaphore.
2315 */
2316 PGMM pGMM;
2317 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2318 PGVM pGVM;
2319 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2320 if (RT_FAILURE(rc))
2321 return rc;
2322
2323 /* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
2324 if (pGMM->fLegacyAllocationMode)
2325 return VERR_NOT_SUPPORTED;
2326
2327 *pHCPhys = NIL_RTHCPHYS;
2328 *pIdPage = NIL_GMM_PAGEID;
2329
2330 rc = RTSemFastMutexRequest(pGMM->Mtx);
2331 AssertRCReturn(rc, rc);
2332 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2333 {
2334 const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
2335 PGMMCHUNK pChunk;
2336 GMMPAGEDESC PageDesc;
2337
2338 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cPages > pGVM->gmm.s.Reserved.cBasePages))
2339 {
2340 Log(("GMMR0AllocateLargePage: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
2341 pGVM->gmm.s.Reserved.cBasePages, pGVM->gmm.s.Allocated.cBasePages, cPages));
2342 RTSemFastMutexRelease(pGMM->Mtx);
2343 return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2344 }
2345
2346 /* Allocate a new continuous chunk. */
2347 rc = gmmR0AllocateOneChunk(pGMM, &pGMM->Private, pGVM->hSelf, GMMCHUNKTYPE_CONTINUOUS, &pChunk);
2348 if (RT_FAILURE(rc))
2349 {
2350 RTSemFastMutexRelease(pGMM->Mtx);
2351 return rc;
2352 }
2353
2354 /* Unlink the new chunk from the free list. */
2355 gmmR0UnlinkChunk(pChunk);
2356
2357 /* Allocate all pages. */
2358 gmmR0AllocatePage(pGMM, pGVM->hSelf, pChunk, &PageDesc);
2359 /* Return the first page as we'll use the whole chunk as one big page. */
2360 *pIdPage = PageDesc.idPage;
2361 *pHCPhys = PageDesc.HCPhysGCPhys;
2362
2363 for (unsigned i = 1; i < cPages; i++)
2364 gmmR0AllocatePage(pGMM, pGVM->hSelf, pChunk, &PageDesc);
2365
2366 /* Update accounting. */
2367 pGVM->gmm.s.Allocated.cBasePages += cPages;
2368 pGVM->gmm.s.cPrivatePages += cPages;
2369 pGMM->cAllocatedPages += cPages;
2370
2371 gmmR0LinkChunk(pChunk, &pGMM->Private);
2372 }
2373 else
2374 rc = VERR_INTERNAL_ERROR_5;
2375
2376 RTSemFastMutexRelease(pGMM->Mtx);
2377 LogFlow(("GMMR0AllocateLargePage: returns %Rrc\n", rc));
2378 return rc;
2379}
2380
2381
2382/**
2383 * Free a large page
2384 *
2385 * @returns VBox status code:
2386 * @param pVM Pointer to the shared VM structure.
2387 * @param idCpu VCPU id
2388 * @param idPage Large page id
2389 */
2390GMMR0DECL(int) GMMR0FreeLargePage(PVM pVM, VMCPUID idCpu, uint32_t idPage)
2391{
2392 LogFlow(("GMMR0FreeLargePage: pVM=%p idPage=%x\n", pVM, idPage));
2393
2394 /*
2395 * Validate, get basics and take the semaphore.
2396 */
2397 PGMM pGMM;
2398 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2399 PGVM pGVM;
2400 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2401 if (RT_FAILURE(rc))
2402 return rc;
2403
2404 /* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
2405 if (pGMM->fLegacyAllocationMode)
2406 return VERR_NOT_SUPPORTED;
2407
2408 rc = RTSemFastMutexRequest(pGMM->Mtx);
2409 AssertRC(rc);
2410 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2411 {
2412 const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
2413
2414 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages < cPages))
2415 {
2416 Log(("GMMR0FreeLargePage: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cBasePages, cPages));
2417 RTSemFastMutexRelease(pGMM->Mtx);
2418 return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2419 }
2420
2421 PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
2422 if ( RT_LIKELY(pPage)
2423 && RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
2424 {
2425 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2426 Assert(pChunk);
2427 Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2428 Assert(pChunk->cPrivate > 0);
2429
2430 /* Release the memory immediately. */
2431 gmmR0FreeChunk(pGMM, NULL, pChunk);
2432
2433 /* Update accounting. */
2434 pGVM->gmm.s.Allocated.cBasePages -= cPages;
2435 pGVM->gmm.s.cPrivatePages -= cPages;
2436 pGMM->cAllocatedPages -= cPages;
2437 }
2438 else
2439 rc = VERR_GMM_PAGE_NOT_FOUND;
2440 }
2441 else
2442 rc = VERR_INTERNAL_ERROR_5;
2443
2444 RTSemFastMutexRelease(pGMM->Mtx);
2445 LogFlow(("GMMR0FreeLargePage: returns %Rrc\n", rc));
2446 return rc;
2447}
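/* Note: unlike ordinary page frees, freeing a large page hands the whole
 * backing chunk to gmmR0FreeChunk above (returned to the host once no
 * mappings remain) and undoes the full GMM_CHUNK_SIZE >> PAGE_SHIFT page
 * charge made by GMMR0AllocateLargePage. */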
2448
2449
2450/**
2451 * VMMR0 request wrapper for GMMR0FreeLargePage.
2452 *
2453 * @returns see GMMR0FreeLargePage.
2454 * @param pVM Pointer to the shared VM structure.
2455 * @param idCpu VCPU id
2456 * @param pReq The request packet.
2457 */
2458GMMR0DECL(int) GMMR0FreeLargePageReq(PVM pVM, VMCPUID idCpu, PGMMFREELARGEPAGEREQ pReq)
2459{
2460 /*
2461 * Validate input and pass it on.
2462 */
2463 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2464 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2465 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMFREELARGEPAGEREQ),
2466 ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(GMMFREELARGEPAGEREQ)),
2467 VERR_INVALID_PARAMETER);
2468
2469 return GMMR0FreeLargePage(pVM, idCpu, pReq->idPage);
2470}
2471
2472/**
2473 * Frees a chunk, giving it back to the host OS.
2474 *
2475 * @param pGMM Pointer to the GMM instance.
2476 * @param pGVM This is set when called from GMMR0CleanupVM so we can
2477 * unmap and free the chunk in one go.
2478 * @param pChunk The chunk to free.
2479 */
2480static void gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
2481{
2482 Assert(pChunk->Core.Key != NIL_GMM_CHUNKID);
2483
2484 /*
2485 * Cleanup hack! Unmap the chunk from the caller's address space.
2486 */
2487 if ( pChunk->cMappings
2488 && pGVM)
2489 gmmR0UnmapChunk(pGMM, pGVM, pChunk);
2490
2491 /*
2492 * If there are current mappings of the chunk, then request the
2493 * VMs to unmap them. Reposition the chunk in the free list so
2494 * it won't be a likely candidate for allocations.
2495 */
2496 if (pChunk->cMappings)
2497 {
2498 /** @todo R0 -> VM request */
2499 /* The chunk can be owned by more than one VM if fBoundMemoryMode is false! */
2500 }
2501 else
2502 {
2503 /*
2504 * Try free the memory object.
2505 */
2506 int rc = RTR0MemObjFree(pChunk->MemObj, false /* fFreeMappings */);
2507 if (RT_SUCCESS(rc))
2508 {
2509 pChunk->MemObj = NIL_RTR0MEMOBJ;
2510
2511 /*
2512 * Unlink it from everywhere.
2513 */
2514 gmmR0UnlinkChunk(pChunk);
2515
2516 PAVLU32NODECORE pCore = RTAvlU32Remove(&pGMM->pChunks, pChunk->Core.Key);
2517 Assert(pCore == &pChunk->Core); NOREF(pCore);
2518
2519 PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(pChunk->Core.Key)];
2520 if (pTlbe->pChunk == pChunk)
2521 {
2522 pTlbe->idChunk = NIL_GMM_CHUNKID;
2523 pTlbe->pChunk = NULL;
2524 }
2525
2526 Assert(pGMM->cChunks > 0);
2527 pGMM->cChunks--;
2528
2529 /*
2530 * Free the Chunk ID and struct.
2531 */
2532 gmmR0FreeChunkId(pGMM, pChunk->Core.Key);
2533 pChunk->Core.Key = NIL_GMM_CHUNKID;
2534
2535 RTMemFree(pChunk->paMappings);
2536 pChunk->paMappings = NULL;
2537
2538 RTMemFree(pChunk);
2539 }
2540 else
2541 AssertRC(rc);
2542 }
2543}
2544
2545
2546/**
2547 * Free page worker.
2548 *
2549 * The caller does all the statistic decrementing, we do all the incrementing.
2550 *
2551 * @param pGMM Pointer to the GMM instance data.
2552 * @param pChunk Pointer to the chunk this page belongs to.
2553 * @param idPage The Page ID.
2554 * @param pPage Pointer to the page.
2555 */
2556static void gmmR0FreePageWorker(PGMM pGMM, PGMMCHUNK pChunk, uint32_t idPage, PGMMPAGE pPage)
2557{
2558 Log3(("F pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x\n",
2559 pPage, pPage - &pChunk->aPages[0], idPage, pPage->Common.u2State, pChunk->iFreeHead)); NOREF(idPage);
2560
2561 /*
2562 * Put the page on the free list.
2563 */
2564 pPage->u = 0;
2565 pPage->Free.u2State = GMM_PAGE_STATE_FREE;
2566 Assert(pChunk->iFreeHead < RT_ELEMENTS(pChunk->aPages) || pChunk->iFreeHead == UINT16_MAX);
2567 pPage->Free.iNext = pChunk->iFreeHead;
2568 pChunk->iFreeHead = pPage - &pChunk->aPages[0];
2569
2570 /*
2571 * Update statistics (the cShared/cPrivate stats are up to date already),
2572 * and relink the chunk if necessary.
2573 */
2574 if ((pChunk->cFree & GMM_CHUNK_FREE_SET_MASK) == 0)
2575 {
2576 gmmR0UnlinkChunk(pChunk);
2577 pChunk->cFree++;
2578 gmmR0LinkChunk(pChunk, pChunk->cShared ? &pGMM->Shared : &pGMM->Private);
2579 }
2580 else
2581 {
2582 pChunk->cFree++;
2583 pChunk->pSet->cFreePages++;
2584
2585 /*
2586 * If the chunk becomes empty, consider giving memory back to the host OS.
2587 *
2588 * The current strategy is to try to give it back if there are other chunks
2589 * in this free list, meaning if there are at least 240 free pages in this
2590 * category. Note that since there are probably mappings of the chunk,
2591 * it won't be freed up instantly, which probably screws up this logic
2592 * a bit...
2593 */
2594 if (RT_UNLIKELY( pChunk->cFree == GMM_CHUNK_NUM_PAGES
2595 && pChunk->pFreeNext
2596 && pChunk->pFreePrev
2597 && !pGMM->fLegacyAllocationMode))
2598 gmmR0FreeChunk(pGMM, NULL, pChunk);
2599 }
2600}
2601
2602
2603/**
2604 * Frees a shared page, the page is known to exist and be valid and such.
2605 *
2606 * @param pGMM Pointer to the GMM instance.
2607 * @param idPage The Page ID
2608 * @param pPage The page structure.
2609 */
2610DECLINLINE(void) gmmR0FreeSharedPage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage)
2611{
2612 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2613 Assert(pChunk);
2614 Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2615 Assert(pChunk->cShared > 0);
2616 Assert(pGMM->cSharedPages > 0);
2617 Assert(pGMM->cAllocatedPages > 0);
2618 Assert(!pPage->Shared.cRefs);
2619
2620 pChunk->cShared--;
2621 pGMM->cAllocatedPages--;
2622 pGMM->cSharedPages--;
2623 gmmR0FreePageWorker(pGMM, pChunk, idPage, pPage);
2624}
2625
2626#ifdef VBOX_WITH_PAGE_SHARING
2627/**
2628 * Converts a private page to a shared page, the page is known to exist and be valid and such.
2629 *
2630 * @param pGMM Pointer to the GMM instance.
2631 * @param pGVM Pointer to the GVM instance.
2632 * @param HCPhys Host physical address
2633 * @param idPage The Page ID
2634 * @param pPage The page structure.
2635 */
2636DECLINLINE(void) gmmR0ConvertToSharedPage(PGMM pGMM, PGVM pGVM, RTHCPHYS HCPhys, uint32_t idPage, PGMMPAGE pPage)
2637{
2638 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2639 Assert(pChunk);
2640 Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2641 Assert(GMM_PAGE_IS_PRIVATE(pPage));
2642
2643 pChunk->cPrivate--;
2644 pChunk->cShared++;
2645
2646 pGMM->cSharedPages++;
2647
2648 pGVM->gmm.s.cSharedPages++;
2649 pGVM->gmm.s.cPrivatePages--;
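/* Note that pGVM->gmm.s.Allocated.cBasePages is not touched here: the VM stays
 * charged for the page on its base account, only the private/shared split changes. */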
2650
2651 /* Modify the page structure. */
2652 pPage->Shared.pfn = (uint32_t)(uint64_t)(HCPhys >> PAGE_SHIFT);
2653 pPage->Shared.cRefs = 1;
2654 pPage->Common.u2State = GMM_PAGE_STATE_SHARED;
2655}
2656
2657/**
2658 * Increase the use count of a shared page, the page is known to exist and be valid and such.
2659 *
2660 * @param pGMM Pointer to the GMM instance.
2661 * @param pGVM Pointer to the GVM instance.
2662 * @param pPage The page structure.
2663 */
2664DECLINLINE(void) gmmR0UseSharedPage(PGMM pGMM, PGVM pGVM, PGMMPAGE pPage)
2665{
2666 Assert(pGMM->cSharedPages > 0);
2667 Assert(pGMM->cAllocatedPages > 0);
2668
2669 pPage->Shared.cRefs++;
2670 pGVM->gmm.s.cSharedPages++;
2671 pGVM->gmm.s.Allocated.cBasePages++;
2672}
2673#endif
2674
2675/**
2676 * Frees a private page, the page is known to exist and be valid and such.
2677 *
2678 * @param pGMM Pointer to the GMM instance.
2679 * @param idPage The Page ID
2680 * @param pPage The page structure.
2681 */
2682DECLINLINE(void) gmmR0FreePrivatePage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage)
2683{
2684 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2685 Assert(pChunk);
2686 Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2687 Assert(pChunk->cPrivate > 0);
2688 Assert(pGMM->cAllocatedPages > 0);
2689
2690 pChunk->cPrivate--;
2691 pGMM->cAllocatedPages--;
2692 gmmR0FreePageWorker(pGMM, pChunk, idPage, pPage);
2693}
2694
2695/**
2696 * Common worker for GMMR0FreePages and GMMR0BalloonedPages.
2697 *
2698 * @returns VBox status code:
2699 * @retval VINF_SUCCESS on success.
 * @retval VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH if more pages are freed than are allocated on the account.
 * @retval VERR_GMM_NOT_PAGE_OWNER if a private page isn't owned by the calling VM.
 * @retval VERR_GMM_PAGE_ALREADY_FREE if a page is already free.
 * @retval VERR_GMM_PAGE_NOT_FOUND if a page ID doesn't resolve to a page.
2700 *
2701 * @param pGMM Pointer to the GMM instance data.
2702 * @param pGVM Pointer to the shared VM structure.
2703 * @param cPages The number of pages to free.
2704 * @param paPages Pointer to the page descriptors.
2705 * @param enmAccount The account this relates to.
2706 */
2707static int gmmR0FreePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
2708{
2709 /*
2710 * Check that the request isn't impossible wrt to the account status.
2711 */
2712 switch (enmAccount)
2713 {
2714 case GMMACCOUNT_BASE:
2715 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages < cPages))
2716 {
2717 Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cBasePages, cPages));
2718 return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2719 }
2720 break;
2721 case GMMACCOUNT_SHADOW:
2722 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cShadowPages < cPages))
2723 {
2724 Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cShadowPages, cPages));
2725 return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2726 }
2727 break;
2728 case GMMACCOUNT_FIXED:
2729 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cFixedPages < cPages))
2730 {
2731 Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cFixedPages, cPages));
2732 return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2733 }
2734 break;
2735 default:
2736 AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2737 }
2738
2739 /*
2740 * Walk the descriptors and free the pages.
2741 *
2742 * Statistics (except the account) are being updated as we go along,
2743 * unlike the alloc code. Also, stop on the first error.
2744 */
2745 int rc = VINF_SUCCESS;
2746 uint32_t iPage;
2747 for (iPage = 0; iPage < cPages; iPage++)
2748 {
2749 uint32_t idPage = paPages[iPage].idPage;
2750 PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
2751 if (RT_LIKELY(pPage))
2752 {
2753 if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
2754 {
2755 if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
2756 {
2757 Assert(pGVM->gmm.s.cPrivatePages);
2758 pGVM->gmm.s.cPrivatePages--;
2759 gmmR0FreePrivatePage(pGMM, idPage, pPage);
2760 }
2761 else
2762 {
2763 Log(("gmmR0FreePages: #%#x/%#x: not owner! hGVM=%#x hSelf=%#x\n", iPage, idPage,
2764 pPage->Private.hGVM, pGVM->hSelf));
2765 rc = VERR_GMM_NOT_PAGE_OWNER;
2766 break;
2767 }
2768 }
2769 else if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
2770 {
2771 Assert(pGVM->gmm.s.cSharedPages);
2772 pGVM->gmm.s.cSharedPages--;
2773 Assert(pPage->Shared.cRefs);
2774 if (!--pPage->Shared.cRefs)
2775 gmmR0FreeSharedPage(pGMM, idPage, pPage);
2776 }
2777 else
2778 {
2779 Log(("gmmR0FreePages: #%#x/%#x: already free!\n", iPage, idPage));
2780 rc = VERR_GMM_PAGE_ALREADY_FREE;
2781 break;
2782 }
2783 }
2784 else
2785 {
2786 Log(("gmmR0FreePages: #%#x/%#x: not found!\n", iPage, idPage));
2787 rc = VERR_GMM_PAGE_NOT_FOUND;
2788 break;
2789 }
2790 paPages[iPage].idPage = NIL_GMM_PAGEID;
2791 }
2792
2793 /*
2794 * Update the account.
2795 */
2796 switch (enmAccount)
2797 {
2798 case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages -= iPage; break;
2799 case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages -= iPage; break;
2800 case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages -= iPage; break;
2801 default:
2802 AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2803 }
2804
2805 /*
2806 * Any threshold stuff to be done here?
2807 */
2808
2809 return rc;
2810}
2811
2812
2813/**
2814 * Free one or more pages.
2815 *
2816 * This is typically used at reset time or power off.
2817 *
2818 * @returns VBox status code:
2819 * @retval VINF_SUCCESS on success.
 * @retval VERR_NOT_OWNER if the caller is not an EMT.
 * @retval VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH, VERR_GMM_NOT_PAGE_OWNER,
 *         VERR_GMM_PAGE_ALREADY_FREE or VERR_GMM_PAGE_NOT_FOUND from gmmR0FreePages.
2820 *
2821 * @param pVM Pointer to the shared VM structure.
2822 * @param idCpu VCPU id
2823 * @param cPages The number of pages to free.
2824 * @param paPages Pointer to the page descriptors containing the Page IDs for each page.
2825 * @param enmAccount The account this relates to.
2826 * @thread EMT.
2827 */
2828GMMR0DECL(int) GMMR0FreePages(PVM pVM, VMCPUID idCpu, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
2829{
2830 LogFlow(("GMMR0FreePages: pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pVM, cPages, paPages, enmAccount));
2831
2832 /*
2833 * Validate input and get the basics.
2834 */
2835 PGMM pGMM;
2836 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2837 PGVM pGVM;
2838 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2839 if (RT_FAILURE(rc))
2840 return rc;
2841
2842 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2843 AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
2844 AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
2845
2846 for (unsigned iPage = 0; iPage < cPages; iPage++)
2847 AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2848 /*|| paPages[iPage].idPage == NIL_GMM_PAGEID*/,
2849 ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2850
2851 /*
2852 * Take the semaphore and call the worker function.
2853 */
2854 rc = RTSemFastMutexRequest(pGMM->Mtx);
2855 AssertRC(rc);
2856 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2857 {
2858 rc = gmmR0FreePages(pGMM, pGVM, cPages, paPages, enmAccount);
2859 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2860 }
2861 else
2862 rc = VERR_INTERNAL_ERROR_5;
2863 RTSemFastMutexRelease(pGMM->Mtx);
2864 LogFlow(("GMMR0FreePages: returns %Rrc\n", rc));
2865 return rc;
2866}
2867
2868
2869/**
2870 * VMMR0 request wrapper for GMMR0FreePages.
2871 *
2872 * @returns see GMMR0FreePages.
2873 * @param pVM Pointer to the shared VM structure.
2874 * @param idCpu VCPU id
2875 * @param pReq The request packet.
2876 */
2877GMMR0DECL(int) GMMR0FreePagesReq(PVM pVM, VMCPUID idCpu, PGMMFREEPAGESREQ pReq)
2878{
2879 /*
2880 * Validate input and pass it on.
2881 */
2882 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2883 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2884 AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0]),
2885 ("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0])),
2886 VERR_INVALID_PARAMETER);
2887 AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[pReq->cPages]),
2888 ("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[pReq->cPages])),
2889 VERR_INVALID_PARAMETER);
2890
2891 return GMMR0FreePages(pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
2892}
2893
2894
2895/**
2896 * Report back on a memory ballooning request.
2897 *
2898 * The request may or may not have been initiated by the GMM. If it was initiated
2899 * by the GMM it is important that this function is called even if no pages were
2900 * ballooned.
2901 *
2902 * @returns VBox status code:
2903 * @retval VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH
2904 * @retval VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH
2905 * @retval VERR_GMM_OVERCOMMITED_TRY_AGAIN_IN_A_BIT - reset condition
2906 * indicating that we won't necessarily have sufficient RAM to boot
2907 * the VM again and that it should pause until this changes (we'll try
2908 * balloon some other VM). (For standard deflate we have little choice
2909 * but to hope the VM won't use the memory that was returned to it.)
2910 *
2911 * @param pVM Pointer to the shared VM structure.
2912 * @param idCpu VCPU id
2913 * @param enmAction Inflate/deflate/reset
2914 * @param cBalloonedPages The number of pages that were ballooned.
2915 *
2916 * @thread EMT.
2917 */
2918GMMR0DECL(int) GMMR0BalloonedPages(PVM pVM, VMCPUID idCpu, GMMBALLOONACTION enmAction, uint32_t cBalloonedPages)
2919{
2920 LogFlow(("GMMR0BalloonedPages: pVM=%p enmAction=%d cBalloonedPages=%#x\n",
2921 pVM, enmAction, cBalloonedPages));
2922
2923 AssertMsgReturn(cBalloonedPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cBalloonedPages), VERR_INVALID_PARAMETER);
2924
2925 /*
2926 * Validate input and get the basics.
2927 */
2928 PGMM pGMM;
2929 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2930 PGVM pGVM;
2931 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2932 if (RT_FAILURE(rc))
2933 return rc;
2934
2935 /*
2936 * Take the semaphore and do some more validations.
2937 */
2938 rc = RTSemFastMutexRequest(pGMM->Mtx);
2939 AssertRC(rc);
2940 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2941 {
2942 switch (enmAction)
2943 {
2944 case GMMBALLOONACTION_INFLATE:
2945 {
2946 if (pGVM->gmm.s.Allocated.cBasePages >= cBalloonedPages)
2947 {
2948 /*
2949 * Record the ballooned memory.
2950 */
2951 pGMM->cBalloonedPages += cBalloonedPages;
2952 if (pGVM->gmm.s.cReqBalloonedPages)
2953 {
2954 /* Code path never taken yet. It might be interesting in the future to request ballooned memory from guests in low memory conditions. */
2955 AssertFailed();
2956
2957 pGVM->gmm.s.cBalloonedPages += cBalloonedPages;
2958 pGVM->gmm.s.cReqActuallyBalloonedPages += cBalloonedPages;
2959 Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx Req=%#llx Actual=%#llx (pending)\n", cBalloonedPages,
2960 pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqBalloonedPages, pGVM->gmm.s.cReqActuallyBalloonedPages));
2961 }
2962 else
2963 {
2964 pGVM->gmm.s.cBalloonedPages += cBalloonedPages;
2965 Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx (user)\n",
2966 cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages));
2967 }
2968 }
2969 else
2970 rc = VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2971 break;
2972 }
2973
2974 case GMMBALLOONACTION_DEFLATE:
2975 {
2976 /* Deflate. */
2977 if (pGVM->gmm.s.cBalloonedPages >= cBalloonedPages)
2978 {
2979 /*
2980 * Record the ballooned memory.
2981 */
2982 Assert(pGMM->cBalloonedPages >= cBalloonedPages);
2983 pGMM->cBalloonedPages -= cBalloonedPages;
2984 pGVM->gmm.s.cBalloonedPages -= cBalloonedPages;
2985 if (pGVM->gmm.s.cReqDeflatePages)
2986 {
2987 AssertFailed(); /* This path is for later. */
2988 Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx Req=%#llx\n",
2989 cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqDeflatePages));
2990
2991 /*
2992 * Anything we need to do here now when the request has been completed?
2993 */
2994 pGVM->gmm.s.cReqDeflatePages = 0;
2995 }
2996 else
2997 Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx (user)\n",
2998 cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages));
2999 }
3000 else
3001 rc = VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH;
3002 break;
3003 }
3004
3005 case GMMBALLOONACTION_RESET:
3006 {
3007 /* Reset to an empty balloon. */
3008 Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.cBalloonedPages);
3009
3010 pGMM->cBalloonedPages -= pGVM->gmm.s.cBalloonedPages;
3011 pGVM->gmm.s.cBalloonedPages = 0;
3012 break;
3013 }
3014
3015 default:
3016 rc = VERR_INVALID_PARAMETER;
3017 break;
3018 }
3019 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3020 }
3021 else
3022 rc = VERR_INTERNAL_ERROR_5;
3023
3024 RTSemFastMutexRelease(pGMM->Mtx);
3025 LogFlow(("GMMR0BalloonedPages: returns %Rrc\n", rc));
3026 return rc;
3027}
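/* Invariant maintained by this function: pGMM->cBalloonedPages is the sum of
 * the per-VM gmm.s.cBalloonedPages counts, which is why inflate/deflate adjust
 * both counters and GMMBALLOONACTION_RESET subtracts the VM's entire balloon
 * from the global total. */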
3028
3029
3030/**
3031 * VMMR0 request wrapper for GMMR0BalloonedPages.
3032 *
3033 * @returns see GMMR0BalloonedPages.
3034 * @param pVM Pointer to the shared VM structure.
3035 * @param idCpu VCPU id
3036 * @param pReq The request packet.
3037 */
3038GMMR0DECL(int) GMMR0BalloonedPagesReq(PVM pVM, VMCPUID idCpu, PGMMBALLOONEDPAGESREQ pReq)
3039{
3040 /*
3041 * Validate input and pass it on.
3042 */
3043 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3044 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3045 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMBALLOONEDPAGESREQ),
3046 ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(GMMBALLOONEDPAGESREQ)),
3047 VERR_INVALID_PARAMETER);
3048
3049 return GMMR0BalloonedPages(pVM, idCpu, pReq->enmAction, pReq->cBalloonedPages);
3050}
3051
3052/**
3053 * Return memory statistics for the hypervisor
3054 *
3055 * @returns VBox status code:
3056 * @param pVM Pointer to the shared VM structure.
3057 * @param pReq The request packet.
3058 */
3059GMMR0DECL(int) GMMR0QueryHypervisorMemoryStatsReq(PVM pVM, PGMMMEMSTATSREQ pReq)
3060{
3061 /*
3062 * Validate input and pass it on.
3063 */
3064 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3065 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3066 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
3067 ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
3068 VERR_INVALID_PARAMETER);
3069
3070 /*
3071 * Validate input and get the basics.
3072 */
3073 PGMM pGMM;
3074 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3075 pReq->cAllocPages = pGMM->cAllocatedPages;
3076 pReq->cFreePages = (pGMM->cChunks << (GMM_CHUNK_SHIFT - PAGE_SHIFT)) - pGMM->cAllocatedPages;
3077 pReq->cBalloonedPages = pGMM->cBalloonedPages;
3078 pReq->cMaxPages = pGMM->cMaxPages;
3079 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3080
3081 return VINF_SUCCESS;
3082}
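/* cFreePages above is derived rather than tracked: every registered chunk
 * contributes GMM_CHUNK_SIZE / PAGE_SIZE pages (the cChunks shift term) and
 * whatever is not in cAllocatedPages is reported as free; ballooned pages are
 * reported separately. */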
3083
3084/**
3085 * Return memory statistics for the VM
3086 *
3087 * @returns VBox status code:
3088 * @param pVM Pointer to the shared VM structure.
3089 * @param idCpu VCPU id
3090 * @param pReq The request packet.
3091 */
3092GMMR0DECL(int) GMMR0QueryMemoryStatsReq(PVM pVM, VMCPUID idCpu, PGMMMEMSTATSREQ pReq)
3093{
3094 /*
3095 * Validate input and pass it on.
3096 */
3097 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3098 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3099 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
3100 ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
3101 VERR_INVALID_PARAMETER);
3102
3103 /*
3104 * Validate input and get the basics.
3105 */
3106 PGMM pGMM;
3107 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3108 PGVM pGVM;
3109 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3110 if (RT_FAILURE(rc))
3111 return rc;
3112
3113 /*
3114 * Take the semaphore and do some more validations.
3115 */
3116 rc = RTSemFastMutexRequest(pGMM->Mtx);
3117 AssertRC(rc);
3118 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3119 {
3120 pReq->cAllocPages = pGVM->gmm.s.Allocated.cBasePages;
3121 pReq->cBalloonedPages = pGVM->gmm.s.cBalloonedPages;
3122 pReq->cMaxPages = pGVM->gmm.s.Reserved.cBasePages;
3123 pReq->cFreePages = pReq->cMaxPages - pReq->cAllocPages;
3124 }
3125 else
3126 rc = VERR_INTERNAL_ERROR_5;
3127
3128 RTSemFastMutexRelease(pGMM->Mtx);
3129 LogFlow(("GMMR0QueryMemoryStatsReq: returns %Rrc\n", rc));
3130 return rc;
3131}
3132
3133/**
3134 * Unmaps a chunk previously mapped into the address space of the current process.
3135 *
3136 * @returns VBox status code.
3137 * @param pGMM Pointer to the GMM instance data.
3138 * @param pGVM Pointer to the Global VM structure.
3139 * @param pChunk Pointer to the chunk to be unmapped.
3140 */
3141static int gmmR0UnmapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
3142{
3143 if (!pGMM->fLegacyAllocationMode)
3144 {
3145 /*
3146 * Find the mapping and try unmapping it.
3147 */
3148 for (uint32_t i = 0; i < pChunk->cMappings; i++)
3149 {
3150 Assert(pChunk->paMappings[i].pGVM && pChunk->paMappings[i].MapObj != NIL_RTR0MEMOBJ);
3151 if (pChunk->paMappings[i].pGVM == pGVM)
3152 {
3153 /* unmap */
3154 int rc = RTR0MemObjFree(pChunk->paMappings[i].MapObj, false /* fFreeMappings (NA) */);
3155 if (RT_SUCCESS(rc))
3156 {
3157 /* update the record. */
3158 pChunk->cMappings--;
3159 if (i < pChunk->cMappings)
3160 pChunk->paMappings[i] = pChunk->paMappings[pChunk->cMappings];
3161 pChunk->paMappings[pChunk->cMappings].MapObj = NIL_RTR0MEMOBJ;
3162 pChunk->paMappings[pChunk->cMappings].pGVM = NULL;
3163 }
3164 return rc;
3165 }
3166 }
3167 }
3168 else if (pChunk->hGVM == pGVM->hSelf)
3169 return VINF_SUCCESS;
3170
3171 Log(("gmmR0UnmapChunk: Chunk %#x is not mapped into pGVM=%p/%#x\n", pChunk->Core.Key, pGVM, pGVM->hSelf));
3172 return VERR_GMM_CHUNK_NOT_MAPPED;
3173}
3174
3175
3176/**
3177 * Maps a chunk into the user address space of the current process.
3178 *
3179 * @returns VBox status code.
3180 * @param pGMM Pointer to the GMM instance data.
3181 * @param pGVM Pointer to the Global VM structure.
3182 * @param pChunk Pointer to the chunk to be mapped.
3183 * @param ppvR3 Where to store the ring-3 address of the mapping.
3184 * In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will
3185 * contain the address of the existing mapping.
3186 */
3187static int gmmR0MapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
3188{
3189 /*
3190 * If we're in legacy mode this is simple.
3191 */
3192 if (pGMM->fLegacyAllocationMode)
3193 {
3194 if (pChunk->hGVM != pGVM->hSelf)
3195 {
3196 Log(("gmmR0MapChunk: chunk %#x does not belong to the caller (hGVM=%#x hSelf=%#x)!\n", pChunk->Core.Key, pChunk->hGVM, pGVM->hSelf));
3197 return VERR_GMM_CHUNK_NOT_FOUND;
3198 }
3199
3200 *ppvR3 = RTR0MemObjAddressR3(pChunk->MemObj);
3201 return VINF_SUCCESS;
3202 }
3203
3204 /*
3205 * Check to see if the chunk is already mapped.
3206 */
3207 for (uint32_t i = 0; i < pChunk->cMappings; i++)
3208 {
3209 Assert(pChunk->paMappings[i].pGVM && pChunk->paMappings[i].MapObj != NIL_RTR0MEMOBJ);
3210 if (pChunk->paMappings[i].pGVM == pGVM)
3211 {
3212 *ppvR3 = RTR0MemObjAddressR3(pChunk->paMappings[i].MapObj);
3213 Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
3214#ifdef VBOX_WITH_PAGE_SHARING
3215 /* The ring-3 chunk cache can be out of sync; don't fail. */
3216 return VINF_SUCCESS;
3217#else
3218 return VERR_GMM_CHUNK_ALREADY_MAPPED;
3219#endif
3220 }
3221 }
3222
3223 /*
3224 * Do the mapping.
3225 */
3226 RTR0MEMOBJ MapObj;
3227 int rc = RTR0MemObjMapUser(&MapObj, pChunk->MemObj, (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
3228 if (RT_SUCCESS(rc))
3229 {
3230 /* Reallocate the mappings array? It is grown two entries at a time. */
3231 if ((pChunk->cMappings & 1 /*7*/) == 0)
3232 {
3233 void *pvMappings = RTMemRealloc(pChunk->paMappings, (pChunk->cMappings + 2 /*8*/) * sizeof(pChunk->paMappings[0]));
3234 if (RT_UNLIKELY(!pvMappings))
3235 {
3236 rc = RTR0MemObjFree(MapObj, false /* fFreeMappings (NA) */);
3237 AssertRC(rc);
3238 return VERR_NO_MEMORY;
3239 }
3240 pChunk->paMappings = (PGMMCHUNKMAP)pvMappings;
3241 }
3242
3243 /* insert new entry */
3244 pChunk->paMappings[pChunk->cMappings].MapObj = MapObj;
3245 pChunk->paMappings[pChunk->cMappings].pGVM = pGVM;
3246 pChunk->cMappings++;
3247
3248 *ppvR3 = RTR0MemObjAddressR3(MapObj);
3249 }
3250
3251 return rc;
3252}
3253
3254/**
3255 * Check if a chunk is mapped into the specified VM
3256 *
3257 * @returns mapped yes/no
3258 * @param pGVM Pointer to the Global VM structure.
3259 * @param pChunk Pointer to the chunk to be mapped.
3260 * @param ppvR3 Where to store the ring-3 address of the mapping.
3261 */
3262static int gmmR0IsChunkMapped(PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
3263{
3264 /*
3265 * Check to see if the chunk is already mapped.
3266 */
3267 for (uint32_t i = 0; i < pChunk->cMappings; i++)
3268 {
3269 Assert(pChunk->paMappings[i].pGVM && pChunk->paMappings[i].MapObj != NIL_RTR0MEMOBJ);
3270 if (pChunk->paMappings[i].pGVM == pGVM)
3271 {
3272 *ppvR3 = RTR0MemObjAddressR3(pChunk->paMappings[i].MapObj);
3273 return true;
3274 }
3275 }
3276 *ppvR3 = NULL;
3277 return false;
3278}
3279
3280/**
3281 * Map a chunk and/or unmap another chunk.
3282 *
3283 * The mapping and unmapping applies to the current process.
3284 *
3285 * This API does two things because it saves a kernel call per mapping
3286 * when the ring-3 mapping cache is full.
3287 *
3288 * @returns VBox status code.
3289 * @param pVM The VM.
3290 * @param idCpu VCPU id
3291 * @param idChunkMap The chunk to map. NIL_GMM_CHUNKID if nothing to map.
3292 * @param idChunkUnmap The chunk to unmap. NIL_GMM_CHUNKID if nothing to unmap.
3293 * @param ppvR3 Where to store the address of the mapped chunk. NULL is ok if nothing to map.
3294 * @thread EMT
3295 */
3296GMMR0DECL(int) GMMR0MapUnmapChunk(PVM pVM, VMCPUID idCpu, uint32_t idChunkMap, uint32_t idChunkUnmap, PRTR3PTR ppvR3)
3297{
3298 LogFlow(("GMMR0MapUnmapChunk: pVM=%p idChunkMap=%#x idChunkUnmap=%#x ppvR3=%p\n",
3299 pVM, idChunkMap, idChunkUnmap, ppvR3));
3300
3301 /*
3302 * Validate input and get the basics.
3303 */
3304 PGMM pGMM;
3305 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3306 PGVM pGVM;
3307 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3308 if (RT_FAILURE(rc))
3309 return rc;
3310
3311 AssertCompile(NIL_GMM_CHUNKID == 0);
3312 AssertMsgReturn(idChunkMap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkMap), VERR_INVALID_PARAMETER);
3313 AssertMsgReturn(idChunkUnmap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkUnmap), VERR_INVALID_PARAMETER);
3314
3315 if ( idChunkMap == NIL_GMM_CHUNKID
3316 && idChunkUnmap == NIL_GMM_CHUNKID)
3317 return VERR_INVALID_PARAMETER;
3318
3319 if (idChunkMap != NIL_GMM_CHUNKID)
3320 {
3321 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3322 *ppvR3 = NIL_RTR3PTR;
3323 }
3324
3325 /*
3326 * Take the semaphore and do the work.
3327 *
3328 * The unmapping is done last since it's easier to undo a mapping than
3329 * to undo an unmapping. The ring-3 mapping cache cannot be so big
3330 * that it pushes the user virtual address space to within a chunk of
3331 * its limits, so no problem here.
3332 */
3333 rc = RTSemFastMutexRequest(pGMM->Mtx);
3334 AssertRC(rc);
3335 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3336 {
3337 PGMMCHUNK pMap = NULL;
3338 if (idChunkMap != NIL_GMM_CHUNKID)
3339 {
3340 pMap = gmmR0GetChunk(pGMM, idChunkMap);
3341 if (RT_LIKELY(pMap))
3342 rc = gmmR0MapChunk(pGMM, pGVM, pMap, ppvR3);
3343 else
3344 {
3345 Log(("GMMR0MapUnmapChunk: idChunkMap=%#x\n", idChunkMap));
3346 rc = VERR_GMM_CHUNK_NOT_FOUND;
3347 }
3348 }
3349
3350 if ( idChunkUnmap != NIL_GMM_CHUNKID
3351 && RT_SUCCESS(rc))
3352 {
3353 PGMMCHUNK pUnmap = gmmR0GetChunk(pGMM, idChunkUnmap);
3354 if (RT_LIKELY(pUnmap))
3355 rc = gmmR0UnmapChunk(pGMM, pGVM, pUnmap);
3356 else
3357 {
3358 Log(("GMMR0MapUnmapChunk: idChunkUnmap=%#x\n", idChunkUnmap));
3359 rc = VERR_GMM_CHUNK_NOT_FOUND;
3360 }
3361
3362 if (RT_FAILURE(rc) && pMap)
3363 gmmR0UnmapChunk(pGMM, pGVM, pMap);
3364 }
3365
3366 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3367 }
3368 else
3369 rc = VERR_INTERNAL_ERROR_5;
3370 RTSemFastMutexRelease(pGMM->Mtx);
3371
3372 LogFlow(("GMMR0MapUnmapChunk: returns %Rrc\n", rc));
3373 return rc;
3374}
3375
3376
3377/**
3378 * VMMR0 request wrapper for GMMR0MapUnmapChunk.
3379 *
3380 * @returns see GMMR0MapUnmapChunk.
3381 * @param pVM Pointer to the shared VM structure.
3382 * @param idCpu VCPU id
3383 * @param pReq The request packet.
3384 */
3385GMMR0DECL(int) GMMR0MapUnmapChunkReq(PVM pVM, VMCPUID idCpu, PGMMMAPUNMAPCHUNKREQ pReq)
3386{
3387 /*
3388 * Validate input and pass it on.
3389 */
3390 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3391 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3392 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3393
3394 return GMMR0MapUnmapChunk(pVM, idCpu, pReq->idChunkMap, pReq->idChunkUnmap, &pReq->pvR3);
3395}
3396
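/**
 * @remarks A hedged ring-3 usage sketch for the request wrapper above. The
 *          GMMMAPUNMAPCHUNKREQ field layout follows from how the wrapper
 *          dereferences it; the dispatch via VMMR3CallR0 with
 *          VMMR0_DO_GMM_MAP_UNMAP_CHUNK is an assumption about the ring-3
 *          side and not defined in this file, and idChunkToMap / pvChunkR3
 *          are caller variables used only for illustration.
 * @code
 * GMMMAPUNMAPCHUNKREQ Req;
 * Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
 * Req.Hdr.cbReq    = sizeof(Req);
 * Req.idChunkMap   = idChunkToMap;      // or NIL_GMM_CHUNKID if only unmapping
 * Req.idChunkUnmap = NIL_GMM_CHUNKID;   // nothing to evict from the ring-3 cache here
 * Req.pvR3         = NIL_RTR3PTR;
 * int rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_MAP_UNMAP_CHUNK, 0, &Req.Hdr);
 * if (RT_SUCCESS(rc))
 *     pvChunkR3 = Req.pvR3;             // ring-3 address of the mapped chunk
 * @endcode
 */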
3397
3398/**
3399 * Legacy mode API for supplying pages.
3400 *
3401 * The specified user address points to an allocation chunk sized block that
3402 * will be locked down and used by the GMM when the VM asks for pages.
3403 *
3404 * @returns VBox status code.
3405 * @param pVM The VM.
3406 * @param idCpu VCPU id
3407 * @param pvR3 Pointer to the chunk size memory block to lock down.
3408 */
3409GMMR0DECL(int) GMMR0SeedChunk(PVM pVM, VMCPUID idCpu, RTR3PTR pvR3)
3410{
3411 /*
3412 * Validate input and get the basics.
3413 */
3414 PGMM pGMM;
3415 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3416 PGVM pGVM;
3417 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3418 if (RT_FAILURE(rc))
3419 return rc;
3420
3421 AssertPtrReturn(pvR3, VERR_INVALID_POINTER);
3422 AssertReturn(!(PAGE_OFFSET_MASK & pvR3), VERR_INVALID_POINTER);
3423
3424 if (!pGMM->fLegacyAllocationMode)
3425 {
3426 Log(("GMMR0SeedChunk: not in legacy allocation mode!\n"));
3427 return VERR_NOT_SUPPORTED;
3428 }
3429
3430 /*
3431 * Lock the memory before taking the semaphore.
3432 */
3433 RTR0MEMOBJ MemObj;
3434 rc = RTR0MemObjLockUser(&MemObj, pvR3, GMM_CHUNK_SIZE, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
3435 if (RT_SUCCESS(rc))
3436 {
3437 /* Grab the lock. */
3438 rc = RTSemFastMutexRequest(pGMM->Mtx);
3439 AssertRCReturn(rc, rc);
3440
3441 /*
3442 * Add a new chunk with our hGVM.
3443 */
3444 rc = gmmR0RegisterChunk(pGMM, &pGMM->Private, MemObj, pGVM->hSelf, GMMCHUNKTYPE_NON_CONTINUOUS);
3445 RTSemFastMutexRelease(pGMM->Mtx);
3446
3447 if (RT_FAILURE(rc))
3448 RTR0MemObjFree(MemObj, false /* fFreeMappings */);
3449 }
3450
3451 LogFlow(("GMMR0SeedChunk: rc=%d (pvR3=%p)\n", rc, pvR3));
3452 return rc;
3453}
3454
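/**
 * @remarks A hedged ring-3 sketch of seeding the GMM in legacy allocation
 *          mode, i.e. handing GMMR0SeedChunk above a page aligned block of
 *          GMM_CHUNK_SIZE bytes to lock down. RTMemPageAlloc is merely one
 *          way of getting page aligned memory, and GMMR3SeedChunk is assumed
 *          to be the ring-3 wrapper that forwards the call; neither is
 *          defined in this file.
 * @code
 * void *pvSeed = RTMemPageAlloc(GMM_CHUNK_SIZE);   // page aligned by contract
 * if (pvSeed)
 * {
 *     int rc = GMMR3SeedChunk(pVM, (RTR3PTR)pvSeed);
 *     if (RT_FAILURE(rc))
 *         LogRel(("Seeding a chunk failed: %Rrc\n", rc));
 * }
 * @endcode
 */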
3455
3456/**
3457 * Registers a new shared module for the VM
3458 *
3459 * @returns VBox status code.
3460 * @param pVM VM handle
3461 * @param idCpu VCPU id
3462 * @param enmGuestOS Guest OS type
3463 * @param pszModuleName Module name
3464 * @param pszVersion Module version
3465 * @param GCBaseAddr Module base address
3466 * @param cbModule Module size
3467 * @param cRegions Number of shared region descriptors
3468 * @param pRegions Shared region(s)
3469 */
3470GMMR0DECL(int) GMMR0RegisterSharedModule(PVM pVM, VMCPUID idCpu, VBOXOSFAMILY enmGuestOS, char *pszModuleName, char *pszVersion, RTGCPTR GCBaseAddr, uint32_t cbModule,
3471 unsigned cRegions, VMMDEVSHAREDREGIONDESC *pRegions)
3472{
3473#ifdef VBOX_WITH_PAGE_SHARING
3474 /*
3475 * Validate input and get the basics.
3476 */
3477 PGMM pGMM;
3478 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3479 PGVM pGVM;
3480 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3481 if (RT_FAILURE(rc))
3482 return rc;
3483
3484 Log(("GMMR0RegisterSharedModule %s %s base %RGv size %x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule));
3485
3486 /*
3487 * Take the semaphore and do some more validations.
3488 */
3489 rc = RTSemFastMutexRequest(pGMM->Mtx);
3490 AssertRC(rc);
3491 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3492 {
3493 bool fNewModule = false;
3494
3495 /* Check if this module is already locally registered. */
3496 PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
3497 if (!pRecVM)
3498 {
3499 pRecVM = (PGMMSHAREDMODULEPERVM)RTMemAllocZ(sizeof(*pRecVM));
3500 if (!pRecVM)
3501 {
3502 AssertFailed();
3503 rc = VERR_NO_MEMORY;
3504 goto end;
3505 }
3506 pRecVM->Core.Key = GCBaseAddr;
3507
3508 bool ret = RTAvlGCPtrInsert(&pGVM->gmm.s.pSharedModuleTree, &pRecVM->Core);
3509 Assert(ret);
3510
3511 Log(("GMMR0RegisterSharedModule: new local module %s\n", pszModuleName));
3512 fNewModule = true;
3513 }
3514 else
3515 rc = VINF_PGM_SHARED_MODULE_ALREADY_REGISTERED;
3516
3517 /* Check if this module is already globally registered. */
3518 PGMMSHAREDMODULE pGlobalModule = (PGMMSHAREDMODULE)RTAvlGCPtrGet(&pGMM->pGlobalSharedModuleTree, GCBaseAddr);
3519 if (!pGlobalModule)
3520 {
3521 Assert(fNewModule);
3522 Assert(!pRecVM->fCollision);
3523
3524 pGlobalModule = (PGMMSHAREDMODULE)RTMemAllocZ(RT_OFFSETOF(GMMSHAREDMODULE, aRegions[cRegions]));
3525 if (!pGlobalModule)
3526 {
3527 AssertFailed();
3528 rc = VERR_NO_MEMORY;
3529 goto end;
3530 }
3531
3532 pGlobalModule->Core.Key = GCBaseAddr;
3533 pGlobalModule->cbModule = cbModule;
3534 /* Input limit already safe; no need to check again. */
3535 /** @todo replace with RTStrCopy */
3536 strcpy(pGlobalModule->szName, pszModuleName);
3537 strcpy(pGlobalModule->szVersion, pszVersion);
3538
3539 pGlobalModule->enmGuestOS = enmGuestOS;
3540 pGlobalModule->cRegions = cRegions;
3541
3542 for (unsigned i = 0; i < cRegions; i++)
3543 {
3544 pGlobalModule->aRegions[i].GCRegionAddr = pRegions[i].GCRegionAddr;
3545 pGlobalModule->aRegions[i].cbRegion = pRegions[i].cbRegion;
3546 pGlobalModule->aRegions[i].u32Alignment = 0;
3547 pGlobalModule->aRegions[i].paHCPhysPageID = NULL; /* uninitialized. */
3548 }
3549
3550 /* Save reference. */
3551 pRecVM->pGlobalModule = pGlobalModule;
3552 pRecVM->fCollision = false;
3553 pGlobalModule->cUsers++;
3554 rc = VINF_SUCCESS;
3555
3556 bool ret = RTAvlGCPtrInsert(&pGMM->pGlobalSharedModuleTree, &pGlobalModule->Core);
3557 Assert(ret);
3558
3559 Log(("GMMR0RegisterSharedModule: new global module %s\n", pszModuleName));
3560 }
3561 else
3562 {
3563 Assert(pGlobalModule->cUsers > 0);
3564
3565 /* Make sure the name and version are identical. */
3566 /** @todo replace with RTStrNCmp */
3567 if ( !strcmp(pGlobalModule->szName, pszModuleName)
3568 && !strcmp(pGlobalModule->szVersion, pszVersion))
3569 {
3570 /* Save reference. */
3571 pRecVM->pGlobalModule = pGlobalModule;
3572 if ( fNewModule
3573 || pRecVM->fCollision == true) /* colliding module unregistered and new one registered since the last check */
3574 {
3575 pGlobalModule->cUsers++;
3576 Log(("GMMR0RegisterSharedModule: using existing module %s cUser=%d!\n", pszModuleName, pGlobalModule->cUsers));
3577 }
3578 pRecVM->fCollision = false;
3579 rc = VINF_SUCCESS;
3580 }
3581 else
3582 {
3583 Log(("GMMR0RegisterSharedModule: module %s collision!\n", pszModuleName));
3584 pRecVM->fCollision = true;
3585 rc = VINF_PGM_SHARED_MODULE_COLLISION;
3586 goto end;
3587 }
3588 }
3589
3590 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3591 }
3592 else
3593 rc = VERR_INTERNAL_ERROR_5;
3594
3595end:
3596 RTSemFastMutexRelease(pGMM->Mtx);
3597 return rc;
3598#else
3599 return VERR_NOT_IMPLEMENTED;
3600#endif
3601}
3602
3603
3604/**
3605 * VMMR0 request wrapper for GMMR0RegisterSharedModule.
3606 *
3607 * @returns see GMMR0RegisterSharedModule.
3608 * @param pVM Pointer to the shared VM structure.
3609 * @param idCpu VCPU id
3610 * @param pReq The request packet.
3611 */
3612GMMR0DECL(int) GMMR0RegisterSharedModuleReq(PVM pVM, VMCPUID idCpu, PGMMREGISTERSHAREDMODULEREQ pReq)
3613{
3614 /*
3615 * Validate input and pass it on.
3616 */
3617 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3618 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3619 AssertMsgReturn(pReq->Hdr.cbReq >= sizeof(*pReq) && pReq->Hdr.cbReq == RT_UOFFSETOF(GMMREGISTERSHAREDMODULEREQ, aRegions[pReq->cRegions]), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3620
3621 return GMMR0RegisterSharedModule(pVM, idCpu, pReq->enmGuestOS, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule, pReq->cRegions, pReq->aRegions);
3622}
3623
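/**
 * @remarks A hedged ring-3 sketch of building the variable sized request the
 *          wrapper above validates: Hdr.cbReq must equal
 *          RT_UOFFSETOF(GMMREGISTERSHAREDMODULEREQ, aRegions[cRegions]). The
 *          caller variables (pszModuleName, pszVersion, enmGuestOS,
 *          GCBaseAddr, cbModule, cRegions >= 1, paRegions) and the dispatch
 *          via VMMR3CallR0 with VMMR0_DO_GMM_REGISTER_SHARED_MODULE are
 *          assumptions and not defined in this file.
 * @code
 * uint32_t cbReq = RT_UOFFSETOF(GMMREGISTERSHAREDMODULEREQ, aRegions[cRegions]);
 * PGMMREGISTERSHAREDMODULEREQ pReq = (PGMMREGISTERSHAREDMODULEREQ)RTMemAllocZ(cbReq);
 * if (!pReq)
 *     return VERR_NO_MEMORY;
 * pReq->Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
 * pReq->Hdr.cbReq    = cbReq;
 * pReq->enmGuestOS   = enmGuestOS;
 * pReq->GCBaseAddr   = GCBaseAddr;
 * pReq->cbModule     = cbModule;
 * pReq->cRegions     = cRegions;
 * RTStrCopy(pReq->szName, sizeof(pReq->szName), pszModuleName);
 * RTStrCopy(pReq->szVersion, sizeof(pReq->szVersion), pszVersion);
 * for (uint32_t i = 0; i < cRegions; i++)
 *     pReq->aRegions[i] = paRegions[i];
 * int rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_REGISTER_SHARED_MODULE, 0, &pReq->Hdr);
 * RTMemFree(pReq);
 * @endcode
 */
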
3624/**
3625 * Unregisters a shared module for the VM
3626 *
3627 * @returns VBox status code.
3628 * @param pVM VM handle
3629 * @param idCpu VCPU id
3630 * @param pszModuleName Module name
3631 * @param pszVersion Module version
3632 * @param GCBaseAddr Module base address
3633 * @param cbModule Module size
3634 */
3635GMMR0DECL(int) GMMR0UnregisterSharedModule(PVM pVM, VMCPUID idCpu, char *pszModuleName, char *pszVersion, RTGCPTR GCBaseAddr, uint32_t cbModule)
3636{
3637#ifdef VBOX_WITH_PAGE_SHARING
3638 /*
3639 * Validate input and get the basics.
3640 */
3641 PGMM pGMM;
3642 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3643 PGVM pGVM;
3644 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3645 if (RT_FAILURE(rc))
3646 return rc;
3647
3648 Log(("GMMR0UnregisterSharedModule %s %s base=%RGv size %x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule));
3649
3650 /*
3651 * Take the semaphore and do some more validations.
3652 */
3653 rc = RTSemFastMutexRequest(pGMM->Mtx);
3654 AssertRC(rc);
3655 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3656 {
3657 PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
3658 if (!pRecVM)
3659 {
3660 rc = VERR_PGM_SHARED_MODULE_NOT_FOUND;
3661 goto end;
3662 }
3663 /* Remove reference to global shared module. */
3664 if (!pRecVM->fCollision)
3665 {
3666 PGMMSHAREDMODULE pRec = pRecVM->pGlobalModule;
3667 Assert(pRec);
3668
3669 if (pRec) /* paranoia */
3670 {
3671 Assert(pRec->cUsers);
3672 pRec->cUsers--;
3673 if (pRec->cUsers == 0)
3674 {
3675 /* Free the ranges, but leave the pages intact as there might still be references; they will be cleared by the COW mechanism. */
3676 for (unsigned i = 0; i < pRec->cRegions; i++)
3677 if (pRec->aRegions[i].paHCPhysPageID)
3678 RTMemFree(pRec->aRegions[i].paHCPhysPageID);
3679
3680 /* Remove from the tree and free memory. */
3681 RTAvlGCPtrRemove(&pGMM->pGlobalSharedModuleTree, GCBaseAddr);
3682 RTMemFree(pRec);
3683 }
3684 }
3685 else
3686 rc = VERR_PGM_SHARED_MODULE_REGISTRATION_INCONSISTENCY;
3687 }
3688 else
3689 Assert(!pRecVM->pGlobalModule);
3690
3691 /* Remove from the tree and free memory. */
3692 RTAvlGCPtrRemove(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
3693 RTMemFree(pRecVM);
3694
3695 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3696 }
3697 else
3698 rc = VERR_INTERNAL_ERROR_5;
3699
3700end:
3701 RTSemFastMutexRelease(pGMM->Mtx);
3702 return rc;
3703#else
3704 return VERR_NOT_IMPLEMENTED;
3705#endif
3706}
3707
3708/**
3709 * VMMR0 request wrapper for GMMR0UnregisterSharedModule.
3710 *
3711 * @returns see GMMR0UnregisterSharedModule.
3712 * @param pVM Pointer to the shared VM structure.
3713 * @param idCpu VCPU id
3714 * @param pReq The request packet.
3715 */
3716GMMR0DECL(int) GMMR0UnregisterSharedModuleReq(PVM pVM, VMCPUID idCpu, PGMMUNREGISTERSHAREDMODULEREQ pReq)
3717{
3718 /*
3719 * Validate input and pass it on.
3720 */
3721 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3722 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3723 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3724
3725 return GMMR0UnregisterSharedModule(pVM, idCpu, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule);
3726}
3727
3728
3729#ifdef VBOX_WITH_PAGE_SHARING
3730/**
3731 * Checks specified shared module range for changes
3732 *
3733 * Performs the following tasks:
3734 * - if a shared page is new, then it changes the GMM page type to shared and returns it in the paPageDesc array
3735 * - if a shared page already exists, then it checks if the VM page is identical and if so frees the VM page and returns the shared page in the paPageDesc array
3736 *
3737 * Note: assumes the caller has acquired the GMM semaphore!!
3738 *
3739 * @returns VBox status code.
3741 * @param pGVM Pointer to the GVM instance data.
3742 * @param pModule Module description
3743 * @param idxRegion Region index
3744 * @param cPages Number of entries in the paPageDesc array
3745 * @param paPageDesc Page descriptor array (in/out)
3746 */
3747GMMR0DECL(int) GMMR0SharedModuleCheckRange(PGVM pGVM, PGMMSHAREDMODULE pModule, unsigned idxRegion, unsigned cPages, PGMMSHAREDPAGEDESC paPageDesc)
3748{
3749 int rc = VINF_SUCCESS;
3750 PGMM pGMM;
3751 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3752
3753 AssertReturn(idxRegion < pModule->cRegions, VERR_INVALID_PARAMETER);
3754 AssertReturn(cPages == (pModule->aRegions[idxRegion].cbRegion >> PAGE_SHIFT), VERR_INVALID_PARAMETER);
3755
3756 Log(("GMMR0SharedModuleCheckRange %s base %RGv region %d cPages %d\n", pModule->szName, pModule->Core.Key, idxRegion, cPages));
3757
3758 PGMMSHAREDREGIONDESC pGlobalRegion = &pModule->aRegions[idxRegion];
3759
3760 if (!pGlobalRegion->paHCPhysPageID)
3761 {
3762 /* First time; create a page descriptor array. */
3763 Log(("Allocate page descriptor array for %d pages\n", cPages));
3764 pGlobalRegion->paHCPhysPageID = (uint32_t *)RTMemAlloc(cPages * sizeof(*pGlobalRegion->paHCPhysPageID));
3765 if (!pGlobalRegion->paHCPhysPageID)
3766 {
3767 AssertFailed();
3768 rc = VERR_NO_MEMORY;
3769 goto end;
3770 }
3771 /* Invalidate all descriptors. */
3772 for (unsigned i = 0; i < cPages; i++)
3773 pGlobalRegion->paHCPhysPageID[i] = NIL_GMM_PAGEID;
3774 }
3775
3776 /* Check all pages in the region. */
3777 for (unsigned i = 0; i < cPages; i++)
3778 {
3779 /* Valid page present? */
3780 if (paPageDesc[i].uHCPhysPageId != NIL_GMM_PAGEID)
3781 {
3782 /* Is this the first time we've seen this shared page? */
3783 if (pGlobalRegion->paHCPhysPageID[i] == NIL_GMM_PAGEID)
3784 {
3785new_shared_page:
3786 /* Easy case: just change the internal page type. */
3787 PGMMPAGE pPage = gmmR0GetPage(pGMM, paPageDesc[i].uHCPhysPageId);
3788 if (!pPage)
3789 {
3790 AssertFailed();
3791 rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
3792 goto end;
3793 }
3794 Log(("New shared page guest %RGp host %RHp\n", paPageDesc[i].GCPhys, paPageDesc[i].HCPhys));
3795
3796 AssertMsg(paPageDesc[i].GCPhys == (pPage->Private.pfn << 12), ("desc %RGp gmm %RGp\n", paPageDesc[i].GCPhys, (pPage->Private.pfn << 12)));
3797
3798 gmmR0ConvertToSharedPage(pGMM, pGVM, paPageDesc[i].HCPhys, paPageDesc[i].uHCPhysPageId, pPage);
3799
3800 /* Keep track of these references. */
3801 pGlobalRegion->paHCPhysPageID[i] = paPageDesc[i].uHCPhysPageId;
3802 }
3803 else
3804 {
3805 uint8_t *pbLocalPage, *pbSharedPage;
3806 uint8_t *pbChunk;
3807 PGMMCHUNK pChunk;
3808
3809 Assert(paPageDesc[i].uHCPhysPageId != pGlobalRegion->paHCPhysPageID[i]);
3810
3811 /* Get the shared page source. */
3812 PGMMPAGE pPage = gmmR0GetPage(pGMM, pGlobalRegion->paHCPhysPageID[i]);
3813 if (!pPage)
3814 {
3815 AssertFailed();
3816 rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
3817 goto end;
3818 }
3819 if (pPage->Common.u2State != GMM_PAGE_STATE_SHARED)
3820 {
3821 /* Page was freed at some point; invalidate this entry. */
3822 /** @todo this isn't really bullet proof. */
3823 Log(("Old shared page was freed -> create a new one\n"));
3824 pGlobalRegion->paHCPhysPageID[i] = NIL_GMM_PAGEID;
3825 goto new_shared_page; /* ugly goto */
3826 }
3827
3828 Log(("Replace existing page guest %RGp host %RHp -> %RHp\n", paPageDesc[i].GCPhys, paPageDesc[i].HCPhys, ((uint64_t)pPage->Shared.pfn) << PAGE_SHIFT));
3829
3830 /* Calculate the virtual address of the local page. */
3831 pChunk = gmmR0GetChunk(pGMM, paPageDesc[i].uHCPhysPageId >> GMM_CHUNKID_SHIFT);
3832 if (pChunk)
3833 {
3834 if (!gmmR0IsChunkMapped(pGVM, pChunk, (PRTR3PTR)&pbChunk))
3835 {
3836 AssertFailed();
3837 rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
3838 goto end;
3839 }
3840 pbLocalPage = pbChunk + ((paPageDesc[i].uHCPhysPageId & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
3841 }
3842 else
3843 {
3844 AssertFailed();
3845 rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
3846 goto end;
3847 }
3848
3849 /* Calculate the virtual address of the shared page. */
3850 pChunk = gmmR0GetChunk(pGMM, pGlobalRegion->paHCPhysPageID[i] >> GMM_CHUNKID_SHIFT);
3851 Assert(pChunk); /* can't fail as gmmR0GetPage succeeded. */
3852
3853 /* Get the virtual address of the physical page; map the chunk into the VM process if not already done. */
3854 if (!gmmR0IsChunkMapped(pGVM, pChunk, (PRTR3PTR)&pbChunk))
3855 {
3856 Log(("Map chunk into process!\n"));
3857 rc = gmmR0MapChunk(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk);
3858 if (rc != VINF_SUCCESS)
3859 {
3860 AssertRC(rc);
3861 goto end;
3862 }
3863 }
3864 pbSharedPage = pbChunk + ((pGlobalRegion->paHCPhysPageID[i] & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
3865
3866 /** @todo write ASMMemComparePage. */
3867 if (memcmp(pbSharedPage, pbLocalPage, PAGE_SIZE))
3868 {
3869 Log(("Unexpected differences found between local and shared page; skip\n"));
3870 /* Signal to the caller that this one hasn't changed. */
3871 paPageDesc[i].uHCPhysPageId = NIL_GMM_PAGEID;
3872 continue;
3873 }
3874
3875 /* Free the old local page. */
3876 GMMFREEPAGEDESC PageDesc;
3877
3878 PageDesc.idPage = paPageDesc[i].uHCPhysPageId;
3879 rc = gmmR0FreePages(pGMM, pGVM, 1, &PageDesc, GMMACCOUNT_BASE);
3880 AssertRC(rc);
3881
3882 gmmR0UseSharedPage(pGMM, pGVM, pPage);
3883
3884 /* Pass along the new physical address & page id. */
3885 paPageDesc[i].HCPhys = ((uint64_t)pPage->Shared.pfn) << PAGE_SHIFT;
3886 paPageDesc[i].uHCPhysPageId = pGlobalRegion->paHCPhysPageID[i];
3887 }
3888 }
3889 }
3890end:
3891 return rc;
3892}
3893
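/**
 * @remarks A hedged sketch of how a caller might prepare the page descriptor
 *          array for GMMR0SharedModuleCheckRange above. Only the fields the
 *          function actually reads and writes (uHCPhysPageId, GCPhys,
 *          HCPhys) are touched; resolving each guest page to its addresses
 *          and GMM page id is PGM's business and only indicated by comments.
 *          The surrounding variables (pGVM, pModule, idxRegion) are assumed
 *          caller context.
 * @code
 * uint32_t           cPages     = pModule->aRegions[idxRegion].cbRegion >> PAGE_SHIFT;
 * PGMMSHAREDPAGEDESC paPageDesc = (PGMMSHAREDPAGEDESC)RTMemAllocZ(cPages * sizeof(paPageDesc[0]));
 * if (!paPageDesc)
 *     return VERR_NO_MEMORY;
 * for (uint32_t i = 0; i < cPages; i++)
 * {
 *     RTGCPTR GCPtrPage = pModule->aRegions[idxRegion].GCRegionAddr + ((RTGCPTR)i << PAGE_SHIFT);
 *     // Resolve GCPtrPage to its guest physical address, host physical
 *     // address and GMM page id (not shown).  Pages that cannot be shared
 *     // keep NIL_GMM_PAGEID and are skipped by the check above.
 *     paPageDesc[i].uHCPhysPageId = NIL_GMM_PAGEID;
 *     paPageDesc[i].GCPhys        = NIL_RTGCPHYS;
 *     paPageDesc[i].HCPhys        = NIL_RTHCPHYS;
 * }
 * int rc = GMMR0SharedModuleCheckRange(pGVM, pModule, idxRegion, cPages, paPageDesc);
 * RTMemFree(paPageDesc);
 * @endcode
 */
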
3894/**
3895 * RTAvlGCPtrDestroy callback.
3896 *
3897 * @returns 0
3898 * @param pNode The node to destroy.
3899 * @param pvGVM The GVM handle.
3900 */
3901static DECLCALLBACK(int) gmmR0CleanupSharedModule(PAVLGCPTRNODECORE pNode, void *pvGVM)
3902{
3903 PGVM pGVM = (PGVM)pvGVM;
3904 PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)pNode;
3905 PGMM pGMM;
3906 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3907
3908 Assert(pRecVM->pGlobalModule);
3909 if (pRecVM->pGlobalModule)
3910 {
3911 PGMMSHAREDMODULE pRec = pRecVM->pGlobalModule;
3912 Assert(pRec);
3913 Assert(pRec->cUsers);
3914
3915 Log(("gmmR0CleanupSharedModule: %s %s cUsers=%d\n", pRec->szName, pRec->szVersion, pRec->cUsers));
3916 pRec->cUsers--;
3917 if (pRec->cUsers == 0)
3918 {
3919 for (unsigned i = 0; i < pRec->cRegions; i++)
3920 if (pRec->aRegions[i].paHCPhysPageID)
3921 RTMemFree(pRec->aRegions[i].paHCPhysPageID);
3922
3923 /* Remove from the tree and free memory. */
3924 RTAvlGCPtrRemove(&pGMM->pGlobalSharedModuleTree, pRec->Core.Key);
3925 RTMemFree(pRec);
3926 }
3927 }
3928 RTMemFree(pRecVM);
3929 return 0;
3930}
3931#endif
3932
3933/**
3934 * Removes all shared modules for the specified VM
3935 *
3936 * @returns VBox status code.
3937 * @param pVM VM handle
3938 * @param idCpu VCPU id
3939 */
3940GMMR0DECL(int) GMMR0ResetSharedModules(PVM pVM, VMCPUID idCpu)
3941{
3942#ifdef VBOX_WITH_PAGE_SHARING
3943 /*
3944 * Validate input and get the basics.
3945 */
3946 PGMM pGMM;
3947 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3948 PGVM pGVM;
3949 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3950 if (RT_FAILURE(rc))
3951 return rc;
3952
3953 /*
3954 * Take the semaphore and do some more validations.
3955 */
3956 rc = RTSemFastMutexRequest(pGMM->Mtx);
3957 AssertRC(rc);
3958 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3959 {
3960 Log(("GMMR0ResetSharedModules\n"));
3961 RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, pGVM);
3962
3963 rc = VINF_SUCCESS;
3964 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3965 }
3966 else
3967 rc = VERR_INTERNAL_ERROR_5;
3968
3969 RTSemFastMutexRelease(pGMM->Mtx);
3970 return rc;
3971#else
3972 return VERR_NOT_IMPLEMENTED;
3973#endif
3974}
3975
3976#ifdef VBOX_WITH_PAGE_SHARING
3977typedef struct
3978{
3979 PGVM pGVM;
3980 VMCPUID idCpu;
3981} GMMCHECKSHAREDMODULEINFO, *PGMMCHECKSHAREDMODULEINFO;
3982
3983/**
3984 * Tree enumeration callback for checking a shared module.
3985 */
3986DECLCALLBACK(int) gmmR0CheckSharedModule(PAVLGCPTRNODECORE pNode, void *pvUser)
3987{
3988 PGMMCHECKSHAREDMODULEINFO pInfo = (PGMMCHECKSHAREDMODULEINFO)pvUser;
3989 PGMMSHAREDMODULEPERVM pLocalModule = (PGMMSHAREDMODULEPERVM)pNode;
3990 PGMMSHAREDMODULE pGlobalModule = pLocalModule->pGlobalModule;
3991
3992 Log(("gmmR0CheckSharedModule: check %s %s base=%RGv size=%x collision=%d\n", pGlobalModule->szName, pGlobalModule->szVersion, pGlobalModule->Core.Key, pGlobalModule->cbModule, pLocalModule->fCollision));
3993 if (!pLocalModule->fCollision)
3994 {
3995 PGMR0SharedModuleCheckRegion(pInfo->pGVM->pVM, pInfo->idCpu, pGlobalModule, pInfo->pGVM);
3996 }
3997 return 0;
3998}
3999#endif
4000
4001#ifdef DEBUG_sandervl
4002/**
4003 * Setup for a GMMR0CheckSharedModules call (to allow log flush jumps back to ring 3)
4004 *
4005 * @returns VBox status code.
4006 * @param pVM VM handle
4007 */
4008GMMR0DECL(int) GMMR0CheckSharedModulesStart(PVM pVM)
4009{
4010 /*
4011 * Validate input and get the basics.
4012 */
4013 PGMM pGMM;
4014 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4015
4016 /*
4017 * Take the semaphore and do some more validations.
4018 */
4019 int rc = RTSemFastMutexRequest(pGMM->Mtx);
4020 AssertRC(rc);
4021 if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4022 rc = VERR_INTERNAL_ERROR_5;
4023 else
4024 rc = VINF_SUCCESS;
4025
4026 return rc;
4027}
4028
4029/**
4030 * Clean up after a GMMR0CheckSharedModules call (to allow log flush jumps back to ring 3)
4031 *
4032 * @returns VBox status code.
4033 * @param pVM VM handle
4034 */
4035GMMR0DECL(int) GMMR0CheckSharedModulesEnd(PVM pVM)
4036{
4037 /*
4038 * Validate input and get the basics.
4039 */
4040 PGMM pGMM;
4041 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4042
4043 RTSemFastMutexRelease(pGMM->Mtx);
4044 return VINF_SUCCESS;
4045}
4046#endif
4047
4048/**
4049 * Check all shared modules for the specified VM
4050 *
4051 * @returns VBox status code.
4052 * @param pVM VM handle
4053 * @param pVCpu VMCPU handle
4054 */
4055GMMR0DECL(int) GMMR0CheckSharedModules(PVM pVM, PVMCPU pVCpu)
4056{
4057#ifdef VBOX_WITH_PAGE_SHARING
4058 /*
4059 * Validate input and get the basics.
4060 */
4061 PGMM pGMM;
4062 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4063 PGVM pGVM;
4064 int rc = GVMMR0ByVMAndEMT(pVM, pVCpu->idCpu, &pGVM);
4065 if (RT_FAILURE(rc))
4066 return rc;
4067
4068# ifndef DEBUG_sandervl
4069 /*
4070 * Take the semaphore and do some more validations.
4071 */
4072 rc = RTSemFastMutexRequest(pGMM->Mtx);
4073 AssertRC(rc);
4074# endif
4075 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4076 {
4077 GMMCHECKSHAREDMODULEINFO Info;
4078
4079 Log(("GMMR0CheckSharedModules\n"));
4080 Info.pGVM = pGVM;
4081 Info.idCpu = pVCpu->idCpu;
4082
4083 RTAvlGCPtrDoWithAll(&pGVM->gmm.s.pSharedModuleTree, true /* fFromLeft */, gmmR0CheckSharedModule, &Info);
4084
4085 Log(("GMMR0CheckSharedModules done!\n"));
4086 rc = VINF_SUCCESS;
4087 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4088 }
4089 else
4090 rc = VERR_INTERNAL_ERROR_5;
4091
4092# ifndef DEBUG_sandervl
4093 RTSemFastMutexRelease(pGMM->Mtx);
4094# endif
4095 return rc;
4096#else
4097 return VERR_NOT_IMPLEMENTED;
4098#endif
4099}