GMMR0.cpp@ 29509

Last change on this file since 29509 was 29509, checked in by vboxsync, 15 years ago
More logging
Property svn:eol-style set to `native` Property svn:keywords set to `Id`
File size: 140.3 KB

Line
1	/* $Id: GMMR0.cpp 29509 2010-05-17 08:40:40Z vboxsync $ */
2	/** @file
3	* GMM - Global Memory Manager.
4	*/
5
6	/*
7	* Copyright (C) 2007 Oracle Corporation
8	*
9	* This file is part of VirtualBox Open Source Edition (OSE), as
10	* available from http://www.virtualbox.org. This file is free software;
11	* you can redistribute it and/or modify it under the terms of the GNU
12	* General Public License (GPL) as published by the Free Software
13	* Foundation, in version 2 as it comes in the "COPYING" file of the
14	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	*/
17
18
19	/** @page pg_gmm GMM - The Global Memory Manager
20	*
21	* As the name indicates, this component is responsible for global memory
22	* management. Currently only guest RAM is allocated from the GMM, but this
23	* may change to include shadow page tables and other bits later.
24	*
25	* Guest RAM is managed as individual pages, but allocated from the host OS
26	* in chunks for reasons of portability / efficiency. To minimize the memory
27	* footprint all tracking structure must be as small as possible without
28	* unnecessary performance penalties.
29	*
30	* The allocation chunks have a fixed size, which is defined at compile time
31	* by the #GMM_CHUNK_SIZE \#define.
32	*
33	* Each chunk is given a unique ID. Each page also has a unique ID. The
34	* relationship between the two IDs is:
35	* @code
36	* GMM_CHUNK_SHIFT = log2(GMM_CHUNK_SIZE / PAGE_SIZE);
37	* idPage = (idChunk << GMM_CHUNK_SHIFT) | iPage;
38	* @endcode
39	* Where iPage is the index of the page within the chunk. This ID scheme
40	* permits for efficient chunk and page lookup, but it relies on the chunk size
41	* to be set at compile time. The chunks are organized in an AVL tree with their
42	* IDs being the keys.
43	*
44	* The physical address of each page in an allocation chunk is maintained by
45	* the #RTR0MEMOBJ and obtained using #RTR0MemObjGetPagePhysAddr. There is no
46	* need to duplicate this information (it'll cost 8-bytes per page if we did).
47	*
48	* So what do we need to track per page? Most importantly we need to know
49	* which state the page is in:
50	* - Private - Allocated for (eventually) backing one particular VM page.
51	* - Shared - Readonly page that is used by one or more VMs and treated
52	* as COW by PGM.
53	* - Free - Not used by anyone.
54	*
55	* For the page replacement operations (sharing, defragmenting and freeing)
56	* to be somewhat efficient, private pages needs to be associated with a
57	* particular page in a particular VM.
58	*
59	* Tracking the usage of shared pages is impractical and expensive, so we'll
60	* settle for a reference counting system instead.
61	*
62	* Free pages will be chained on LIFOs
63	*
64	* On 64-bit systems we will use a 64-bit bitfield per page, while on 32-bit
65	* systems a 32-bit bitfield will have to suffice because of address space
66	* limitations. The #GMMPAGE structure shows the details.
67	*
68	*
69	* @section sec_gmm_alloc_strat Page Allocation Strategy
70	*
71	* The strategy for allocating pages has to take fragmentation and shared
72	* pages into account, or we may end up with 2000 chunks with only
73	* a few pages in each. Shared pages cannot easily be reallocated because
74	* of the inaccurate usage accounting (see above). Private pages can be
75	* reallocated by a defragmentation thread in the same manner that sharing
76	* is done.
77	*
78	* The first approach is to manage the free pages in two sets depending on
79	* whether they are mainly for the allocation of shared or private pages.
80	* In the initial implementation there will be almost no possibility for
81	* mixing shared and private pages in the same chunk (only if we're really
82	* stressed on memory), but when we implement forking of VMs and have to
83	* deal with lots of COW pages it'll start getting kind of interesting.
84	*
85	* The sets are lists of chunks with approximately the same number of
86	* free pages. Say the chunk size is 1MB, meaning 256 pages, and a set
87	* consists of 16 lists. So, the first list will contain the chunks with
88	* 1-7 free pages, the second covers 8-15, and so on. The chunks will be
89	* moved between the lists as pages are freed up or allocated.
90	*
91	*
92	* @section sec_gmm_costs Costs
93	*
94	* The per page cost in kernel space is 32-bit plus whatever RTR0MEMOBJ
95	* entails. In addition there is the chunk cost of approximately
96	* (sizeof(RTR0MEMOBJ) + sizeof(CHUNK)) / 2^CHUNK_SHIFT bytes per page.
97	*
98	* On Windows the per page #RTR0MEMOBJ cost is 32-bit on 32-bit windows
99	* and 64-bit on 64-bit windows (a PFN_NUMBER in the MDL). So, 64-bit per page.
100	* The cost on Linux is identical, but here it's because of sizeof(struct page *).
101	*
102	*
103	* @section sec_gmm_legacy Legacy Mode for Non-Tier-1 Platforms
104	*
105	* In legacy mode the page source is locked user pages and not
106	* #RTR0MemObjAllocPhysNC, this means that a page can only be allocated
107	* by the VM that locked it. We will make no attempt at implementing
108	* page sharing on these systems, just do enough to make it all work.
109	*
110	*
111	* @subsection sub_gmm_locking Serializing
112	*
113	* One simple fast mutex will be employed in the initial implementation, not
114	* two as mentioned in @ref subsec_pgmPhys_Serializing.
115	*
116	* @see @ref subsec_pgmPhys_Serializing
117	*
118	*
119	* @section sec_gmm_overcommit Memory Over-Commitment Management
120	*
121	* The GVM will have to do the system wide memory over-commitment
122	* management. My current ideas are:
123	* - Per VM oc policy that indicates how much to initially commit
124	* to it and what to do in a out-of-memory situation.
125	* - Prevent overtaxing the host.
126	*
127	* There are some challenges here, the main ones are configurability and
128	* security. Should we for instance permit anyone to request 100% memory
129	* commitment? Who should be allowed to do runtime adjustments of the
130	* config. And how to prevent these settings from being lost when the last
131	* VM process exits? The solution is probably to have an optional root
132	* daemon that will keep VMMR0.r0 in memory and enable the security measures.
133	*
134	*
135	*
136	* @section sec_gmm_numa NUMA
137	*
138	* NUMA considerations will be designed and implemented a bit later.
139	*
140	* The preliminary guess is that we will have to try to allocate memory as
141	* close as possible to the CPUs the VM is executed on (EMT and additional CPU
142	* threads). Which means it's mostly about allocation and sharing policies.
143	* Both the scheduler and allocator interface will have to supply some NUMA info
144	* and we'll need to have a way to calc access costs.
145	*
146	*/
147
148
149	/*******************************************************************************
150	* Header Files *
151	*******************************************************************************/
152	#define LOG_GROUP LOG_GROUP_GMM
153	#include <VBox/gmm.h>
154	#include "GMMR0Internal.h"
155	#include <VBox/gvm.h>
156	#include <VBox/pgm.h>
157	#include <VBox/log.h>
158	#include <VBox/param.h>
159	#include <VBox/err.h>
160	#include <iprt/asm.h>
161	#include <iprt/avl.h>
162	#include <iprt/mem.h>
163	#include <iprt/memobj.h>
164	#include <iprt/semaphore.h>
165	#include <iprt/string.h>
166
167
168	/*******************************************************************************
169	* Structures and Typedefs *
170	*******************************************************************************/
171	/** Pointer to set of free chunks. */
172	typedef struct GMMCHUNKFREESET *PGMMCHUNKFREESET;
173
174	/** Pointer to a GMM allocation chunk. */
175	typedef struct GMMCHUNK *PGMMCHUNK;
176
177	/**
178	* The per-page tracking structure employed by the GMM.
179	*
180	* On 32-bit hosts we'll some trickery is necessary to compress all
181	* the information into 32-bits. When the fSharedFree member is set,
182	* the 30th bit decides whether it's a free page or not.
183	*
184	* Because of the different layout on 32-bit and 64-bit hosts, macros
185	* are used to get and set some of the data.
186	*/
187	typedef union GMMPAGE
188	{
189	#if HC_ARCH_BITS == 64
190	/** Unsigned integer view. */
191	uint64_t u;
192
193	/** The common view. */
194	struct GMMPAGECOMMON
195	{
196	uint32_t uStuff1 : 32;
197	uint32_t uStuff2 : 30;
198	/** The page state. */
199	uint32_t u2State : 2;
200	} Common;
201
202	/** The view of a private page. */
203	struct GMMPAGEPRIVATE
204	{
205	/** The guest page frame number. (Max addressable: 2 ^ 44 - 16) */
206	uint32_t pfn;
207	/** The GVM handle. (64K VMs) */
208	uint32_t hGVM : 16;
209	/** Reserved. */
210	uint32_t u16Reserved : 14;
211	/** The page state. */
212	uint32_t u2State : 2;
213	} Private;
214
215	/** The view of a shared page. */
216	struct GMMPAGESHARED
217	{
218	/** The host page frame number. (Max addressable: 2 ^ 44 - 16) */
219	uint32_t pfn;
220	/** The reference count (64K VMs). */
221	uint32_t cRefs : 16;
222	/** Reserved. Checksum or something? Two hGVMs for forking? */
223	uint32_t u14Reserved : 14;
224	/** The page state. */
225	uint32_t u2State : 2;
226	} Shared;
227
228	/** The view of a free page. */
229	struct GMMPAGEFREE
230	{
231	/** The index of the next page in the free list. UINT16_MAX is NIL. */
232	uint16_t iNext;
233	/** Reserved. Checksum or something? */
234	uint16_t u16Reserved0;
235	/** Reserved. Checksum or something? */
236	uint32_t u30Reserved1 : 30;
237	/** The page state. */
238	uint32_t u2State : 2;
239	} Free;
240
241	#else /* 32-bit */
242	/** Unsigned integer view. */
243	uint32_t u;
244
245	/** The common view. */
246	struct GMMPAGECOMMON
247	{
248	uint32_t uStuff : 30;
249	/** The page state. */
250	uint32_t u2State : 2;
251	} Common;
252
253	/** The view of a private page. */
254	struct GMMPAGEPRIVATE
255	{
256	/** The guest page frame number. (Max addressable: 2 ^ 36) */
257	uint32_t pfn : 24;
258	/** The GVM handle. (127 VMs) */
259	uint32_t hGVM : 7;
260	/** The top page state bit, MBZ. */
261	uint32_t fZero : 1;
262	} Private;
263
264	/** The view of a shared page. */
265	struct GMMPAGESHARED
266	{
267	/** The reference count. */
268	uint32_t cRefs : 30;
269	/** The page state. */
270	uint32_t u2State : 2;
271	} Shared;
272
273	/** The view of a free page. */
274	struct GMMPAGEFREE
275	{
276	/** The index of the next page in the free list. UINT16_MAX is NIL. */
277	uint32_t iNext : 16;
278	/** Reserved. Checksum or something? */
279	uint32_t u14Reserved : 14;
280	/** The page state. */
281	uint32_t u2State : 2;
282	} Free;
283	#endif
284	} GMMPAGE;
285	AssertCompileSize(GMMPAGE, sizeof(RTHCUINTPTR));
286	/** Pointer to a GMMPAGE. */
287	typedef GMMPAGE *PGMMPAGE;
288
289
290	/** @name The Page States.
291	* @{ */
292	/** A private page. */
293	#define GMM_PAGE_STATE_PRIVATE 0
294	/** A private page - alternative value used on the 32-bit implementation.
295	* This will never be used on 64-bit hosts. */
296	#define GMM_PAGE_STATE_PRIVATE_32 1
297	/** A shared page. */
298	#define GMM_PAGE_STATE_SHARED 2
299	/** A free page. */
300	#define GMM_PAGE_STATE_FREE 3
301	/** @} */
302
303
304	/** @def GMM_PAGE_IS_PRIVATE
305	*
306	* @returns true if private, false if not.
307	* @param pPage The GMM page.
308	*/
309	#if HC_ARCH_BITS == 64
310	# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_PRIVATE )
311	#else
312	# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Private.fZero == 0 )
313	#endif
314
315	/** @def GMM_PAGE_IS_SHARED
316	*
317	* @returns true if shared, false if not.
318	* @param pPage The GMM page.
319	*/
320	#define GMM_PAGE_IS_SHARED(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_SHARED )
321
322	/** @def GMM_PAGE_IS_FREE
323	*
324	* @returns true if free, false if not.
325	* @param pPage The GMM page.
326	*/
327	#define GMM_PAGE_IS_FREE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_FREE )
328
329	/** @def GMM_PAGE_PFN_LAST
330	* The last valid guest pfn range.
331	* @remark Some of the values outside the range has special meaning,
332	* see GMM_PAGE_PFN_UNSHAREABLE.
333	*/
334	#if HC_ARCH_BITS == 64
335	# define GMM_PAGE_PFN_LAST UINT32_C(0xfffffff0)
336	#else
337	# define GMM_PAGE_PFN_LAST UINT32_C(0x00fffff0)
338	#endif
339	AssertCompile(GMM_PAGE_PFN_LAST == (GMM_GCPHYS_LAST >> PAGE_SHIFT));
340
341	/** @def GMM_PAGE_PFN_UNSHAREABLE
342	* Indicates that this page isn't used for normal guest memory and thus isn't shareable.
343	*/
344	#if HC_ARCH_BITS == 64
345	# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0xfffffff1)
346	#else
347	# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0x00fffff1)
348	#endif
349	AssertCompile(GMM_PAGE_PFN_UNSHAREABLE == (GMM_GCPHYS_UNSHAREABLE >> PAGE_SHIFT));
350
351
352	/**
353	* A GMM allocation chunk ring-3 mapping record.
354	*
355	* This should really be associated with a session and not a VM, but
356	* it's simpler to associated with a VM and cleanup with the VM object
357	* is destroyed.
358	*/
359	typedef struct GMMCHUNKMAP
360	{
361	/** The mapping object. */
362	RTR0MEMOBJ MapObj;
363	/** The VM owning the mapping. */
364	PGVM pGVM;
365	} GMMCHUNKMAP;
366	/** Pointer to a GMM allocation chunk mapping. */
367	typedef struct GMMCHUNKMAP *PGMMCHUNKMAP;
368
/**
 * The kind of host memory backing an allocation chunk.
 */
typedef enum GMMCHUNKTYPE
{
    /** Invalid zero value. */
    GMMCHUNKTYPE_INVALID        = 0,
    /** 4 kb pages. */
    GMMCHUNKTYPE_NON_CONTINUOUS = 1,
    /** One 2 MB continuous physical range. */
    GMMCHUNKTYPE_CONTINUOUS     = 2,
    /** Largest positive 32-bit value; not a real chunk type. */
    GMMCHUNKTYPE_32BIT_HACK     = 0x7fffffff
} GMMCHUNKTYPE;
376
377
378	/**
379	* A GMM allocation chunk.
380	*/
381	typedef struct GMMCHUNK
382	{
383	/** The AVL node core.
384	* The Key is the chunk ID. */
385	AVLU32NODECORE Core;
386	/** The memory object.
387	* Either from RTR0MemObjAllocPhysNC or RTR0MemObjLockUser depending on
388	* what the host can dish up with. */
389	RTR0MEMOBJ MemObj;
390	/** Pointer to the next chunk in the free list. */
391	PGMMCHUNK pFreeNext;
392	/** Pointer to the previous chunk in the free list. */
393	PGMMCHUNK pFreePrev;
394	/** Pointer to the free set this chunk belongs to. NULL for
395	* chunks with no free pages. */
396	PGMMCHUNKFREESET pSet;
397	/** Pointer to an array of mappings. */
398	PGMMCHUNKMAP paMappings;
399	/** The number of mappings. */
400	uint16_t cMappings;
401	/** The head of the list of free pages. UINT16_MAX is the NIL value. */
402	uint16_t iFreeHead;
403	/** The number of free pages. */
404	uint16_t cFree;
405	/** The GVM handle of the VM that first allocated pages from this chunk, this
406	* is used as a preference when there are several chunks to choose from.
407	* When in bound memory mode this isn't a preference any longer. */
408	uint16_t hGVM;
409	/** The number of private pages. */
410	uint16_t cPrivate;
411	/** The number of shared pages. */
412	uint16_t cShared;
413	/** Chunk type */
414	GMMCHUNKTYPE enmType;
415	/** The pages. */
416	GMMPAGE aPages[GMM_CHUNK_SIZE >> PAGE_SHIFT];
417	} GMMCHUNK;
418
419
420	/**
421	* An allocation chunk TLB entry.
422	*/
423	typedef struct GMMCHUNKTLBE
424	{
425	/** The chunk id. */
426	uint32_t idChunk;
427	/** Pointer to the chunk. */
428	PGMMCHUNK pChunk;
429	} GMMCHUNKTLBE;
430	/** Pointer to an allocation chunk TLB entry. */
431	typedef GMMCHUNKTLBE *PGMMCHUNKTLBE;
432
433
434	/** The number of entries in the allocation chunk TLB. */
435	#define GMM_CHUNKTLB_ENTRIES 32
436	/** Gets the TLB entry index for the given Chunk ID. */
437	#define GMM_CHUNKTLB_IDX(idChunk) ( (idChunk) & (GMM_CHUNKTLB_ENTRIES - 1) )
438
439	/**
440	* An allocation chunk TLB.
441	*/
442	typedef struct GMMCHUNKTLB
443	{
444	/** The TLB entries. */
445	GMMCHUNKTLBE aEntries[GMM_CHUNKTLB_ENTRIES];
446	} GMMCHUNKTLB;
447	/** Pointer to an allocation chunk TLB. */
448	typedef GMMCHUNKTLB *PGMMCHUNKTLB;
449
450
451	/** The GMMCHUNK::cFree shift count. */
452	#define GMM_CHUNK_FREE_SET_SHIFT 4
453	/** The GMMCHUNK::cFree mask for use when considering relinking a chunk. */
454	#define GMM_CHUNK_FREE_SET_MASK 15
455	/** The number of lists in set. */
456	#define GMM_CHUNK_FREE_SET_LISTS (GMM_CHUNK_NUM_PAGES >> GMM_CHUNK_FREE_SET_SHIFT)
457
458	/**
459	* A set of free chunks.
460	*/
461	typedef struct GMMCHUNKFREESET
462	{
463	/** The number of free pages in the set. */
464	uint64_t cFreePages;
465	/** Chunks ordered by increasing number of free pages. */
466	PGMMCHUNK apLists[GMM_CHUNK_FREE_SET_LISTS];
467	} GMMCHUNKFREESET;
468
469
470	/**
471	* The GMM instance data.
472	*/
473	typedef struct GMM
474	{
475	/** Magic / eye catcher. GMM_MAGIC */
476	uint32_t u32Magic;
477	/** The fast mutex protecting the GMM.
478	* More fine grained locking can be implemented later if necessary. */
479	RTSEMFASTMUTEX Mtx;
480	/** The chunk tree. */
481	PAVLU32NODECORE pChunks;
482	/** The chunk TLB. */
483	GMMCHUNKTLB ChunkTLB;
484	/** The private free set. */
485	GMMCHUNKFREESET Private;
486	/** The shared free set. */
487	GMMCHUNKFREESET Shared;
488
489	/** Shared module tree (global). */
490	/** todo seperate trees for distinctly different guest OSes. */
491	PAVLGCPTRNODECORE pGlobalSharedModuleTree;
492
493	/** The maximum number of pages we're allowed to allocate.
494	* @gcfgm 64-bit GMM/MaxPages Direct.
495	* @gcfgm 32-bit GMM/PctPages Relative to the number of host pages. */
496	uint64_t cMaxPages;
497	/** The number of pages that has been reserved.
498	* The deal is that cReservedPages - cOverCommittedPages <= cMaxPages. */
499	uint64_t cReservedPages;
500	/** The number of pages that we have over-committed in reservations. */
501	uint64_t cOverCommittedPages;
502	/** The number of actually allocated (committed if you like) pages. */
503	uint64_t cAllocatedPages;
504	/** The number of pages that are shared. A subset of cAllocatedPages. */
505	uint64_t cSharedPages;
506	/** The number of pages that are shared that has been left behind by
507	* VMs not doing proper cleanups. */
508	uint64_t cLeftBehindSharedPages;
509	/** The number of allocation chunks.
510	* (The number of pages we've allocated from the host can be derived from this.) */
511	uint32_t cChunks;
512	/** The number of current ballooned pages. */
513	uint64_t cBalloonedPages;
514
515	/** The legacy allocation mode indicator.
516	* This is determined at initialization time. */
517	bool fLegacyAllocationMode;
518	/** The bound memory mode indicator.
519	* When set, the memory will be bound to a specific VM and never
520	* shared. This is always set if fLegacyAllocationMode is set.
521	* (Also determined at initialization time.) */
522	bool fBoundMemoryMode;
523	/** The number of registered VMs. */
524	uint16_t cRegisteredVMs;
525
526	/** The previous allocated Chunk ID.
527	* Used as a hint to avoid scanning the whole bitmap. */
528	uint32_t idChunkPrev;
529	/** Chunk ID allocation bitmap.
530	* Bits of allocated IDs are set, free ones are clear.
531	* The NIL id (0) is marked allocated. */
532	uint32_t bmChunkId[(GMM_CHUNKID_LAST + 1 + 31) / 32];
533	} GMM;
534	/** Pointer to the GMM instance. */
535	typedef GMM *PGMM;
536
537	/** The value of GMM::u32Magic (Katsuhiro Otomo). */
538	#define GMM_MAGIC 0x19540414
539
540
541	/*******************************************************************************
542	* Global Variables *
543	*******************************************************************************/
544	/** Pointer to the GMM instance data. */
545	static PGMM g_pGMM = NULL;
546
547	/** Macro for obtaining and validating the g_pGMM pointer.
548	* On failure it will return from the invoking function with the specified return value.
549	*
550	* @param pGMM The name of the pGMM variable.
551	* @param rc The return value on failure. Use VERR_INTERNAL_ERROR for
552	* VBox status codes.
553	*/
554	#define GMM_GET_VALID_INSTANCE(pGMM, rc) \
555	do { \
556	(pGMM) = g_pGMM; \
557	AssertPtrReturn((pGMM), (rc)); \
558	AssertMsgReturn((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic), (rc)); \
559	} while (0)
560
561	/** Macro for obtaining and validating the g_pGMM pointer, void function variant.
562	* On failure it will return from the invoking function.
563	*
564	* @param pGMM The name of the pGMM variable.
565	*/
566	#define GMM_GET_VALID_INSTANCE_VOID(pGMM) \
567	do { \
568	(pGMM) = g_pGMM; \
569	AssertPtrReturnVoid((pGMM)); \
570	AssertMsgReturnVoid((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic)); \
571	} while (0)
572
573
574	/** @def GMM_CHECK_SANITY_UPON_ENTERING
575	* Checks the sanity of the GMM instance data before making changes.
576	*
577	* This macro is a stub by default and must be enabled manually in the code.
578	*
579	* @returns true if sane, false if not.
580	* @param pGMM The name of the pGMM variable.
581	*/
582	#if defined(VBOX_STRICT) && 0
583	# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
584	#else
585	# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM) (true)
586	#endif
587
588	/** @def GMM_CHECK_SANITY_UPON_LEAVING
589	* Checks the sanity of the GMM instance data after making changes.
590	*
591	* This macro is a stub by default and must be enabled manually in the code.
592	*
593	* @returns true if sane, false if not.
594	* @param pGMM The name of the pGMM variable.
595	*/
596	#if defined(VBOX_STRICT) && 0
597	# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
598	#else
599	# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM) (true)
600	#endif
601
602	/** @def GMM_CHECK_SANITY_IN_LOOPS
603	* Checks the sanity of the GMM instance in the allocation loops.
604	*
605	* This macro is a stub by default and must be enabled manually in the code.
606	*
607	* @returns true if sane, false if not.
608	* @param pGMM The name of the pGMM variable.
609	*/
610	#if defined(VBOX_STRICT) && 0
611	# define GMM_CHECK_SANITY_IN_LOOPS(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
612	#else
613	# define GMM_CHECK_SANITY_IN_LOOPS(pGMM) (true)
614	#endif
615
616
617	/*******************************************************************************
618	* Internal Functions *
619	*******************************************************************************/
620	static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM);
621	static DECLCALLBACK(int) gmmR0CleanupVMScanChunk(PAVLU32NODECORE pNode, void *pvGMM);
622	static DECLCALLBACK(int) gmmR0CleanupSharedModule(PAVLGCPTRNODECORE pNode, void *pvGVM);
623	/static/ DECLCALLBACK(int) gmmR0CleanupVMDestroyChunk(PAVLU32NODECORE pNode, void *pvGVM);
624	DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet);
625	DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk);
626	static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo);
627	static void gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
628	static void gmmR0FreeSharedPage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage);
629	static int gmmR0UnmapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
630
631
632
633	/**
634	* Initializes the GMM component.
635	*
636	* This is called when the VMMR0.r0 module is loaded and protected by the
637	* loader semaphore.
638	*
639	* @returns VBox status code.
640	*/
641	GMMR0DECL(int) GMMR0Init(void)
642	{
643	LogFlow(("GMMInit:\n"));
644
645	/*
646	* Allocate the instance data and the lock(s).
647	*/
648	PGMM pGMM = (PGMM)RTMemAllocZ(sizeof(*pGMM));
649	if (!pGMM)
650	return VERR_NO_MEMORY;
651	pGMM->u32Magic = GMM_MAGIC;
652	for (unsigned i = 0; i < RT_ELEMENTS(pGMM->ChunkTLB.aEntries); i++)
653	pGMM->ChunkTLB.aEntries[i].idChunk = NIL_GMM_CHUNKID;
654	ASMBitSet(&pGMM->bmChunkId[0], NIL_GMM_CHUNKID);
655
656	int rc = RTSemFastMutexCreate(&pGMM->Mtx);
657	if (RT_SUCCESS(rc))
658	{
659	/*
660	* Check and see if RTR0MemObjAllocPhysNC works.
661	*/
662	#if 0 /* later, see #3170. */
663	RTR0MEMOBJ MemObj;
664	rc = RTR0MemObjAllocPhysNC(&MemObj, _64K, NIL_RTHCPHYS);
665	if (RT_SUCCESS(rc))
666	{
667	rc = RTR0MemObjFree(MemObj, true);
668	AssertRC(rc);
669	}
670	else if (rc == VERR_NOT_SUPPORTED)
671	pGMM->fLegacyAllocationMode = pGMM->fBoundMemoryMode = true;
672	else
673	SUPR0Printf("GMMR0Init: RTR0MemObjAllocPhysNC(,64K,Any) -> %d!\n", rc);
674	#else
675	# if defined(RT_OS_WINDOWS) \|\| defined(RT_OS_SOLARIS) \|\| defined(RT_OS_LINUX) \|\| defined(RT_OS_FREEBSD)
676	pGMM->fLegacyAllocationMode = false;
677	# if ARCH_BITS == 32
678	/* Don't reuse possibly partial chunks because of the virtual address space limitation. */
679	pGMM->fBoundMemoryMode = true;
680	# else
681	pGMM->fBoundMemoryMode = false;
682	# endif
683	# else
684	pGMM->fLegacyAllocationMode = true;
685	pGMM->fBoundMemoryMode = true;
686	# endif
687	#endif
688
689	/*
690	* Query system page count and guess a reasonable cMaxPages value.
691	*/
692	pGMM->cMaxPages = UINT32_MAX; /** @todo IPRT function for query ram size and such. */
693
694	g_pGMM = pGMM;
695	LogFlow(("GMMInit: pGMM=%p fLegacyAllocationMode=%RTbool fBoundMemoryMode=%RTbool\n", pGMM, pGMM->fLegacyAllocationMode, pGMM->fBoundMemoryMode));
696	return VINF_SUCCESS;
697	}
698
699	RTMemFree(pGMM);
700	SUPR0Printf("GMMR0Init: failed! rc=%d\n", rc);
701	return rc;
702	}
703
704
705	/**
706	* Terminates the GMM component.
707	*/
708	GMMR0DECL(void) GMMR0Term(void)
709	{
710	LogFlow(("GMMTerm:\n"));
711
712	/*
713	* Take care / be paranoid...
714	*/
715	PGMM pGMM = g_pGMM;
716	if (!VALID_PTR(pGMM))
717	return;
718	if (pGMM->u32Magic != GMM_MAGIC)
719	{
720	SUPR0Printf("GMMR0Term: u32Magic=%#x\n", pGMM->u32Magic);
721	return;
722	}
723
724	/*
725	* Undo what init did and free all the resources we've acquired.
726	*/
727	/* Destroy the fundamentals. */
728	g_pGMM = NULL;
729	pGMM->u32Magic++;
730	RTSemFastMutexDestroy(pGMM->Mtx);
731	pGMM->Mtx = NIL_RTSEMFASTMUTEX;
732
733	/* free any chunks still hanging around. */
734	RTAvlU32Destroy(&pGMM->pChunks, gmmR0TermDestroyChunk, pGMM);
735
736	/* finally the instance data itself. */
737	RTMemFree(pGMM);
738	LogFlow(("GMMTerm: done\n"));
739	}
740
741
742	/**
743	* RTAvlU32Destroy callback.
744	*
745	* @returns 0
746	* @param pNode The node to destroy.
747	* @param pvGMM The GMM handle.
748	*/
749	static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM)
750	{
751	PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
752
753	if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
754	SUPR0Printf("GMMR0Term: %p/%#x: cFree=%d cPrivate=%d cShared=%d cMappings=%d\n", pChunk,
755	pChunk->Core.Key, pChunk->cFree, pChunk->cPrivate, pChunk->cShared, pChunk->cMappings);
756
757	int rc = RTR0MemObjFree(pChunk->MemObj, true /* fFreeMappings */);
758	if (RT_FAILURE(rc))
759	{
760	SUPR0Printf("GMMR0Term: %p/%#x: RTRMemObjFree(%p,true) -> %d (cMappings=%d)\n", pChunk,
761	pChunk->Core.Key, pChunk->MemObj, rc, pChunk->cMappings);
762	AssertRC(rc);
763	}
764	pChunk->MemObj = NIL_RTR0MEMOBJ;
765
766	RTMemFree(pChunk->paMappings);
767	pChunk->paMappings = NULL;
768
769	RTMemFree(pChunk);
770	NOREF(pvGMM);
771	return 0;
772	}
773
774
775	/**
776	* Initializes the per-VM data for the GMM.
777	*
778	* This is called from within the GVMM lock (from GVMMR0CreateVM)
779	* and should only initialize the data members so GMMR0CleanupVM
780	* can deal with them. We reserve no memory or anything here,
781	* that's done later in GMMR0InitVM.
782	*
783	* @param pGVM Pointer to the Global VM structure.
784	*/
785	GMMR0DECL(void) GMMR0InitPerVMData(PGVM pGVM)
786	{
787	AssertCompile(RT_SIZEOFMEMB(GVM,gmm.s) <= RT_SIZEOFMEMB(GVM,gmm.padding));
788
789	pGVM->gmm.s.enmPolicy = GMMOCPOLICY_INVALID;
790	pGVM->gmm.s.enmPriority = GMMPRIORITY_INVALID;
791	pGVM->gmm.s.fMayAllocate = false;
792	}
793
794
795	/**
796	* Cleans up when a VM is terminating.
797	*
798	* @param pGVM Pointer to the Global VM structure.
799	*/
800	GMMR0DECL(void) GMMR0CleanupVM(PGVM pGVM)
801	{
802	LogFlow(("GMMR0CleanupVM: pGVM=%p:{.pVM=%p, .hSelf=%#x}\n", pGVM, pGVM->pVM, pGVM->hSelf));
803
804	PGMM pGMM;
805	GMM_GET_VALID_INSTANCE_VOID(pGMM);
806
807	int rc = RTSemFastMutexRequest(pGMM->Mtx);
808	AssertRC(rc);
809	GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
810
811	#ifdef VBOX_WITH_PAGE_SHARING
812	/* Clean up all registered shared modules. */
813	RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, pGVM);
814	#endif
815
816	/*
817	* The policy is 'INVALID' until the initial reservation
818	* request has been serviced.
819	*/
820	if ( pGVM->gmm.s.enmPolicy > GMMOCPOLICY_INVALID
821	&& pGVM->gmm.s.enmPolicy < GMMOCPOLICY_END)
822	{
823	/*
824	* If it's the last VM around, we can skip walking all the chunk looking
825	* for the pages owned by this VM and instead flush the whole shebang.
826	*
827	* This takes care of the eventuality that a VM has left shared page
828	* references behind (shouldn't happen of course, but you never know).
829	*/
830	Assert(pGMM->cRegisteredVMs);
831	pGMM->cRegisteredVMs--;
832	#if 0 /* disabled so it won't hide bugs. */
833	if (!pGMM->cRegisteredVMs)
834	{
835	RTAvlU32Destroy(&pGMM->pChunks, gmmR0CleanupVMDestroyChunk, pGMM);
836
837	for (unsigned i = 0; i < RT_ELEMENTS(pGMM->ChunkTLB.aEntries); i++)
838	{
839	pGMM->ChunkTLB.aEntries[i].idChunk = NIL_GMM_CHUNKID;
840	pGMM->ChunkTLB.aEntries[i].pChunk = NULL;
841	}
842
843	memset(&pGMM->Private, 0, sizeof(pGMM->Private));
844	memset(&pGMM->Shared, 0, sizeof(pGMM->Shared));
845
846	memset(&pGMM->bmChunkId[0], 0, sizeof(pGMM->bmChunkId));
847	ASMBitSet(&pGMM->bmChunkId[0], NIL_GMM_CHUNKID);
848
849	pGMM->cReservedPages = 0;
850	pGMM->cOverCommittedPages = 0;
851	pGMM->cAllocatedPages = 0;
852	pGMM->cSharedPages = 0;
853	pGMM->cLeftBehindSharedPages = 0;
854	pGMM->cChunks = 0;
855	pGMM->cBalloonedPages = 0;
856	}
857	else
858	#endif
859	{
860	/*
861	* Walk the entire pool looking for pages that belongs to this VM
862	* and left over mappings. (This'll only catch private pages, shared
863	* pages will be 'left behind'.)
864	*/
865	uint64_t cPrivatePages = pGVM->gmm.s.cPrivatePages; /* save */
866	RTAvlU32DoWithAll(&pGMM->pChunks, true /* fFromLeft */, gmmR0CleanupVMScanChunk, pGVM);
867	if (pGVM->gmm.s.cPrivatePages)
868	SUPR0Printf("GMMR0CleanupVM: hGVM=%#x has %#x private pages that cannot be found!\n", pGVM->hSelf, pGVM->gmm.s.cPrivatePages);
869	pGMM->cAllocatedPages -= cPrivatePages;
870
871	/* free empty chunks. */
872	if (cPrivatePages)
873	{
874	PGMMCHUNK pCur = pGMM->Private.apLists[RT_ELEMENTS(pGMM->Private.apLists) - 1];
875	while (pCur)
876	{
877	PGMMCHUNK pNext = pCur->pFreeNext;
878	if ( pCur->cFree == GMM_CHUNK_NUM_PAGES
879	&& ( !pGMM->fBoundMemoryMode
880	\|\| pCur->hGVM == pGVM->hSelf))
881	gmmR0FreeChunk(pGMM, pGVM, pCur);
882	pCur = pNext;
883	}
884	}
885
886	/* account for shared pages that weren't freed. */
887	if (pGVM->gmm.s.cSharedPages)
888	{
889	Assert(pGMM->cSharedPages >= pGVM->gmm.s.cSharedPages);
890	SUPR0Printf("GMMR0CleanupVM: hGVM=%#x left %#x shared pages behind!\n", pGVM->hSelf, pGVM->gmm.s.cSharedPages);
891	pGMM->cLeftBehindSharedPages += pGVM->gmm.s.cSharedPages;
892	}
893
894	/*
895	* Update the over-commitment management statistics.
896	*/
897	pGMM->cReservedPages -= pGVM->gmm.s.Reserved.cBasePages
898	+ pGVM->gmm.s.Reserved.cFixedPages
899	+ pGVM->gmm.s.Reserved.cShadowPages;
900	switch (pGVM->gmm.s.enmPolicy)
901	{
902	case GMMOCPOLICY_NO_OC:
903	break;
904	default:
905	/** @todo Update GMM->cOverCommittedPages */
906	break;
907	}
908	}
909	}
910
911	/* zap the GVM data. */
912	pGVM->gmm.s.enmPolicy = GMMOCPOLICY_INVALID;
913	pGVM->gmm.s.enmPriority = GMMPRIORITY_INVALID;
914	pGVM->gmm.s.fMayAllocate = false;
915
916	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
917	RTSemFastMutexRelease(pGMM->Mtx);
918
919	LogFlow(("GMMR0CleanupVM: returns\n"));
920	}
921
922
923	/**
924	* RTAvlU32DoWithAll callback.
925	*
926	* @returns 0
927	* @param pNode The node to search.
928	* @param pvGVM Pointer to the shared VM structure.
929	*/
930	static DECLCALLBACK(int) gmmR0CleanupVMScanChunk(PAVLU32NODECORE pNode, void *pvGVM)
931	{
932	PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
933	PGVM pGVM = (PGVM)pvGVM;
934
935	/*
936	* Look for pages belonging to the VM.
937	* (Perform some internal checks while we're scanning.)
938	*/
939	#ifndef VBOX_STRICT
940	if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
941	#endif
942	{
943	unsigned cPrivate = 0;
944	unsigned cShared = 0;
945	unsigned cFree = 0;
946
947	gmmR0UnlinkChunk(pChunk); /* avoiding cFreePages updates. */
948
949	uint16_t hGVM = pGVM->hSelf;
950	unsigned iPage = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
951	while (iPage-- > 0)
952	if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
953	{
954	if (pChunk->aPages[iPage].Private.hGVM == hGVM)
955	{
956	/*
957	* Free the page.
958	*
959	* The reason for not using gmmR0FreePrivatePage here is that we
960	* must not cause the chunk to be freed from under us - we're in
961	* an AVL tree walk here.
962	*/
963	pChunk->aPages[iPage].u = 0;
964	pChunk->aPages[iPage].Free.iNext = pChunk->iFreeHead;
965	pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
966	pChunk->iFreeHead = iPage;
967	pChunk->cPrivate--;
968	pChunk->cFree++;
969	pGVM->gmm.s.cPrivatePages--;
970	cFree++;
971	}
972	else
973	cPrivate++;
974	}
975	else if (GMM_PAGE_IS_FREE(&pChunk->aPages[iPage]))
976	cFree++;
977	else
978	cShared++;
979
980	gmmR0LinkChunk(pChunk, pChunk->cShared ? &g_pGMM->Shared : &g_pGMM->Private);
981
982	/*
983	* Did it add up?
984	*/
985	if (RT_UNLIKELY( pChunk->cFree != cFree
986	\|\| pChunk->cPrivate != cPrivate
987	\|\| pChunk->cShared != cShared))
988	{
989	SUPR0Printf("gmmR0CleanupVMScanChunk: Chunk %p/%#x has bogus stats - free=%d/%d private=%d/%d shared=%d/%d\n",
990	pChunk->cFree, cFree, pChunk->cPrivate, cPrivate, pChunk->cShared, cShared);
991	pChunk->cFree = cFree;
992	pChunk->cPrivate = cPrivate;
993	pChunk->cShared = cShared;
994	}
995	}
996
997	/*
998	* Look for the mapping belonging to the terminating VM.
999	*/
1000	for (unsigned i = 0; i < pChunk->cMappings; i++)
1001	if (pChunk->paMappings[i].pGVM == pGVM)
1002	{
1003	RTR0MEMOBJ MemObj = pChunk->paMappings[i].MapObj;
1004
1005	pChunk->cMappings--;
1006	if (i < pChunk->cMappings)
1007	pChunk->paMappings[i] = pChunk->paMappings[pChunk->cMappings];
1008	pChunk->paMappings[pChunk->cMappings].pGVM = NULL;
1009	pChunk->paMappings[pChunk->cMappings].MapObj = NIL_RTR0MEMOBJ;
1010
1011	int rc = RTR0MemObjFree(MemObj, false /* fFreeMappings (NA) */);
1012	if (RT_FAILURE(rc))
1013	{
1014	SUPR0Printf("gmmR0CleanupVMScanChunk: %p/%#x: mapping #%x: RTRMemObjFree(%p,false) -> %d \n",
1015	pChunk, pChunk->Core.Key, i, MemObj, rc);
1016	AssertRC(rc);
1017	}
1018	break;
1019	}
1020
1021	/*
1022	* If not in bound memory mode, we should reset the hGVM field
1023	* if it has our handle in it.
1024	*/
1025	if (pChunk->hGVM == pGVM->hSelf)
1026	{
1027	if (!g_pGMM->fBoundMemoryMode)
1028	pChunk->hGVM = NIL_GVM_HANDLE;
1029	else if (pChunk->cFree != GMM_CHUNK_NUM_PAGES)
1030	{
1031	SUPR0Printf("gmmR0CleanupVMScanChunk: %p/%#x: cFree=%#x - it should be 0 in bound mode!\n",
1032	pChunk, pChunk->Core.Key, pChunk->cFree);
1033	AssertMsgFailed(("%p/%#x: cFree=%#x - it should be 0 in bound mode!\n", pChunk, pChunk->Core.Key, pChunk->cFree));
1034
1035	gmmR0UnlinkChunk(pChunk);
1036	pChunk->cFree = GMM_CHUNK_NUM_PAGES;
1037	gmmR0LinkChunk(pChunk, pChunk->cShared ? &g_pGMM->Shared : &g_pGMM->Private);
1038	}
1039	}
1040
1041	return 0;
1042	}
1043
1044
1045	/**
1046	* RTAvlU32Destroy callback for GMMR0CleanupVM.
1047	*
1048	* @returns 0
1049	* @param pNode The node (allocation chunk) to destroy.
1050	* @param pvGVM Pointer to the shared VM structure.
1051	*/
1052	/static/ DECLCALLBACK(int) gmmR0CleanupVMDestroyChunk(PAVLU32NODECORE pNode, void *pvGVM)
1053	{
1054	PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
1055	PGVM pGVM = (PGVM)pvGVM;
1056
1057	for (unsigned i = 0; i < pChunk->cMappings; i++)
1058	{
1059	if (pChunk->paMappings[i].pGVM != pGVM)
1060	SUPR0Printf("gmmR0CleanupVMDestroyChunk: %p/%#x: mapping #%x: pGVM=%p exepcted %p\n", pChunk,
1061	pChunk->Core.Key, i, pChunk->paMappings[i].pGVM, pGVM);
1062	int rc = RTR0MemObjFree(pChunk->paMappings[i].MapObj, false /* fFreeMappings (NA) */);
1063	if (RT_FAILURE(rc))
1064	{
1065	SUPR0Printf("gmmR0CleanupVMDestroyChunk: %p/%#x: mapping #%x: RTRMemObjFree(%p,false) -> %d \n", pChunk,
1066	pChunk->Core.Key, i, pChunk->paMappings[i].MapObj, rc);
1067	AssertRC(rc);
1068	}
1069	}
1070
1071	int rc = RTR0MemObjFree(pChunk->MemObj, true /* fFreeMappings */);
1072	if (RT_FAILURE(rc))
1073	{
1074	SUPR0Printf("gmmR0CleanupVMDestroyChunk: %p/%#x: RTRMemObjFree(%p,true) -> %d (cMappings=%d)\n", pChunk,
1075	pChunk->Core.Key, pChunk->MemObj, rc, pChunk->cMappings);
1076	AssertRC(rc);
1077	}
1078	pChunk->MemObj = NIL_RTR0MEMOBJ;
1079
1080	RTMemFree(pChunk->paMappings);
1081	pChunk->paMappings = NULL;
1082
1083	RTMemFree(pChunk);
1084	return 0;
1085	}
1086
1087
1088	/**
1089	* The initial resource reservations.
1090	*
1091	* This will make memory reservations according to policy and priority. If there aren't
1092	* sufficient resources available to sustain the VM this function will fail and all
1093	* future allocations requests will fail as well.
1094	*
1095	* These are just the initial reservations made very very early during the VM creation
1096	* process and will be adjusted later in the GMMR0UpdateReservation call after the
1097	* ring-3 init has completed.
1098	*
1099	* @returns VBox status code.
1100	* @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1101	* @retval VERR_GMM_
1102	*
1103	* @param pVM Pointer to the shared VM structure.
1104	* @param idCpu VCPU id
1105	* @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1106	* This does not include MMIO2 and similar.
1107	* @param cShadowPages The number of pages that may be allocated for shadow pageing structures.
1108	* @param cFixedPages The number of pages that may be allocated for fixed objects like the
1109	* hyper heap, MMIO2 and similar.
1110	* @param enmPolicy The OC policy to use on this VM.
1111	* @param enmPriority The priority in an out-of-memory situation.
1112	*
1113	* @thread The creator thread / EMT.
1114	*/
1115	GMMR0DECL(int) GMMR0InitialReservation(PVM pVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages,
1116	GMMOCPOLICY enmPolicy, GMMPRIORITY enmPriority)
1117	{
1118	LogFlow(("GMMR0InitialReservation: pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x enmPolicy=%d enmPriority=%d\n",
1119	pVM, cBasePages, cShadowPages, cFixedPages, enmPolicy, enmPriority));
1120
1121	/*
1122	* Validate, get basics and take the semaphore.
1123	*/
1124	PGMM pGMM;
1125	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1126	PGVM pGVM;
1127	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
1128	if (RT_FAILURE(rc))
1129	return rc;
1130
1131	AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1132	AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1133	AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1134	AssertReturn(enmPolicy > GMMOCPOLICY_INVALID && enmPolicy < GMMOCPOLICY_END, VERR_INVALID_PARAMETER);
1135	AssertReturn(enmPriority > GMMPRIORITY_INVALID && enmPriority < GMMPRIORITY_END, VERR_INVALID_PARAMETER);
1136
1137	rc = RTSemFastMutexRequest(pGMM->Mtx);
1138	AssertRC(rc);
1139	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1140	{
1141	if ( !pGVM->gmm.s.Reserved.cBasePages
1142	&& !pGVM->gmm.s.Reserved.cFixedPages
1143	&& !pGVM->gmm.s.Reserved.cShadowPages)
1144	{
1145	/*
1146	* Check if we can accomodate this.
1147	*/
1148	/* ... later ... */
1149	if (RT_SUCCESS(rc))
1150	{
1151	/*
1152	* Update the records.
1153	*/
1154	pGVM->gmm.s.Reserved.cBasePages = cBasePages;
1155	pGVM->gmm.s.Reserved.cFixedPages = cFixedPages;
1156	pGVM->gmm.s.Reserved.cShadowPages = cShadowPages;
1157	pGVM->gmm.s.enmPolicy = enmPolicy;
1158	pGVM->gmm.s.enmPriority = enmPriority;
1159	pGVM->gmm.s.fMayAllocate = true;
1160
1161	pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1162	pGMM->cRegisteredVMs++;
1163	}
1164	}
1165	else
1166	rc = VERR_WRONG_ORDER;
1167	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1168	}
1169	else
1170	rc = VERR_INTERNAL_ERROR_5;
1171	RTSemFastMutexRelease(pGMM->Mtx);
1172	LogFlow(("GMMR0InitialReservation: returns %Rrc\n", rc));
1173	return rc;
1174	}
1175
1176
1177	/**
1178	* VMMR0 request wrapper for GMMR0InitialReservation.
1179	*
1180	* @returns see GMMR0InitialReservation.
1181	* @param pVM Pointer to the shared VM structure.
1182	* @param idCpu VCPU id
1183	* @param pReq The request packet.
1184	*/
1185	GMMR0DECL(int) GMMR0InitialReservationReq(PVM pVM, VMCPUID idCpu, PGMMINITIALRESERVATIONREQ pReq)
1186	{
1187	/*
1188	* Validate input and pass it on.
1189	*/
1190	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1191	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1192	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
1193
1194	return GMMR0InitialReservation(pVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages, pReq->enmPolicy, pReq->enmPriority);
1195	}
1196
1197
1198	/**
1199	* This updates the memory reservation with the additional MMIO2 and ROM pages.
1200	*
1201	* @returns VBox status code.
1202	* @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1203	*
1204	* @param pVM Pointer to the shared VM structure.
1205	* @param idCpu VCPU id
1206	* @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1207	* This does not include MMIO2 and similar.
1208	* @param cShadowPages The number of pages that may be allocated for shadow pageing structures.
1209	* @param cFixedPages The number of pages that may be allocated for fixed objects like the
1210	* hyper heap, MMIO2 and similar.
1211	*
1212	* @thread EMT.
1213	*/
1214	GMMR0DECL(int) GMMR0UpdateReservation(PVM pVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages)
1215	{
1216	LogFlow(("GMMR0UpdateReservation: pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x\n",
1217	pVM, cBasePages, cShadowPages, cFixedPages));
1218
1219	/*
1220	* Validate, get basics and take the semaphore.
1221	*/
1222	PGMM pGMM;
1223	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1224	PGVM pGVM;
1225	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
1226	if (RT_FAILURE(rc))
1227	return rc;
1228
1229	AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1230	AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1231	AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1232
1233	rc = RTSemFastMutexRequest(pGMM->Mtx);
1234	AssertRC(rc);
1235	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1236	{
1237	if ( pGVM->gmm.s.Reserved.cBasePages
1238	&& pGVM->gmm.s.Reserved.cFixedPages
1239	&& pGVM->gmm.s.Reserved.cShadowPages)
1240	{
1241	/*
1242	* Check if we can accomodate this.
1243	*/
1244	/* ... later ... */
1245	if (RT_SUCCESS(rc))
1246	{
1247	/*
1248	* Update the records.
1249	*/
1250	pGMM->cReservedPages -= pGVM->gmm.s.Reserved.cBasePages
1251	+ pGVM->gmm.s.Reserved.cFixedPages
1252	+ pGVM->gmm.s.Reserved.cShadowPages;
1253	pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1254
1255	pGVM->gmm.s.Reserved.cBasePages = cBasePages;
1256	pGVM->gmm.s.Reserved.cFixedPages = cFixedPages;
1257	pGVM->gmm.s.Reserved.cShadowPages = cShadowPages;
1258	}
1259	}
1260	else
1261	rc = VERR_WRONG_ORDER;
1262	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1263	}
1264	else
1265	rc = VERR_INTERNAL_ERROR_5;
1266	RTSemFastMutexRelease(pGMM->Mtx);
1267	LogFlow(("GMMR0UpdateReservation: returns %Rrc\n", rc));
1268	return rc;
1269	}
1270
1271
1272	/**
1273	* VMMR0 request wrapper for GMMR0UpdateReservation.
1274	*
1275	* @returns see GMMR0UpdateReservation.
1276	* @param pVM Pointer to the shared VM structure.
1277	* @param idCpu VCPU id
1278	* @param pReq The request packet.
1279	*/
1280	GMMR0DECL(int) GMMR0UpdateReservationReq(PVM pVM, VMCPUID idCpu, PGMMUPDATERESERVATIONREQ pReq)
1281	{
1282	/*
1283	* Validate input and pass it on.
1284	*/
1285	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1286	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1287	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
1288
1289	return GMMR0UpdateReservation(pVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages);
1290	}
1291
1292
1293	/**
1294	* Performs sanity checks on a free set.
1295	*
1296	* @returns Error count.
1297	*
1298	* @param pGMM Pointer to the GMM instance.
1299	* @param pSet Pointer to the set.
1300	* @param pszSetName The set name.
1301	* @param pszFunction The function from which it was called.
1302	* @param uLine The line number.
1303	*/
1304	static uint32_t gmmR0SanityCheckSet(PGMM pGMM, PGMMCHUNKFREESET pSet, const char *pszSetName,
1305	const char *pszFunction, unsigned uLineNo)
1306	{
1307	uint32_t cErrors = 0;
1308
1309	/*
1310	* Count the free pages in all the chunks and match it against pSet->cFreePages.
1311	*/
1312	uint32_t cPages = 0;
1313	for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1314	{
1315	for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1316	{
1317	/** @todo check that the chunk is hash into the right set. */
1318	cPages += pCur->cFree;
1319	}
1320	}
1321	if (RT_UNLIKELY(cPages != pSet->cFreePages))
1322	{
1323	SUPR0Printf("GMM insanity: found %#x pages in the %s set, expected %#x. (%s, line %u)\n",
1324	cPages, pszSetName, pSet->cFreePages, pszFunction, uLineNo);
1325	cErrors++;
1326	}
1327
1328	return cErrors;
1329	}
1330
1331
1332	/**
1333	* Performs some sanity checks on the GMM while owning lock.
1334	*
1335	* @returns Error count.
1336	*
1337	* @param pGMM Pointer to the GMM instance.
1338	* @param pszFunction The function from which it is called.
1339	* @param uLineNo The line number.
1340	*/
1341	static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo)
1342	{
1343	uint32_t cErrors = 0;
1344
1345	cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->Private, "private", pszFunction, uLineNo);
1346	cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->Shared, "shared", pszFunction, uLineNo);
1347	/** @todo add more sanity checks. */
1348
1349	return cErrors;
1350	}
1351
1352
1353	/**
1354	* Looks up a chunk in the tree and fill in the TLB entry for it.
1355	*
1356	* This is not expected to fail and will bitch if it does.
1357	*
1358	* @returns Pointer to the allocation chunk, NULL if not found.
1359	* @param pGMM Pointer to the GMM instance.
1360	* @param idChunk The ID of the chunk to find.
1361	* @param pTlbe Pointer to the TLB entry.
1362	*/
1363	static PGMMCHUNK gmmR0GetChunkSlow(PGMM pGMM, uint32_t idChunk, PGMMCHUNKTLBE pTlbe)
1364	{
1365	PGMMCHUNK pChunk = (PGMMCHUNK)RTAvlU32Get(&pGMM->pChunks, idChunk);
1366	AssertMsgReturn(pChunk, ("Chunk %#x not found!\n", idChunk), NULL);
1367	pTlbe->idChunk = idChunk;
1368	pTlbe->pChunk = pChunk;
1369	return pChunk;
1370	}
1371
1372
1373	/**
1374	* Finds a allocation chunk.
1375	*
1376	* This is not expected to fail and will bitch if it does.
1377	*
1378	* @returns Pointer to the allocation chunk, NULL if not found.
1379	* @param pGMM Pointer to the GMM instance.
1380	* @param idChunk The ID of the chunk to find.
1381	*/
1382	DECLINLINE(PGMMCHUNK) gmmR0GetChunk(PGMM pGMM, uint32_t idChunk)
1383	{
1384	/*
1385	* Do a TLB lookup, branch if not in the TLB.
1386	*/
1387	PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(idChunk)];
1388	if ( pTlbe->idChunk != idChunk
1389	\|\| !pTlbe->pChunk)
1390	return gmmR0GetChunkSlow(pGMM, idChunk, pTlbe);
1391	return pTlbe->pChunk;
1392	}
1393
1394
1395	/**
1396	* Finds a page.
1397	*
1398	* This is not expected to fail and will bitch if it does.
1399	*
1400	* @returns Pointer to the page, NULL if not found.
1401	* @param pGMM Pointer to the GMM instance.
1402	* @param idPage The ID of the page to find.
1403	*/
1404	DECLINLINE(PGMMPAGE) gmmR0GetPage(PGMM pGMM, uint32_t idPage)
1405	{
1406	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
1407	if (RT_LIKELY(pChunk))
1408	return &pChunk->aPages[idPage & GMM_PAGEID_IDX_MASK];
1409	return NULL;
1410	}
1411
1412
1413	/**
1414	* Unlinks the chunk from the free list it's currently on (if any).
1415	*
1416	* @param pChunk The allocation chunk.
1417	*/
1418	DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk)
1419	{
1420	PGMMCHUNKFREESET pSet = pChunk->pSet;
1421	if (RT_LIKELY(pSet))
1422	{
1423	pSet->cFreePages -= pChunk->cFree;
1424
1425	PGMMCHUNK pPrev = pChunk->pFreePrev;
1426	PGMMCHUNK pNext = pChunk->pFreeNext;
1427	if (pPrev)
1428	pPrev->pFreeNext = pNext;
1429	else
1430	pSet->apLists[(pChunk->cFree - 1) >> GMM_CHUNK_FREE_SET_SHIFT] = pNext;
1431	if (pNext)
1432	pNext->pFreePrev = pPrev;
1433
1434	pChunk->pSet = NULL;
1435	pChunk->pFreeNext = NULL;
1436	pChunk->pFreePrev = NULL;
1437	}
1438	else
1439	{
1440	Assert(!pChunk->pFreeNext);
1441	Assert(!pChunk->pFreePrev);
1442	Assert(!pChunk->cFree);
1443	}
1444	}
1445
1446
1447	/**
1448	* Links the chunk onto the appropriate free list in the specified free set.
1449	*
1450	* If no free entries, it's not linked into any list.
1451	*
1452	* @param pChunk The allocation chunk.
1453	* @param pSet The free set.
1454	*/
1455	DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet)
1456	{
1457	Assert(!pChunk->pSet);
1458	Assert(!pChunk->pFreeNext);
1459	Assert(!pChunk->pFreePrev);
1460
1461	if (pChunk->cFree > 0)
1462	{
1463	pChunk->pSet = pSet;
1464	pChunk->pFreePrev = NULL;
1465	unsigned iList = (pChunk->cFree - 1) >> GMM_CHUNK_FREE_SET_SHIFT;
1466	pChunk->pFreeNext = pSet->apLists[iList];
1467	if (pChunk->pFreeNext)
1468	pChunk->pFreeNext->pFreePrev = pChunk;
1469	pSet->apLists[iList] = pChunk;
1470
1471	pSet->cFreePages += pChunk->cFree;
1472	}
1473	}
1474
1475
1476	/**
1477	* Frees a Chunk ID.
1478	*
1479	* @param pGMM Pointer to the GMM instance.
1480	* @param idChunk The Chunk ID to free.
1481	*/
1482	static void gmmR0FreeChunkId(PGMM pGMM, uint32_t idChunk)
1483	{
1484	AssertReturnVoid(idChunk != NIL_GMM_CHUNKID);
1485	AssertMsg(ASMBitTest(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk));
1486	ASMAtomicBitClear(&pGMM->bmChunkId[0], idChunk);
1487	}
1488
1489
1490	/**
1491	* Allocates a new Chunk ID.
1492	*
1493	* @returns The Chunk ID.
1494	* @param pGMM Pointer to the GMM instance.
1495	*/
1496	static uint32_t gmmR0AllocateChunkId(PGMM pGMM)
1497	{
1498	AssertCompile(!((GMM_CHUNKID_LAST + 1) & 31)); /* must be a multiple of 32 */
1499	AssertCompile(NIL_GMM_CHUNKID == 0);
1500
1501	/*
1502	* Try the next sequential one.
1503	*/
1504	int32_t idChunk = ++pGMM->idChunkPrev;
1505	#if 0 /* test the fallback first */
1506	if ( idChunk <= GMM_CHUNKID_LAST
1507	&& idChunk > NIL_GMM_CHUNKID
1508	&& !ASMAtomicBitTestAndSet(&pVMM->bmChunkId[0], idChunk))
1509	return idChunk;
1510	#endif
1511
1512	/*
1513	* Scan sequentially from the last one.
1514	*/
1515	if ( (uint32_t)idChunk < GMM_CHUNKID_LAST
1516	&& idChunk > NIL_GMM_CHUNKID)
1517	{
1518	idChunk = ASMBitNextClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1, idChunk);
1519	if (idChunk > NIL_GMM_CHUNKID)
1520	{
1521	AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
1522	return pGMM->idChunkPrev = idChunk;
1523	}
1524	}
1525
1526	/*
1527	* Ok, scan from the start.
1528	* We're not racing anyone, so there is no need to expect failures or have restart loops.
1529	*/
1530	idChunk = ASMBitFirstClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1);
1531	AssertMsgReturn(idChunk > NIL_GMM_CHUNKID, ("%#x\n", idChunk), NIL_GVM_HANDLE);
1532	AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
1533
1534	return pGMM->idChunkPrev = idChunk;
1535	}
1536
1537
1538	/**
1539	* Registers a new chunk of memory.
1540	*
1541	* This is called by both gmmR0AllocateOneChunk and GMMR0SeedChunk. The caller
1542	* must own the global lock.
1543	*
1544	* @returns VBox status code.
1545	* @param pGMM Pointer to the GMM instance.
1546	* @param pSet Pointer to the set.
1547	* @param MemObj The memory object for the chunk.
1548	* @param hGVM The affinity of the chunk. NIL_GVM_HANDLE for no
1549	* affinity.
1550	* @param enmChunkType Chunk type (continuous or non-continuous)
1551	* @param ppChunk Chunk address (out)
1552	*/
1553	static int gmmR0RegisterChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, RTR0MEMOBJ MemObj, uint16_t hGVM, GMMCHUNKTYPE enmChunkType, PGMMCHUNK *ppChunk = NULL)
1554	{
1555	Assert(hGVM != NIL_GVM_HANDLE \|\| pGMM->fBoundMemoryMode);
1556
1557	int rc;
1558	PGMMCHUNK pChunk = (PGMMCHUNK)RTMemAllocZ(sizeof(*pChunk));
1559	if (pChunk)
1560	{
1561	/*
1562	* Initialize it.
1563	*/
1564	pChunk->MemObj = MemObj;
1565	pChunk->cFree = GMM_CHUNK_NUM_PAGES;
1566	pChunk->hGVM = hGVM;
1567	pChunk->iFreeHead = 0;
1568	pChunk->enmType = enmChunkType;
1569	for (unsigned iPage = 0; iPage < RT_ELEMENTS(pChunk->aPages) - 1; iPage++)
1570	{
1571	pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
1572	pChunk->aPages[iPage].Free.iNext = iPage + 1;
1573	}
1574	pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.u2State = GMM_PAGE_STATE_FREE;
1575	pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.iNext = UINT16_MAX;
1576
1577	/*
1578	* Allocate a Chunk ID and insert it into the tree.
1579	* This has to be done behind the mutex of course.
1580	*/
1581	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1582	{
1583	pChunk->Core.Key = gmmR0AllocateChunkId(pGMM);
1584	if ( pChunk->Core.Key != NIL_GMM_CHUNKID
1585	&& pChunk->Core.Key <= GMM_CHUNKID_LAST
1586	&& RTAvlU32Insert(&pGMM->pChunks, &pChunk->Core))
1587	{
1588	pGMM->cChunks++;
1589	gmmR0LinkChunk(pChunk, pSet);
1590	LogFlow(("gmmR0RegisterChunk: pChunk=%p id=%#x cChunks=%d\n", pChunk, pChunk->Core.Key, pGMM->cChunks));
1591
1592	if (ppChunk)
1593	*ppChunk = pChunk;
1594
1595	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1596	return VINF_SUCCESS;
1597	}
1598
1599	/* bail out */
1600	rc = VERR_INTERNAL_ERROR;
1601	}
1602	else
1603	rc = VERR_INTERNAL_ERROR_5;
1604
1605	RTMemFree(pChunk);
1606	}
1607	else
1608	rc = VERR_NO_MEMORY;
1609	return rc;
1610	}
1611
1612
1613	/**
1614	* Allocate one new chunk and add it to the specified free set.
1615	*
1616	* @returns VBox status code.
1617	* @param pGMM Pointer to the GMM instance.
1618	* @param pSet Pointer to the set.
1619	* @param hGVM The affinity of the new chunk.
1620	* @param enmChunkType Chunk type (continuous or non-continuous)
1621	* @param ppChunk Chunk address (out)
1622	*
1623	* @remarks Called without owning the mutex.
1624	*/
1625	static int gmmR0AllocateOneChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, uint16_t hGVM, GMMCHUNKTYPE enmChunkType, PGMMCHUNK *ppChunk = NULL)
1626	{
1627	/*
1628	* Allocate the memory.
1629	*/
1630	RTR0MEMOBJ MemObj;
1631	int rc;
1632
1633	AssertCompile(GMM_CHUNK_SIZE == _2M);
1634	AssertReturn(enmChunkType == GMMCHUNKTYPE_NON_CONTINUOUS \|\| enmChunkType == GMMCHUNKTYPE_CONTINUOUS, VERR_INVALID_PARAMETER);
1635
1636	/* Leave the lock temporarily as the allocation might take long. */
1637	RTSemFastMutexRelease(pGMM->Mtx);
1638	if (enmChunkType == GMMCHUNKTYPE_NON_CONTINUOUS)
1639	rc = RTR0MemObjAllocPhysNC(&MemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS);
1640	else
1641	rc = RTR0MemObjAllocPhysEx(&MemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS, GMM_CHUNK_SIZE);
1642
1643	/* Grab the lock again. */
1644	int rc2 = RTSemFastMutexRequest(pGMM->Mtx);
1645	AssertRCReturn(rc2, rc2);
1646
1647	if (RT_SUCCESS(rc))
1648	{
1649	rc = gmmR0RegisterChunk(pGMM, pSet, MemObj, hGVM, enmChunkType, ppChunk);
1650	if (RT_FAILURE(rc))
1651	RTR0MemObjFree(MemObj, false /* fFreeMappings */);
1652	}
1653	/** @todo Check that RTR0MemObjAllocPhysNC always returns VERR_NO_MEMORY on
1654	* allocation failure. */
1655	return rc;
1656	}
1657
1658
1659	/**
1660	* Attempts to allocate more pages until the requested amount is met.
1661	*
1662	* @returns VBox status code.
1663	* @param pGMM Pointer to the GMM instance data.
1664	* @param pGVM The calling VM.
1665	* @param pSet Pointer to the free set to grow.
1666	* @param cPages The number of pages needed.
1667	*
1668	* @remarks Called owning the mutex, but will leave it temporarily while
1669	* allocating the memory!
1670	*/
1671	static int gmmR0AllocateMoreChunks(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet, uint32_t cPages)
1672	{
1673	Assert(!pGMM->fLegacyAllocationMode);
1674
1675	if (!GMM_CHECK_SANITY_IN_LOOPS(pGMM))
1676	return VERR_INTERNAL_ERROR_4;
1677
1678	if (!pGMM->fBoundMemoryMode)
1679	{
1680	/*
1681	* Try steal free chunks from the other set first. (Only take 100% free chunks.)
1682	*/
1683	PGMMCHUNKFREESET pOtherSet = pSet == &pGMM->Private ? &pGMM->Shared : &pGMM->Private;
1684	while ( pSet->cFreePages < cPages
1685	&& pOtherSet->cFreePages >= GMM_CHUNK_NUM_PAGES)
1686	{
1687	PGMMCHUNK pChunk = pOtherSet->apLists[GMM_CHUNK_FREE_SET_LISTS - 1];
1688	while (pChunk && pChunk->cFree != GMM_CHUNK_NUM_PAGES)
1689	pChunk = pChunk->pFreeNext;
1690	if (!pChunk)
1691	break;
1692
1693	gmmR0UnlinkChunk(pChunk);
1694	gmmR0LinkChunk(pChunk, pSet);
1695	}
1696
1697	/*
1698	* If we need still more pages, allocate new chunks.
1699	* Note! We will leave the mutex while doing the allocation,
1700	*/
1701	while (pSet->cFreePages < cPages)
1702	{
1703	int rc = gmmR0AllocateOneChunk(pGMM, pSet, pGVM->hSelf, GMMCHUNKTYPE_NON_CONTINUOUS);
1704	if (RT_FAILURE(rc))
1705	return rc;
1706	if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1707	return VERR_INTERNAL_ERROR_5;
1708	}
1709	}
1710	else
1711	{
1712	/*
1713	* The memory is bound to the VM allocating it, so we have to count
1714	* the free pages carefully as well as making sure we brand them with
1715	* our VM handle.
1716	*
1717	* Note! We will leave the mutex while doing the allocation,
1718	*/
1719	uint16_t const hGVM = pGVM->hSelf;
1720	for (;;)
1721	{
1722	/* Count and see if we've reached the goal. */
1723	uint32_t cPagesFound = 0;
1724	for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1725	for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1726	if (pCur->hGVM == hGVM)
1727	{
1728	cPagesFound += pCur->cFree;
1729	if (cPagesFound >= cPages)
1730	break;
1731	}
1732	if (cPagesFound >= cPages)
1733	break;
1734
1735	/* Allocate more. */
1736	int rc = gmmR0AllocateOneChunk(pGMM, pSet, hGVM, GMMCHUNKTYPE_NON_CONTINUOUS);
1737	if (RT_FAILURE(rc))
1738	return rc;
1739	if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1740	return VERR_INTERNAL_ERROR_5;
1741	}
1742	}
1743
1744	return VINF_SUCCESS;
1745	}
1746
1747
1748	/**
1749	* Allocates one private page.
1750	*
1751	* Worker for gmmR0AllocatePages.
1752	*
1753	* @param pGMM Pointer to the GMM instance data.
1754	* @param hGVM The GVM handle of the VM requesting memory.
1755	* @param pChunk The chunk to allocate it from.
1756	* @param pPageDesc The page descriptor.
1757	*/
1758	static void gmmR0AllocatePage(PGMM pGMM, uint32_t hGVM, PGMMCHUNK pChunk, PGMMPAGEDESC pPageDesc)
1759	{
1760	/* update the chunk stats. */
1761	if (pChunk->hGVM == NIL_GVM_HANDLE)
1762	pChunk->hGVM = hGVM;
1763	Assert(pChunk->cFree);
1764	pChunk->cFree--;
1765	pChunk->cPrivate++;
1766
1767	/* unlink the first free page. */
1768	const uint32_t iPage = pChunk->iFreeHead;
1769	AssertReleaseMsg(iPage < RT_ELEMENTS(pChunk->aPages), ("%d\n", iPage));
1770	PGMMPAGE pPage = &pChunk->aPages[iPage];
1771	Assert(GMM_PAGE_IS_FREE(pPage));
1772	pChunk->iFreeHead = pPage->Free.iNext;
1773	Log3(("A pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x iNext=%#x\n",
1774	pPage, iPage, (pChunk->Core.Key << GMM_CHUNKID_SHIFT) \| iPage,
1775	pPage->Common.u2State, pChunk->iFreeHead, pPage->Free.iNext));
1776
1777	/* make the page private. */
1778	pPage->u = 0;
1779	AssertCompile(GMM_PAGE_STATE_PRIVATE == 0);
1780	pPage->Private.hGVM = hGVM;
1781	AssertCompile(NIL_RTHCPHYS >= GMM_GCPHYS_LAST);
1782	AssertCompile(GMM_GCPHYS_UNSHAREABLE >= GMM_GCPHYS_LAST);
1783	if (pPageDesc->HCPhysGCPhys <= GMM_GCPHYS_LAST)
1784	pPage->Private.pfn = pPageDesc->HCPhysGCPhys >> PAGE_SHIFT;
1785	else
1786	pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE; /* unshareable / unassigned - same thing. */
1787
1788	/* update the page descriptor. */
1789	pPageDesc->HCPhysGCPhys = RTR0MemObjGetPagePhysAddr(pChunk->MemObj, iPage);
1790	Assert(pPageDesc->HCPhysGCPhys != NIL_RTHCPHYS);
1791	pPageDesc->idPage = (pChunk->Core.Key << GMM_CHUNKID_SHIFT) \| iPage;
1792	pPageDesc->idSharedPage = NIL_GMM_PAGEID;
1793	}
1794
1795
1796	/**
1797	* Common worker for GMMR0AllocateHandyPages and GMMR0AllocatePages.
1798	*
1799	* @returns VBox status code:
1800	* @retval VINF_SUCCESS on success.
1801	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk or
1802	* gmmR0AllocateMoreChunks is necessary.
1803	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
1804	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
1805	* that is we're trying to allocate more than we've reserved.
1806	*
1807	* @param pGMM Pointer to the GMM instance data.
1808	* @param pGVM Pointer to the shared VM structure.
1809	* @param cPages The number of pages to allocate.
1810	* @param paPages Pointer to the page descriptors.
1811	* See GMMPAGEDESC for details on what is expected on input.
1812	* @param enmAccount The account to charge.
1813	*/
1814	static int gmmR0AllocatePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
1815	{
1816	/*
1817	* Check allocation limits.
1818	*/
1819	if (RT_UNLIKELY(pGMM->cAllocatedPages + cPages > pGMM->cMaxPages))
1820	return VERR_GMM_HIT_GLOBAL_LIMIT;
1821
1822	switch (enmAccount)
1823	{
1824	case GMMACCOUNT_BASE:
1825	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cPages > pGVM->gmm.s.Reserved.cBasePages))
1826	{
1827	Log(("gmmR0AllocatePages:Base: Reserved=%#llx Allocated+Ballooned+Requested=%#llx+%#llx+%#x!\n",
1828	pGVM->gmm.s.Reserved.cBasePages, pGVM->gmm.s.Allocated.cBasePages, pGVM->gmm.s.cBalloonedPages, cPages));
1829	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
1830	}
1831	break;
1832	case GMMACCOUNT_SHADOW:
1833	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cShadowPages + cPages > pGVM->gmm.s.Reserved.cShadowPages))
1834	{
1835	Log(("gmmR0AllocatePages:Shadow: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
1836	pGVM->gmm.s.Reserved.cShadowPages, pGVM->gmm.s.Allocated.cShadowPages, cPages));
1837	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
1838	}
1839	break;
1840	case GMMACCOUNT_FIXED:
1841	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cFixedPages + cPages > pGVM->gmm.s.Reserved.cFixedPages))
1842	{
1843	Log(("gmmR0AllocatePages:Fixed: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
1844	pGVM->gmm.s.Reserved.cFixedPages, pGVM->gmm.s.Allocated.cFixedPages, cPages));
1845	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
1846	}
1847	break;
1848	default:
1849	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
1850	}
1851
1852	/*
1853	* Check if we need to allocate more memory or not. In bound memory mode this
1854	* is a bit extra work but it's easier to do it upfront than bailing out later.
1855	*/
1856	PGMMCHUNKFREESET pSet = &pGMM->Private;
1857	if (pSet->cFreePages < cPages)
1858	return VERR_GMM_SEED_ME;
1859	if (pGMM->fBoundMemoryMode)
1860	{
1861	uint16_t hGVM = pGVM->hSelf;
1862	uint32_t cPagesFound = 0;
1863	for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1864	for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1865	if (pCur->hGVM == hGVM)
1866	{
1867	cPagesFound += pCur->cFree;
1868	if (cPagesFound >= cPages)
1869	break;
1870	}
1871	if (cPagesFound < cPages)
1872	return VERR_GMM_SEED_ME;
1873	}
1874
1875	/*
1876	* Pick the pages.
1877	* Try make some effort keeping VMs sharing private chunks.
1878	*/
1879	uint16_t hGVM = pGVM->hSelf;
1880	uint32_t iPage = 0;
1881
1882	/* first round, pick from chunks with an affinity to the VM. */
1883	for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists) && iPage < cPages; i++)
1884	{
1885	PGMMCHUNK pCurFree = NULL;
1886	PGMMCHUNK pCur = pSet->apLists[i];
1887	while (pCur && iPage < cPages)
1888	{
1889	PGMMCHUNK pNext = pCur->pFreeNext;
1890
1891	if ( pCur->hGVM == hGVM
1892	&& pCur->cFree < GMM_CHUNK_NUM_PAGES)
1893	{
1894	gmmR0UnlinkChunk(pCur);
1895	for (; pCur->cFree && iPage < cPages; iPage++)
1896	gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
1897	gmmR0LinkChunk(pCur, pSet);
1898	}
1899
1900	pCur = pNext;
1901	}
1902	}
1903
1904	if (iPage < cPages)
1905	{
1906	/* second round, pick pages from the 100% empty chunks we just skipped above. */
1907	PGMMCHUNK pCurFree = NULL;
1908	PGMMCHUNK pCur = pSet->apLists[RT_ELEMENTS(pSet->apLists) - 1];
1909	while (pCur && iPage < cPages)
1910	{
1911	PGMMCHUNK pNext = pCur->pFreeNext;
1912
1913	if ( pCur->cFree == GMM_CHUNK_NUM_PAGES
1914	&& ( pCur->hGVM == hGVM
1915	\|\| !pGMM->fBoundMemoryMode))
1916	{
1917	gmmR0UnlinkChunk(pCur);
1918	for (; pCur->cFree && iPage < cPages; iPage++)
1919	gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
1920	gmmR0LinkChunk(pCur, pSet);
1921	}
1922
1923	pCur = pNext;
1924	}
1925	}
1926
1927	if ( iPage < cPages
1928	&& !pGMM->fBoundMemoryMode)
1929	{
1930	/* third round, disregard affinity. */
1931	unsigned i = RT_ELEMENTS(pSet->apLists);
1932	while (i-- > 0 && iPage < cPages)
1933	{
1934	PGMMCHUNK pCurFree = NULL;
1935	PGMMCHUNK pCur = pSet->apLists[i];
1936	while (pCur && iPage < cPages)
1937	{
1938	PGMMCHUNK pNext = pCur->pFreeNext;
1939
1940	if ( pCur->cFree > GMM_CHUNK_NUM_PAGES / 2
1941	&& cPages >= GMM_CHUNK_NUM_PAGES / 2)
1942	pCur->hGVM = hGVM; /* change chunk affinity */
1943
1944	gmmR0UnlinkChunk(pCur);
1945	for (; pCur->cFree && iPage < cPages; iPage++)
1946	gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
1947	gmmR0LinkChunk(pCur, pSet);
1948
1949	pCur = pNext;
1950	}
1951	}
1952	}
1953
1954	/*
1955	* Update the account.
1956	*/
1957	switch (enmAccount)
1958	{
1959	case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages += iPage; break;
1960	case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages += iPage; break;
1961	case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages += iPage; break;
1962	default:
1963	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
1964	}
1965	pGVM->gmm.s.cPrivatePages += iPage;
1966	pGMM->cAllocatedPages += iPage;
1967
1968	AssertMsgReturn(iPage == cPages, ("%u != %u\n", iPage, cPages), VERR_INTERNAL_ERROR);
1969
1970	/*
1971	* Check if we've reached some threshold and should kick one or two VMs and tell
1972	* them to inflate their balloons a bit more... later.
1973	*/
1974
1975	return VINF_SUCCESS;
1976	}
1977
1978
1979	/**
1980	* Updates the previous allocations and allocates more pages.
1981	*
1982	* The handy pages are always taken from the 'base' memory account.
1983	* The allocated pages are not cleared and will contains random garbage.
1984	*
1985	* @returns VBox status code:
1986	* @retval VINF_SUCCESS on success.
1987	* @retval VERR_NOT_OWNER if the caller is not an EMT.
1988	* @retval VERR_GMM_PAGE_NOT_FOUND if one of the pages to update wasn't found.
1989	* @retval VERR_GMM_PAGE_NOT_PRIVATE if one of the pages to update wasn't a
1990	* private page.
1991	* @retval VERR_GMM_PAGE_NOT_SHARED if one of the pages to update wasn't a
1992	* shared page.
1993	* @retval VERR_GMM_NOT_PAGE_OWNER if one of the pages to be updated wasn't
1994	* owned by the VM.
1995	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
1996	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
1997	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
1998	* that is we're trying to allocate more than we've reserved.
1999	*
2000	* @param pVM Pointer to the shared VM structure.
2001	* @param idCpu VCPU id
2002	* @param cPagesToUpdate The number of pages to update (starting from the head).
2003	* @param cPagesToAlloc The number of pages to allocate (starting from the head).
2004	* @param paPages The array of page descriptors.
2005	* See GMMPAGEDESC for details on what is expected on input.
2006	* @thread EMT.
2007	*/
2008	GMMR0DECL(int) GMMR0AllocateHandyPages(PVM pVM, VMCPUID idCpu, uint32_t cPagesToUpdate, uint32_t cPagesToAlloc, PGMMPAGEDESC paPages)
2009	{
2010	LogFlow(("GMMR0AllocateHandyPages: pVM=%p cPagesToUpdate=%#x cPagesToAlloc=%#x paPages=%p\n",
2011	pVM, cPagesToUpdate, cPagesToAlloc, paPages));
2012
2013	/*
2014	* Validate, get basics and take the semaphore.
2015	* (This is a relatively busy path, so make predictions where possible.)
2016	*/
2017	PGMM pGMM;
2018	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2019	PGVM pGVM;
2020	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2021	if (RT_FAILURE(rc))
2022	return rc;
2023
2024	AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2025	AssertMsgReturn( (cPagesToUpdate && cPagesToUpdate < 1024)
2026	\|\| (cPagesToAlloc && cPagesToAlloc < 1024),
2027	("cPagesToUpdate=%#x cPagesToAlloc=%#x\n", cPagesToUpdate, cPagesToAlloc),
2028	VERR_INVALID_PARAMETER);
2029
2030	unsigned iPage = 0;
2031	for (; iPage < cPagesToUpdate; iPage++)
2032	{
2033	AssertMsgReturn( ( paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
2034	&& !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK))
2035	\|\| paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
2036	\|\| paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE,
2037	("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys),
2038	VERR_INVALID_PARAMETER);
2039	AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2040	/\|\| paPages[iPage].idPage == NIL_GMM_PAGEID/,
2041	("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2042	AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2043	/\|\| paPages[iPage].idSharedPage == NIL_GMM_PAGEID/,
2044	("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2045	}
2046
2047	for (; iPage < cPagesToAlloc; iPage++)
2048	{
2049	AssertMsgReturn(paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS, ("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys), VERR_INVALID_PARAMETER);
2050	AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2051	AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2052	}
2053
2054	rc = RTSemFastMutexRequest(pGMM->Mtx);
2055	AssertRC(rc);
2056	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2057	{
2058
2059	/* No allocations before the initial reservation has been made! */
2060	if (RT_LIKELY( pGVM->gmm.s.Reserved.cBasePages
2061	&& pGVM->gmm.s.Reserved.cFixedPages
2062	&& pGVM->gmm.s.Reserved.cShadowPages))
2063	{
2064	/*
2065	* Perform the updates.
2066	* Stop on the first error.
2067	*/
2068	for (iPage = 0; iPage < cPagesToUpdate; iPage++)
2069	{
2070	if (paPages[iPage].idPage != NIL_GMM_PAGEID)
2071	{
2072	PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idPage);
2073	if (RT_LIKELY(pPage))
2074	{
2075	if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
2076	{
2077	if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
2078	{
2079	AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
2080	if (RT_LIKELY(paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST))
2081	pPage->Private.pfn = paPages[iPage].HCPhysGCPhys >> PAGE_SHIFT;
2082	else if (paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE)
2083	pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE;
2084	/* else: NIL_RTHCPHYS nothing */
2085
2086	paPages[iPage].idPage = NIL_GMM_PAGEID;
2087	paPages[iPage].HCPhysGCPhys = NIL_RTHCPHYS;
2088	}
2089	else
2090	{
2091	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not owner! hGVM=%#x hSelf=%#x\n",
2092	iPage, paPages[iPage].idPage, pPage->Private.hGVM, pGVM->hSelf));
2093	rc = VERR_GMM_NOT_PAGE_OWNER;
2094	break;
2095	}
2096	}
2097	else
2098	{
2099	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not private! %.Rhxs\n", iPage, paPages[iPage].idPage, sizeof(pPage), pPage));
2100	rc = VERR_GMM_PAGE_NOT_PRIVATE;
2101	break;
2102	}
2103	}
2104	else
2105	{
2106	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (private)\n", iPage, paPages[iPage].idPage));
2107	rc = VERR_GMM_PAGE_NOT_FOUND;
2108	break;
2109	}
2110	}
2111
2112	if (paPages[iPage].idSharedPage != NIL_GMM_PAGEID)
2113	{
2114	PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idSharedPage);
2115	if (RT_LIKELY(pPage))
2116	{
2117	if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
2118	{
2119	AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
2120	Assert(pPage->Shared.cRefs);
2121	Assert(pGVM->gmm.s.cSharedPages);
2122	Assert(pGVM->gmm.s.Allocated.cBasePages);
2123
2124	Log(("GMMR0AllocateHandyPages: free shared page %x cRefs=%d\n", paPages[iPage].idSharedPage, pPage->Shared.cRefs));
2125	pGVM->gmm.s.cSharedPages--;
2126	pGVM->gmm.s.Allocated.cBasePages--;
2127	if (!--pPage->Shared.cRefs)
2128	gmmR0FreeSharedPage(pGMM, paPages[iPage].idSharedPage, pPage);
2129
2130	paPages[iPage].idSharedPage = NIL_GMM_PAGEID;
2131	}
2132	else
2133	{
2134	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not shared!\n", iPage, paPages[iPage].idSharedPage));
2135	rc = VERR_GMM_PAGE_NOT_SHARED;
2136	break;
2137	}
2138	}
2139	else
2140	{
2141	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (shared)\n", iPage, paPages[iPage].idSharedPage));
2142	rc = VERR_GMM_PAGE_NOT_FOUND;
2143	break;
2144	}
2145	}
2146	}
2147
2148	/*
2149	* Join paths with GMMR0AllocatePages for the allocation.
2150	* Note! gmmR0AllocateMoreChunks may leave the protection of the mutex!
2151	*/
2152	while (RT_SUCCESS(rc))
2153	{
2154	rc = gmmR0AllocatePages(pGMM, pGVM, cPagesToAlloc, paPages, GMMACCOUNT_BASE);
2155	if ( rc != VERR_GMM_SEED_ME
2156	\|\| pGMM->fLegacyAllocationMode)
2157	break;
2158	rc = gmmR0AllocateMoreChunks(pGMM, pGVM, &pGMM->Private, cPagesToAlloc);
2159	}
2160	}
2161	else
2162	rc = VERR_WRONG_ORDER;
2163	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2164	}
2165	else
2166	rc = VERR_INTERNAL_ERROR_5;
2167	RTSemFastMutexRelease(pGMM->Mtx);
2168	LogFlow(("GMMR0AllocateHandyPages: returns %Rrc\n", rc));
2169	return rc;
2170	}
2171
2172
2173	/**
2174	* Allocate one or more pages.
2175	*
2176	* This is typically used for ROMs and MMIO2 (VRAM) during VM creation.
2177	* The allocated pages are not cleared and will contains random garbage.
2178	*
2179	* @returns VBox status code:
2180	* @retval VINF_SUCCESS on success.
2181	* @retval VERR_NOT_OWNER if the caller is not an EMT.
2182	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2183	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2184	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2185	* that is we're trying to allocate more than we've reserved.
2186	*
2187	* @param pVM Pointer to the shared VM structure.
2188	* @param idCpu VCPU id
2189	* @param cPages The number of pages to allocate.
2190	* @param paPages Pointer to the page descriptors.
2191	* See GMMPAGEDESC for details on what is expected on input.
2192	* @param enmAccount The account to charge.
2193	*
2194	* @thread EMT.
2195	*/
2196	GMMR0DECL(int) GMMR0AllocatePages(PVM pVM, VMCPUID idCpu, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
2197	{
2198	LogFlow(("GMMR0AllocatePages: pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pVM, cPages, paPages, enmAccount));
2199
2200	/*
2201	* Validate, get basics and take the semaphore.
2202	*/
2203	PGMM pGMM;
2204	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2205	PGVM pGVM;
2206	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2207	if (RT_FAILURE(rc))
2208	return rc;
2209
2210	AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2211	AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
2212	AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
2213
2214	for (unsigned iPage = 0; iPage < cPages; iPage++)
2215	{
2216	AssertMsgReturn( paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
2217	\|\| paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE
2218	\|\| ( enmAccount == GMMACCOUNT_BASE
2219	&& paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
2220	&& !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK)),
2221	("#%#x: %RHp enmAccount=%d\n", iPage, paPages[iPage].HCPhysGCPhys, enmAccount),
2222	VERR_INVALID_PARAMETER);
2223	AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2224	AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2225	}
2226
2227	rc = RTSemFastMutexRequest(pGMM->Mtx);
2228	AssertRC(rc);
2229	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2230	{
2231
2232	/* No allocations before the initial reservation has been made! */
2233	if (RT_LIKELY( pGVM->gmm.s.Reserved.cBasePages
2234	&& pGVM->gmm.s.Reserved.cFixedPages
2235	&& pGVM->gmm.s.Reserved.cShadowPages))
2236	{
2237	/*
2238	* gmmR0AllocatePages seed loop.
2239	* Note! gmmR0AllocateMoreChunks may leave the protection of the mutex!
2240	*/
2241	while (RT_SUCCESS(rc))
2242	{
2243	rc = gmmR0AllocatePages(pGMM, pGVM, cPages, paPages, enmAccount);
2244	if ( rc != VERR_GMM_SEED_ME
2245	\|\| pGMM->fLegacyAllocationMode)
2246	break;
2247	rc = gmmR0AllocateMoreChunks(pGMM, pGVM, &pGMM->Private, cPages);
2248	}
2249	}
2250	else
2251	rc = VERR_WRONG_ORDER;
2252	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2253	}
2254	else
2255	rc = VERR_INTERNAL_ERROR_5;
2256	RTSemFastMutexRelease(pGMM->Mtx);
2257	LogFlow(("GMMR0AllocatePages: returns %Rrc\n", rc));
2258	return rc;
2259	}
2260
2261
2262	/**
2263	* VMMR0 request wrapper for GMMR0AllocatePages.
2264	*
2265	* @returns see GMMR0AllocatePages.
2266	* @param pVM Pointer to the shared VM structure.
2267	* @param idCpu VCPU id
2268	* @param pReq The request packet.
2269	*/
2270	GMMR0DECL(int) GMMR0AllocatePagesReq(PVM pVM, VMCPUID idCpu, PGMMALLOCATEPAGESREQ pReq)
2271	{
2272	/*
2273	* Validate input and pass it on.
2274	*/
2275	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2276	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2277	AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0]),
2278	("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0])),
2279	VERR_INVALID_PARAMETER);
2280	AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[pReq->cPages]),
2281	("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[pReq->cPages])),
2282	VERR_INVALID_PARAMETER);
2283
2284	return GMMR0AllocatePages(pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
2285	}
2286
2287	/**
2288	* Allocate a large page to represent guest RAM
2289	*
2290	* The allocated pages are not cleared and will contains random garbage.
2291	*
2292	* @returns VBox status code:
2293	* @retval VINF_SUCCESS on success.
2294	* @retval VERR_NOT_OWNER if the caller is not an EMT.
2295	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2296	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2297	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2298	* that is we're trying to allocate more than we've reserved.
2299	* @returns see GMMR0AllocatePages.
2300	* @param pVM Pointer to the shared VM structure.
2301	* @param idCpu VCPU id
2302	* @param cbPage Large page size
2303	*/
2304	GMMR0DECL(int) GMMR0AllocateLargePage(PVM pVM, VMCPUID idCpu, uint32_t cbPage, uint32_t pIdPage, RTHCPHYS pHCPhys)
2305	{
2306	LogFlow(("GMMR0AllocateLargePage: pVM=%p cbPage=%x\n", pVM, cbPage));
2307
2308	AssertReturn(cbPage == GMM_CHUNK_SIZE, VERR_INVALID_PARAMETER);
2309	AssertPtrReturn(pIdPage, VERR_INVALID_PARAMETER);
2310	AssertPtrReturn(pHCPhys, VERR_INVALID_PARAMETER);
2311
2312	/*
2313	* Validate, get basics and take the semaphore.
2314	*/
2315	PGMM pGMM;
2316	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2317	PGVM pGVM;
2318	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2319	if (RT_FAILURE(rc))
2320	return rc;
2321
2322	/* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
2323	if (pGMM->fLegacyAllocationMode)
2324	return VERR_NOT_SUPPORTED;
2325
2326	*pHCPhys = NIL_RTHCPHYS;
2327	*pIdPage = NIL_GMM_PAGEID;
2328
2329	rc = RTSemFastMutexRequest(pGMM->Mtx);
2330	AssertRCReturn(rc, rc);
2331	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2332	{
2333	const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
2334	PGMMCHUNK pChunk;
2335	GMMPAGEDESC PageDesc;
2336
2337	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cPages > pGVM->gmm.s.Reserved.cBasePages))
2338	{
2339	Log(("GMMR0AllocateLargePage: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
2340	pGVM->gmm.s.Reserved.cBasePages, pGVM->gmm.s.Allocated.cBasePages, cPages));
2341	RTSemFastMutexRelease(pGMM->Mtx);
2342	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2343	}
2344
2345	/* Allocate a new continous chunk. */
2346	rc = gmmR0AllocateOneChunk(pGMM, &pGMM->Private, pGVM->hSelf, GMMCHUNKTYPE_CONTINUOUS, &pChunk);
2347	if (RT_FAILURE(rc))
2348	{
2349	RTSemFastMutexRelease(pGMM->Mtx);
2350	return rc;
2351	}
2352
2353	/* Unlink the new chunk from the free list. */
2354	gmmR0UnlinkChunk(pChunk);
2355
2356	/* Allocate all pages. */
2357	gmmR0AllocatePage(pGMM, pGVM->hSelf, pChunk, &PageDesc);
2358	/* Return the first page as we'll use the whole chunk as one big page. */
2359	*pIdPage = PageDesc.idPage;
2360	*pHCPhys = PageDesc.HCPhysGCPhys;
2361
2362	for (unsigned i = 1; i < cPages; i++)
2363	gmmR0AllocatePage(pGMM, pGVM->hSelf, pChunk, &PageDesc);
2364
2365	/* Update accounting. */
2366	pGVM->gmm.s.Allocated.cBasePages += cPages;
2367	pGVM->gmm.s.cPrivatePages += cPages;
2368	pGMM->cAllocatedPages += cPages;
2369
2370	gmmR0LinkChunk(pChunk, &pGMM->Private);
2371	}
2372	else
2373	rc = VERR_INTERNAL_ERROR_5;
2374
2375	RTSemFastMutexRelease(pGMM->Mtx);
2376	LogFlow(("GMMR0AllocatePages: returns %Rrc\n", rc));
2377	return rc;
2378	}
2379
2380
2381	/**
2382	* Free a large page
2383	*
2384	* @returns VBox status code:
2385	* @param pVM Pointer to the shared VM structure.
2386	* @param idCpu VCPU id
2387	* @param idPage Large page id
2388	*/
2389	GMMR0DECL(int) GMMR0FreeLargePage(PVM pVM, VMCPUID idCpu, uint32_t idPage)
2390	{
2391	LogFlow(("GMMR0FreeLargePage: pVM=%p idPage=%x\n", pVM, idPage));
2392
2393	/*
2394	* Validate, get basics and take the semaphore.
2395	*/
2396	PGMM pGMM;
2397	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2398	PGVM pGVM;
2399	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2400	if (RT_FAILURE(rc))
2401	return rc;
2402
2403	/* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
2404	if (pGMM->fLegacyAllocationMode)
2405	return VERR_NOT_SUPPORTED;
2406
2407	rc = RTSemFastMutexRequest(pGMM->Mtx);
2408	AssertRC(rc);
2409	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2410	{
2411	const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
2412
2413	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages < cPages))
2414	{
2415	Log(("GMMR0FreeLargePage: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cBasePages, cPages));
2416	RTSemFastMutexRelease(pGMM->Mtx);
2417	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2418	}
2419
2420	PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
2421	if ( RT_LIKELY(pPage)
2422	&& RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
2423	{
2424	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2425	Assert(pChunk);
2426	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2427	Assert(pChunk->cPrivate > 0);
2428
2429	/* Release the memory immediately. */
2430	gmmR0FreeChunk(pGMM, NULL, pChunk);
2431
2432	/* Update accounting. */
2433	pGVM->gmm.s.Allocated.cBasePages -= cPages;
2434	pGVM->gmm.s.cPrivatePages -= cPages;
2435	pGMM->cAllocatedPages -= cPages;
2436	}
2437	else
2438	rc = VERR_GMM_PAGE_NOT_FOUND;
2439	}
2440	else
2441	rc = VERR_INTERNAL_ERROR_5;
2442
2443	RTSemFastMutexRelease(pGMM->Mtx);
2444	LogFlow(("GMMR0FreeLargePage: returns %Rrc\n", rc));
2445	return rc;
2446	}
2447
2448
2449	/**
2450	* VMMR0 request wrapper for GMMR0FreeLargePage.
2451	*
2452	* @returns see GMMR0FreeLargePage.
2453	* @param pVM Pointer to the shared VM structure.
2454	* @param idCpu VCPU id
2455	* @param pReq The request packet.
2456	*/
2457	GMMR0DECL(int) GMMR0FreeLargePageReq(PVM pVM, VMCPUID idCpu, PGMMFREELARGEPAGEREQ pReq)
2458	{
2459	/*
2460	* Validate input and pass it on.
2461	*/
2462	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2463	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2464	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMFREEPAGESREQ),
2465	("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(GMMFREEPAGESREQ)),
2466	VERR_INVALID_PARAMETER);
2467
2468	return GMMR0FreeLargePage(pVM, idCpu, pReq->idPage);
2469	}
2470
2471	/**
2472	* Frees a chunk, giving it back to the host OS.
2473	*
2474	* @param pGMM Pointer to the GMM instance.
2475	* @param pGVM This is set when called from GMMR0CleanupVM so we can
2476	* unmap and free the chunk in one go.
2477	* @param pChunk The chunk to free.
2478	*/
2479	static void gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
2480	{
2481	Assert(pChunk->Core.Key != NIL_GMM_CHUNKID);
2482
2483	/*
2484	* Cleanup hack! Unmap the chunk from the callers address space.
2485	*/
2486	if ( pChunk->cMappings
2487	&& pGVM)
2488	gmmR0UnmapChunk(pGMM, pGVM, pChunk);
2489
2490	/*
2491	* If there are current mappings of the chunk, then request the
2492	* VMs to unmap them. Reposition the chunk in the free list so
2493	* it won't be a likely candidate for allocations.
2494	*/
2495	if (pChunk->cMappings)
2496	{
2497	/** @todo R0 -> VM request */
2498	/* The chunk can be owned by more than one VM if fBoundMemoryMode is false! */
2499	}
2500	else
2501	{
2502	/*
2503	* Try free the memory object.
2504	*/
2505	int rc = RTR0MemObjFree(pChunk->MemObj, false /* fFreeMappings */);
2506	if (RT_SUCCESS(rc))
2507	{
2508	pChunk->MemObj = NIL_RTR0MEMOBJ;
2509
2510	/*
2511	* Unlink it from everywhere.
2512	*/
2513	gmmR0UnlinkChunk(pChunk);
2514
2515	PAVLU32NODECORE pCore = RTAvlU32Remove(&pGMM->pChunks, pChunk->Core.Key);
2516	Assert(pCore == &pChunk->Core); NOREF(pCore);
2517
2518	PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(pChunk->Core.Key)];
2519	if (pTlbe->pChunk == pChunk)
2520	{
2521	pTlbe->idChunk = NIL_GMM_CHUNKID;
2522	pTlbe->pChunk = NULL;
2523	}
2524
2525	Assert(pGMM->cChunks > 0);
2526	pGMM->cChunks--;
2527
2528	/*
2529	* Free the Chunk ID and struct.
2530	*/
2531	gmmR0FreeChunkId(pGMM, pChunk->Core.Key);
2532	pChunk->Core.Key = NIL_GMM_CHUNKID;
2533
2534	RTMemFree(pChunk->paMappings);
2535	pChunk->paMappings = NULL;
2536
2537	RTMemFree(pChunk);
2538	}
2539	else
2540	AssertRC(rc);
2541	}
2542	}
2543
2544
2545	/**
2546	* Free page worker.
2547	*
2548	* The caller does all the statistic decrementing, we do all the incrementing.
2549	*
2550	* @param pGMM Pointer to the GMM instance data.
2551	* @param pChunk Pointer to the chunk this page belongs to.
2552	* @param idPage The Page ID.
2553	* @param pPage Pointer to the page.
2554	*/
2555	static void gmmR0FreePageWorker(PGMM pGMM, PGMMCHUNK pChunk, uint32_t idPage, PGMMPAGE pPage)
2556	{
2557	Log3(("F pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x\n",
2558	pPage, pPage - &pChunk->aPages[0], idPage, pPage->Common.u2State, pChunk->iFreeHead)); NOREF(idPage);
2559
2560	/*
2561	* Put the page on the free list.
2562	*/
2563	pPage->u = 0;
2564	pPage->Free.u2State = GMM_PAGE_STATE_FREE;
2565	Assert(pChunk->iFreeHead < RT_ELEMENTS(pChunk->aPages) \|\| pChunk->iFreeHead == UINT16_MAX);
2566	pPage->Free.iNext = pChunk->iFreeHead;
2567	pChunk->iFreeHead = pPage - &pChunk->aPages[0];
2568
2569	/*
2570	* Update statistics (the cShared/cPrivate stats are up to date already),
2571	* and relink the chunk if necessary.
2572	*/
2573	if ((pChunk->cFree & GMM_CHUNK_FREE_SET_MASK) == 0)
2574	{
2575	gmmR0UnlinkChunk(pChunk);
2576	pChunk->cFree++;
2577	gmmR0LinkChunk(pChunk, pChunk->cShared ? &pGMM->Shared : &pGMM->Private);
2578	}
2579	else
2580	{
2581	pChunk->cFree++;
2582	pChunk->pSet->cFreePages++;
2583
2584	/*
2585	* If the chunk becomes empty, consider giving memory back to the host OS.
2586	*
2587	* The current strategy is to try give it back if there are other chunks
2588	* in this free list, meaning if there are at least 240 free pages in this
2589	* category. Note that since there are probably mappings of the chunk,
2590	* it won't be freed up instantly, which probably screws up this logic
2591	* a bit...
2592	*/
2593	if (RT_UNLIKELY( pChunk->cFree == GMM_CHUNK_NUM_PAGES
2594	&& pChunk->pFreeNext
2595	&& pChunk->pFreePrev
2596	&& !pGMM->fLegacyAllocationMode))
2597	gmmR0FreeChunk(pGMM, NULL, pChunk);
2598	}
2599	}
2600
2601
2602	/**
2603	* Frees a shared page, the page is known to exist and be valid and such.
2604	*
2605	* @param pGMM Pointer to the GMM instance.
2606	* @param idPage The Page ID
2607	* @param pPage The page structure.
2608	*/
2609	DECLINLINE(void) gmmR0FreeSharedPage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage)
2610	{
2611	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2612	Assert(pChunk);
2613	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2614	Assert(pChunk->cShared > 0);
2615	Assert(pGMM->cSharedPages > 0);
2616	Assert(pGMM->cAllocatedPages > 0);
2617	Assert(!pPage->Shared.cRefs);
2618
2619	pChunk->cShared--;
2620	pGMM->cAllocatedPages--;
2621	pGMM->cSharedPages--;
2622	gmmR0FreePageWorker(pGMM, pChunk, idPage, pPage);
2623	}
2624
2625	#ifdef VBOX_WITH_PAGE_SHARING
2626	/**
2627	* Converts a private page to a shared page, the page is known to exist and be valid and such.
2628	*
2629	* @param pGMM Pointer to the GMM instance.
2630	* @param pGVM Pointer to the GVM instance.
2631	* @param HCPhys Host physical address
2632	* @param idPage The Page ID
2633	* @param pPage The page structure.
2634	*/
2635	DECLINLINE(void) gmmR0ConvertToSharedPage(PGMM pGMM, PGVM pGVM, RTHCPHYS HCPhys, uint32_t idPage, PGMMPAGE pPage)
2636	{
2637	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2638	Assert(pChunk);
2639	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2640	Assert(GMM_PAGE_IS_PRIVATE(pPage));
2641
2642	pChunk->cPrivate--;
2643	pChunk->cShared++;
2644
2645	pGMM->cSharedPages++;
2646
2647	pGVM->gmm.s.cSharedPages++;
2648	pGVM->gmm.s.cPrivatePages--;
2649
2650	/* Modify the page structure. */
2651	pPage->Shared.pfn = (uint32_t)(HCPhys >> PAGE_SHIFT);
2652	pPage->Shared.cRefs = 1;
2653	pPage->Common.u2State = GMM_PAGE_STATE_SHARED;
2654	}
2655
2656	/**
2657	* Increase the use count of a shared page, the page is known to exist and be valid and such.
2658	*
2659	* @param pGMM Pointer to the GMM instance.
2660	* @param pGVM Pointer to the GVM instance.
2661	* @param pPage The page structure.
2662	*/
2663	DECLINLINE(void) gmmR0UseSharedPage(PGMM pGMM, PGVM pGVM, PGMMPAGE pPage)
2664	{
2665	Assert(pGMM->cSharedPages > 0);
2666	Assert(pGMM->cAllocatedPages > 0);
2667
2668	pPage->Shared.cRefs++;
2669	pGVM->gmm.s.cSharedPages++;
2670	}
2671	#endif
2672
2673	/**
2674	* Frees a private page, the page is known to exist and be valid and such.
2675	*
2676	* @param pGMM Pointer to the GMM instance.
2677	* @param idPage The Page ID
2678	* @param pPage The page structure.
2679	*/
2680	DECLINLINE(void) gmmR0FreePrivatePage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage)
2681	{
2682	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2683	Assert(pChunk);
2684	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2685	Assert(pChunk->cPrivate > 0);
2686	Assert(pGMM->cAllocatedPages > 0);
2687
2688	pChunk->cPrivate--;
2689	pGMM->cAllocatedPages--;
2690	gmmR0FreePageWorker(pGMM, pChunk, idPage, pPage);
2691	}
2692
2693	/**
2694	* Common worker for GMMR0FreePages and GMMR0BalloonedPages.
2695	*
2696	* @returns VBox status code:
2697	* @retval xxx
2698	*
2699	* @param pGMM Pointer to the GMM instance data.
2700	* @param pGVM Pointer to the shared VM structure.
2701	* @param cPages The number of pages to free.
2702	* @param paPages Pointer to the page descriptors.
2703	* @param enmAccount The account this relates to.
2704	*/
2705	static int gmmR0FreePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
2706	{
2707	/*
2708	* Check that the request isn't impossible wrt to the account status.
2709	*/
2710	switch (enmAccount)
2711	{
2712	case GMMACCOUNT_BASE:
2713	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages < cPages))
2714	{
2715	Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cBasePages, cPages));
2716	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2717	}
2718	break;
2719	case GMMACCOUNT_SHADOW:
2720	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cShadowPages < cPages))
2721	{
2722	Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cShadowPages, cPages));
2723	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2724	}
2725	break;
2726	case GMMACCOUNT_FIXED:
2727	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cFixedPages < cPages))
2728	{
2729	Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cFixedPages, cPages));
2730	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2731	}
2732	break;
2733	default:
2734	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2735	}
2736
2737	/*
2738	* Walk the descriptors and free the pages.
2739	*
2740	* Statistics (except the account) are being updated as we go along,
2741	* unlike the alloc code. Also, stop on the first error.
2742	*/
2743	int rc = VINF_SUCCESS;
2744	uint32_t iPage;
2745	for (iPage = 0; iPage < cPages; iPage++)
2746	{
2747	uint32_t idPage = paPages[iPage].idPage;
2748	PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
2749	if (RT_LIKELY(pPage))
2750	{
2751	if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
2752	{
2753	if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
2754	{
2755	Assert(pGVM->gmm.s.cPrivatePages);
2756	pGVM->gmm.s.cPrivatePages--;
2757	gmmR0FreePrivatePage(pGMM, idPage, pPage);
2758	}
2759	else
2760	{
2761	Log(("gmmR0AllocatePages: #%#x/%#x: not owner! hGVM=%#x hSelf=%#x\n", iPage, idPage,
2762	pPage->Private.hGVM, pGVM->hSelf));
2763	rc = VERR_GMM_NOT_PAGE_OWNER;
2764	break;
2765	}
2766	}
2767	else if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
2768	{
2769	Assert(pGVM->gmm.s.cSharedPages);
2770	pGVM->gmm.s.cSharedPages--;
2771	Assert(pPage->Shared.cRefs);
2772	if (!--pPage->Shared.cRefs)
2773	gmmR0FreeSharedPage(pGMM, idPage, pPage);
2774	}
2775	else
2776	{
2777	Log(("gmmR0AllocatePages: #%#x/%#x: already free!\n", iPage, idPage));
2778	rc = VERR_GMM_PAGE_ALREADY_FREE;
2779	break;
2780	}
2781	}
2782	else
2783	{
2784	Log(("gmmR0AllocatePages: #%#x/%#x: not found!\n", iPage, idPage));
2785	rc = VERR_GMM_PAGE_NOT_FOUND;
2786	break;
2787	}
2788	paPages[iPage].idPage = NIL_GMM_PAGEID;
2789	}
2790
2791	/*
2792	* Update the account.
2793	*/
2794	switch (enmAccount)
2795	{
2796	case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages -= iPage; break;
2797	case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages -= iPage; break;
2798	case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages -= iPage; break;
2799	default:
2800	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2801	}
2802
2803	/*
2804	* Any threshold stuff to be done here?
2805	*/
2806
2807	return rc;
2808	}
2809
2810
2811	/**
2812	* Free one or more pages.
2813	*
2814	* This is typically used at reset time or power off.
2815	*
2816	* @returns VBox status code:
2817	* @retval xxx
2818	*
2819	* @param pVM Pointer to the shared VM structure.
2820	* @param idCpu VCPU id
2821	* @param cPages The number of pages to allocate.
2822	* @param paPages Pointer to the page descriptors containing the Page IDs for each page.
2823	* @param enmAccount The account this relates to.
2824	* @thread EMT.
2825	*/
2826	GMMR0DECL(int) GMMR0FreePages(PVM pVM, VMCPUID idCpu, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
2827	{
2828	LogFlow(("GMMR0FreePages: pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pVM, cPages, paPages, enmAccount));
2829
2830	/*
2831	* Validate input and get the basics.
2832	*/
2833	PGMM pGMM;
2834	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2835	PGVM pGVM;
2836	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2837	if (RT_FAILURE(rc))
2838	return rc;
2839
2840	AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2841	AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
2842	AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
2843
2844	for (unsigned iPage = 0; iPage < cPages; iPage++)
2845	AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2846	/\|\| paPages[iPage].idPage == NIL_GMM_PAGEID/,
2847	("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2848
2849	/*
2850	* Take the semaphore and call the worker function.
2851	*/
2852	rc = RTSemFastMutexRequest(pGMM->Mtx);
2853	AssertRC(rc);
2854	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2855	{
2856	rc = gmmR0FreePages(pGMM, pGVM, cPages, paPages, enmAccount);
2857	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2858	}
2859	else
2860	rc = VERR_INTERNAL_ERROR_5;
2861	RTSemFastMutexRelease(pGMM->Mtx);
2862	LogFlow(("GMMR0FreePages: returns %Rrc\n", rc));
2863	return rc;
2864	}
2865
2866
2867	/**
2868	* VMMR0 request wrapper for GMMR0FreePages.
2869	*
2870	* @returns see GMMR0FreePages.
2871	* @param pVM Pointer to the shared VM structure.
2872	* @param idCpu VCPU id
2873	* @param pReq The request packet.
2874	*/
2875	GMMR0DECL(int) GMMR0FreePagesReq(PVM pVM, VMCPUID idCpu, PGMMFREEPAGESREQ pReq)
2876	{
2877	/*
2878	* Validate input and pass it on.
2879	*/
2880	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2881	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2882	AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0]),
2883	("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0])),
2884	VERR_INVALID_PARAMETER);
2885	AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[pReq->cPages]),
2886	("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[pReq->cPages])),
2887	VERR_INVALID_PARAMETER);
2888
2889	return GMMR0FreePages(pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
2890	}
2891
2892
2893	/**
2894	* Report back on a memory ballooning request.
2895	*
2896	* The request may or may not have been initiated by the GMM. If it was initiated
2897	* by the GMM it is important that this function is called even if no pages were
2898	* ballooned.
2899	*
2900	* @returns VBox status code:
2901	* @retval VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH
2902	* @retval VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH
2903	* @retval VERR_GMM_OVERCOMMITED_TRY_AGAIN_IN_A_BIT - reset condition
2904	* indicating that we won't necessarily have sufficient RAM to boot
2905	* the VM again and that it should pause until this changes (we'll try
2906	* balloon some other VM). (For standard deflate we have little choice
2907	* but to hope the VM won't use the memory that was returned to it.)
2908	*
2909	* @param pVM Pointer to the shared VM structure.
2910	* @param idCpu VCPU id
2911	* @param enmAction Inflate/deflate/reset
2912	* @param cBalloonedPages The number of pages that was ballooned.
2913	*
2914	* @thread EMT.
2915	*/
2916	GMMR0DECL(int) GMMR0BalloonedPages(PVM pVM, VMCPUID idCpu, GMMBALLOONACTION enmAction, uint32_t cBalloonedPages)
2917	{
2918	LogFlow(("GMMR0BalloonedPages: pVM=%p enmAction=%d cBalloonedPages=%#x\n",
2919	pVM, enmAction, cBalloonedPages));
2920
2921	AssertMsgReturn(cBalloonedPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cBalloonedPages), VERR_INVALID_PARAMETER);
2922
2923	/*
2924	* Validate input and get the basics.
2925	*/
2926	PGMM pGMM;
2927	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2928	PGVM pGVM;
2929	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2930	if (RT_FAILURE(rc))
2931	return rc;
2932
2933	/*
2934	* Take the sempahore and do some more validations.
2935	*/
2936	rc = RTSemFastMutexRequest(pGMM->Mtx);
2937	AssertRC(rc);
2938	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2939	{
2940	switch (enmAction)
2941	{
2942	case GMMBALLOONACTION_INFLATE:
2943	{
2944	if (pGVM->gmm.s.Allocated.cBasePages >= cBalloonedPages)
2945	{
2946	/*
2947	* Record the ballooned memory.
2948	*/
2949	pGMM->cBalloonedPages += cBalloonedPages;
2950	if (pGVM->gmm.s.cReqBalloonedPages)
2951	{
2952	/* Codepath never taken. Might be interesting in the future to request ballooned memory from guests in low memory conditions.. */
2953	AssertFailed();
2954
2955	pGVM->gmm.s.cBalloonedPages += cBalloonedPages;
2956	pGVM->gmm.s.cReqActuallyBalloonedPages += cBalloonedPages;
2957	Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx Req=%#llx Actual=%#llx (pending)\n", cBalloonedPages,
2958	pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqBalloonedPages, pGVM->gmm.s.cReqActuallyBalloonedPages));
2959	}
2960	else
2961	{
2962	pGVM->gmm.s.cBalloonedPages += cBalloonedPages;
2963	Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx (user)\n",
2964	cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages));
2965	}
2966	}
2967	else
2968	rc = VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2969	break;
2970	}
2971
2972	case GMMBALLOONACTION_DEFLATE:
2973	{
2974	/* Deflate. */
2975	if (pGVM->gmm.s.cBalloonedPages >= cBalloonedPages)
2976	{
2977	/*
2978	* Record the ballooned memory.
2979	*/
2980	Assert(pGMM->cBalloonedPages >= cBalloonedPages);
2981	pGMM->cBalloonedPages -= cBalloonedPages;
2982	pGVM->gmm.s.cBalloonedPages -= cBalloonedPages;
2983	if (pGVM->gmm.s.cReqDeflatePages)
2984	{
2985	AssertFailed(); /* This is path is for later. */
2986	Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx Req=%#llx\n",
2987	cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqDeflatePages));
2988
2989	/*
2990	* Anything we need to do here now when the request has been completed?
2991	*/
2992	pGVM->gmm.s.cReqDeflatePages = 0;
2993	}
2994	else
2995	Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx (user)\n",
2996	cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages));
2997	}
2998	else
2999	rc = VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH;
3000	break;
3001	}
3002
3003	case GMMBALLOONACTION_RESET:
3004	{
3005	/* Reset to an empty balloon. */
3006	Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.cBalloonedPages);
3007
3008	pGMM->cBalloonedPages -= pGVM->gmm.s.cBalloonedPages;
3009	pGVM->gmm.s.cBalloonedPages = 0;
3010	break;
3011	}
3012
3013	default:
3014	rc = VERR_INVALID_PARAMETER;
3015	break;
3016	}
3017	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3018	}
3019	else
3020	rc = VERR_INTERNAL_ERROR_5;
3021
3022	RTSemFastMutexRelease(pGMM->Mtx);
3023	LogFlow(("GMMR0BalloonedPages: returns %Rrc\n", rc));
3024	return rc;
3025	}
3026
3027
3028	/**
3029	* VMMR0 request wrapper for GMMR0BalloonedPages.
3030	*
3031	* @returns see GMMR0BalloonedPages.
3032	* @param pVM Pointer to the shared VM structure.
3033	* @param idCpu VCPU id
3034	* @param pReq The request packet.
3035	*/
3036	GMMR0DECL(int) GMMR0BalloonedPagesReq(PVM pVM, VMCPUID idCpu, PGMMBALLOONEDPAGESREQ pReq)
3037	{
3038	/*
3039	* Validate input and pass it on.
3040	*/
3041	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3042	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3043	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMBALLOONEDPAGESREQ),
3044	("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMBALLOONEDPAGESREQ)),
3045	VERR_INVALID_PARAMETER);
3046
3047	return GMMR0BalloonedPages(pVM, idCpu, pReq->enmAction, pReq->cBalloonedPages);
3048	}
3049
3050	/**
3051	* Return memory statistics for the hypervisor
3052	*
3053	* @returns VBox status code:
3054	* @param pVM Pointer to the shared VM structure.
3055	* @param pReq The request packet.
3056	*/
3057	GMMR0DECL(int) GMMR0QueryHypervisorMemoryStatsReq(PVM pVM, PGMMMEMSTATSREQ pReq)
3058	{
3059	/*
3060	* Validate input and pass it on.
3061	*/
3062	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3063	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3064	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
3065	("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
3066	VERR_INVALID_PARAMETER);
3067
3068	/*
3069	* Validate input and get the basics.
3070	*/
3071	PGMM pGMM;
3072	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3073	pReq->cAllocPages = pGMM->cAllocatedPages;
3074	pReq->cFreePages = (pGMM->cChunks << (GMM_CHUNK_SHIFT- PAGE_SHIFT)) - pGMM->cAllocatedPages;
3075	pReq->cBalloonedPages = pGMM->cBalloonedPages;
3076	pReq->cMaxPages = pGMM->cMaxPages;
3077	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3078
3079	return VINF_SUCCESS;
3080	}
3081
3082	/**
3083	* Return memory statistics for the VM
3084	*
3085	* @returns VBox status code:
3086	* @param pVM Pointer to the shared VM structure.
3087	* @parma idCpu Cpu id.
3088	* @param pReq The request packet.
3089	*/
3090	GMMR0DECL(int) GMMR0QueryMemoryStatsReq(PVM pVM, VMCPUID idCpu, PGMMMEMSTATSREQ pReq)
3091	{
3092	/*
3093	* Validate input and pass it on.
3094	*/
3095	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3096	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3097	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
3098	("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
3099	VERR_INVALID_PARAMETER);
3100
3101	/*
3102	* Validate input and get the basics.
3103	*/
3104	PGMM pGMM;
3105	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3106	PGVM pGVM;
3107	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3108	if (RT_FAILURE(rc))
3109	return rc;
3110
3111	/*
3112	* Take the sempahore and do some more validations.
3113	*/
3114	rc = RTSemFastMutexRequest(pGMM->Mtx);
3115	AssertRC(rc);
3116	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3117	{
3118	pReq->cAllocPages = pGVM->gmm.s.Allocated.cBasePages;
3119	pReq->cBalloonedPages = pGVM->gmm.s.cBalloonedPages;
3120	pReq->cMaxPages = pGVM->gmm.s.Reserved.cBasePages;
3121	pReq->cFreePages = pReq->cMaxPages - pReq->cAllocPages;
3122	}
3123	else
3124	rc = VERR_INTERNAL_ERROR_5;
3125
3126	RTSemFastMutexRelease(pGMM->Mtx);
3127	LogFlow(("GMMR3QueryVMMemoryStats: returns %Rrc\n", rc));
3128	return rc;
3129	}
3130
3131	/**
3132	* Unmaps a chunk previously mapped into the address space of the current process.
3133	*
3134	* @returns VBox status code.
3135	* @param pGMM Pointer to the GMM instance data.
3136	* @param pGVM Pointer to the Global VM structure.
3137	* @param pChunk Pointer to the chunk to be unmapped.
3138	*/
3139	static int gmmR0UnmapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
3140	{
3141	if (!pGMM->fLegacyAllocationMode)
3142	{
3143	/*
3144	* Find the mapping and try unmapping it.
3145	*/
3146	for (uint32_t i = 0; i < pChunk->cMappings; i++)
3147	{
3148	Assert(pChunk->paMappings[i].pGVM && pChunk->paMappings[i].MapObj != NIL_RTR0MEMOBJ);
3149	if (pChunk->paMappings[i].pGVM == pGVM)
3150	{
3151	/* unmap */
3152	int rc = RTR0MemObjFree(pChunk->paMappings[i].MapObj, false /* fFreeMappings (NA) */);
3153	if (RT_SUCCESS(rc))
3154	{
3155	/* update the record. */
3156	pChunk->cMappings--;
3157	if (i < pChunk->cMappings)
3158	pChunk->paMappings[i] = pChunk->paMappings[pChunk->cMappings];
3159	pChunk->paMappings[pChunk->cMappings].MapObj = NIL_RTR0MEMOBJ;
3160	pChunk->paMappings[pChunk->cMappings].pGVM = NULL;
3161	}
3162	return rc;
3163	}
3164	}
3165	}
3166	else if (pChunk->hGVM == pGVM->hSelf)
3167	return VINF_SUCCESS;
3168
3169	Log(("gmmR0MapChunk: Chunk %#x is not mapped into pGVM=%p/%#x\n", pChunk->Core.Key, pGVM, pGVM->hSelf));
3170	return VERR_GMM_CHUNK_NOT_MAPPED;
3171	}
3172
3173
3174	/**
3175	* Maps a chunk into the user address space of the current process.
3176	*
3177	* @returns VBox status code.
3178	* @param pGMM Pointer to the GMM instance data.
3179	* @param pGVM Pointer to the Global VM structure.
3180	* @param pChunk Pointer to the chunk to be mapped.
3181	* @param ppvR3 Where to store the ring-3 address of the mapping.
3182	* In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will be
3183	* contain the address of the existing mapping.
3184	*/
3185	static int gmmR0MapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
3186	{
3187	/*
3188	* If we're in legacy mode this is simple.
3189	*/
3190	if (pGMM->fLegacyAllocationMode)
3191	{
3192	if (pChunk->hGVM != pGVM->hSelf)
3193	{
3194	Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
3195	return VERR_GMM_CHUNK_NOT_FOUND;
3196	}
3197
3198	*ppvR3 = RTR0MemObjAddressR3(pChunk->MemObj);
3199	return VINF_SUCCESS;
3200	}
3201
3202	/*
3203	* Check to see if the chunk is already mapped.
3204	*/
3205	for (uint32_t i = 0; i < pChunk->cMappings; i++)
3206	{
3207	Assert(pChunk->paMappings[i].pGVM && pChunk->paMappings[i].MapObj != NIL_RTR0MEMOBJ);
3208	if (pChunk->paMappings[i].pGVM == pGVM)
3209	{
3210	*ppvR3 = RTR0MemObjAddressR3(pChunk->paMappings[i].MapObj);
3211	Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
3212	return VERR_GMM_CHUNK_ALREADY_MAPPED;
3213	}
3214	}
3215
3216	/*
3217	* Do the mapping.
3218	*/
3219	RTR0MEMOBJ MapObj;
3220	int rc = RTR0MemObjMapUser(&MapObj, pChunk->MemObj, (RTR3PTR)-1, 0, RTMEM_PROT_READ \| RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
3221	if (RT_SUCCESS(rc))
3222	{
3223	/* reallocate the array? */
3224	if ((pChunk->cMappings & 1 /7/) == 0)
3225	{
3226	void pvMappings = RTMemRealloc(pChunk->paMappings, (pChunk->cMappings + 2 /8/) sizeof(pChunk->paMappings[0]));
3227	if (RT_UNLIKELY(!pvMappings))
3228	{
3229	rc = RTR0MemObjFree(MapObj, false /* fFreeMappings (NA) */);
3230	AssertRC(rc);
3231	return VERR_NO_MEMORY;
3232	}
3233	pChunk->paMappings = (PGMMCHUNKMAP)pvMappings;
3234	}
3235
3236	/* insert new entry */
3237	pChunk->paMappings[pChunk->cMappings].MapObj = MapObj;
3238	pChunk->paMappings[pChunk->cMappings].pGVM = pGVM;
3239	pChunk->cMappings++;
3240
3241	*ppvR3 = RTR0MemObjAddressR3(MapObj);
3242	}
3243
3244	return rc;
3245	}
3246
3247	/**
3248	* Check if a chunk is mapped into the specified VM
3249	*
3250	* @returns mapped yes/no
3251	* @param pGVM Pointer to the Global VM structure.
3252	* @param pChunk Pointer to the chunk to be mapped.
3253	* @param ppvR3 Where to store the ring-3 address of the mapping.
3254	*/
3255	static int gmmR0IsChunkMapped(PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
3256	{
3257	/*
3258	* Check to see if the chunk is already mapped.
3259	*/
3260	for (uint32_t i = 0; i < pChunk->cMappings; i++)
3261	{
3262	Assert(pChunk->paMappings[i].pGVM && pChunk->paMappings[i].MapObj != NIL_RTR0MEMOBJ);
3263	if (pChunk->paMappings[i].pGVM == pGVM)
3264	{
3265	*ppvR3 = RTR0MemObjAddressR3(pChunk->paMappings[i].MapObj);
3266	return true;
3267	}
3268	}
3269	*ppvR3 = NULL;
3270	return false;
3271	}
3272
3273	/**
3274	* Map a chunk and/or unmap another chunk.
3275	*
3276	* The mapping and unmapping applies to the current process.
3277	*
3278	* This API does two things because it saves a kernel call per mapping when
3279	* when the ring-3 mapping cache is full.
3280	*
3281	* @returns VBox status code.
3282	* @param pVM The VM.
3283	* @param idCpu VCPU id
3284	* @param idChunkMap The chunk to map. NIL_GMM_CHUNKID if nothing to map.
3285	* @param idChunkUnmap The chunk to unmap. NIL_GMM_CHUNKID if nothing to unmap.
3286	* @param ppvR3 Where to store the address of the mapped chunk. NULL is ok if nothing to map.
3287	* @thread EMT
3288	*/
3289	GMMR0DECL(int) GMMR0MapUnmapChunk(PVM pVM, VMCPUID idCpu, uint32_t idChunkMap, uint32_t idChunkUnmap, PRTR3PTR ppvR3)
3290	{
3291	LogFlow(("GMMR0MapUnmapChunk: pVM=%p idChunkMap=%#x idChunkUnmap=%#x ppvR3=%p\n",
3292	pVM, idChunkMap, idChunkUnmap, ppvR3));
3293
3294	/*
3295	* Validate input and get the basics.
3296	*/
3297	PGMM pGMM;
3298	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3299	PGVM pGVM;
3300	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3301	if (RT_FAILURE(rc))
3302	return rc;
3303
3304	AssertCompile(NIL_GMM_CHUNKID == 0);
3305	AssertMsgReturn(idChunkMap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkMap), VERR_INVALID_PARAMETER);
3306	AssertMsgReturn(idChunkUnmap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkUnmap), VERR_INVALID_PARAMETER);
3307
3308	if ( idChunkMap == NIL_GMM_CHUNKID
3309	&& idChunkUnmap == NIL_GMM_CHUNKID)
3310	return VERR_INVALID_PARAMETER;
3311
3312	if (idChunkMap != NIL_GMM_CHUNKID)
3313	{
3314	AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3315	*ppvR3 = NIL_RTR3PTR;
3316	}
3317
3318	/*
3319	* Take the semaphore and do the work.
3320	*
3321	* The unmapping is done last since it's easier to undo a mapping than
3322	* undoing an unmapping. The ring-3 mapping cache cannot not be so big
3323	* that it pushes the user virtual address space to within a chunk of
3324	* it it's limits, so, no problem here.
3325	*/
3326	rc = RTSemFastMutexRequest(pGMM->Mtx);
3327	AssertRC(rc);
3328	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3329	{
3330	PGMMCHUNK pMap = NULL;
3331	if (idChunkMap != NIL_GVM_HANDLE)
3332	{
3333	pMap = gmmR0GetChunk(pGMM, idChunkMap);
3334	if (RT_LIKELY(pMap))
3335	rc = gmmR0MapChunk(pGMM, pGVM, pMap, ppvR3);
3336	else
3337	{
3338	Log(("GMMR0MapUnmapChunk: idChunkMap=%#x\n", idChunkMap));
3339	rc = VERR_GMM_CHUNK_NOT_FOUND;
3340	}
3341	}
3342
3343	if ( idChunkUnmap != NIL_GMM_CHUNKID
3344	&& RT_SUCCESS(rc))
3345	{
3346	PGMMCHUNK pUnmap = gmmR0GetChunk(pGMM, idChunkUnmap);
3347	if (RT_LIKELY(pUnmap))
3348	rc = gmmR0UnmapChunk(pGMM, pGVM, pUnmap);
3349	else
3350	{
3351	Log(("GMMR0MapUnmapChunk: idChunkUnmap=%#x\n", idChunkUnmap));
3352	rc = VERR_GMM_CHUNK_NOT_FOUND;
3353	}
3354
3355	if (RT_FAILURE(rc) && pMap)
3356	gmmR0UnmapChunk(pGMM, pGVM, pMap);
3357	}
3358
3359	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3360	}
3361	else
3362	rc = VERR_INTERNAL_ERROR_5;
3363	RTSemFastMutexRelease(pGMM->Mtx);
3364
3365	LogFlow(("GMMR0MapUnmapChunk: returns %Rrc\n", rc));
3366	return rc;
3367	}
3368
3369
3370	/**
3371	* VMMR0 request wrapper for GMMR0MapUnmapChunk.
3372	*
3373	* @returns see GMMR0MapUnmapChunk.
3374	* @param pVM Pointer to the shared VM structure.
3375	* @param idCpu VCPU id
3376	* @param pReq The request packet.
3377	*/
3378	GMMR0DECL(int) GMMR0MapUnmapChunkReq(PVM pVM, VMCPUID idCpu, PGMMMAPUNMAPCHUNKREQ pReq)
3379	{
3380	/*
3381	* Validate input and pass it on.
3382	*/
3383	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3384	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3385	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
3386
3387	return GMMR0MapUnmapChunk(pVM, idCpu, pReq->idChunkMap, pReq->idChunkUnmap, &pReq->pvR3);
3388	}
3389
3390
3391	/**
3392	* Legacy mode API for supplying pages.
3393	*
3394	* The specified user address points to a allocation chunk sized block that
3395	* will be locked down and used by the GMM when the GM asks for pages.
3396	*
3397	* @returns VBox status code.
3398	* @param pVM The VM.
3399	* @param idCpu VCPU id
3400	* @param pvR3 Pointer to the chunk size memory block to lock down.
3401	*/
3402	GMMR0DECL(int) GMMR0SeedChunk(PVM pVM, VMCPUID idCpu, RTR3PTR pvR3)
3403	{
3404	/*
3405	* Validate input and get the basics.
3406	*/
3407	PGMM pGMM;
3408	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3409	PGVM pGVM;
3410	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3411	if (RT_FAILURE(rc))
3412	return rc;
3413
3414	AssertPtrReturn(pvR3, VERR_INVALID_POINTER);
3415	AssertReturn(!(PAGE_OFFSET_MASK & pvR3), VERR_INVALID_POINTER);
3416
3417	if (!pGMM->fLegacyAllocationMode)
3418	{
3419	Log(("GMMR0SeedChunk: not in legacy allocation mode!\n"));
3420	return VERR_NOT_SUPPORTED;
3421	}
3422
3423	/*
3424	* Lock the memory before taking the semaphore.
3425	*/
3426	RTR0MEMOBJ MemObj;
3427	rc = RTR0MemObjLockUser(&MemObj, pvR3, GMM_CHUNK_SIZE, RTMEM_PROT_READ \| RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
3428	if (RT_SUCCESS(rc))
3429	{
3430	/* Grab the lock. */
3431	rc = RTSemFastMutexRequest(pGMM->Mtx);
3432	AssertRCReturn(rc, rc);
3433
3434	/*
3435	* Add a new chunk with our hGVM.
3436	*/
3437	rc = gmmR0RegisterChunk(pGMM, &pGMM->Private, MemObj, pGVM->hSelf, GMMCHUNKTYPE_NON_CONTINUOUS);
3438	RTSemFastMutexRelease(pGMM->Mtx);
3439
3440	if (RT_FAILURE(rc))
3441	RTR0MemObjFree(MemObj, false /* fFreeMappings */);
3442	}
3443
3444	LogFlow(("GMMR0SeedChunk: rc=%d (pvR3=%p)\n", rc, pvR3));
3445	return rc;
3446	}
3447
3448
3449	/**
3450	* Registers a new shared module for the VM
3451	*
3452	* @returns VBox status code.
3453	* @param pVM VM handle
3454	* @param idCpu VCPU id
3455	* @param enmGuestOS Guest OS type
3456	* @param pszModuleName Module name
3457	* @param pszVersion Module version
3458	* @param GCBaseAddr Module base address
3459	* @param cbModule Module size
3460	* @param cRegions Number of shared region descriptors
3461	* @param pRegions Shared region(s)
3462	*/
3463	GMMR0DECL(int) GMMR0RegisterSharedModule(PVM pVM, VMCPUID idCpu, VBOXOSFAMILY enmGuestOS, char pszModuleName, char pszVersion, RTGCPTR GCBaseAddr, uint32_t cbModule,
3464	unsigned cRegions, VMMDEVSHAREDREGIONDESC *pRegions)
3465	{
3466	#ifdef VBOX_WITH_PAGE_SHARING
3467	/*
3468	* Validate input and get the basics.
3469	*/
3470	PGMM pGMM;
3471	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3472	PGVM pGVM;
3473	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3474	if (RT_FAILURE(rc))
3475	return rc;
3476
3477	Log(("GMMR0RegisterSharedModule %s %s base %RGv size %x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule));
3478
3479	/*
3480	* Take the sempahore and do some more validations.
3481	*/
3482	rc = RTSemFastMutexRequest(pGMM->Mtx);
3483	AssertRC(rc);
3484	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3485	{
3486	bool fNewModule = false;
3487
3488	/* Check if this module is already locally registered. */
3489	PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
3490	if (!pRecVM)
3491	{
3492	pRecVM = (PGMMSHAREDMODULEPERVM)RTMemAllocZ(sizeof(*pRecVM));
3493	if (!pRecVM)
3494	{
3495	AssertFailed();
3496	rc = VERR_NO_MEMORY;
3497	goto end;
3498	}
3499	pRecVM->Core.Key = GCBaseAddr;
3500
3501	bool ret = RTAvlGCPtrInsert(&pGVM->gmm.s.pSharedModuleTree, &pRecVM->Core);
3502	Assert(ret);
3503
3504	Log(("GMMR0RegisterSharedModule: new local module %s\n", pszModuleName));
3505	fNewModule = true;
3506	}
3507	else
3508	rc = VINF_PGM_SHARED_MODULE_ALREADY_REGISTERED;
3509
3510	/* Check if this module is already globally registered. */
3511	PGMMSHAREDMODULE pGlobalModule = (PGMMSHAREDMODULE)RTAvlGCPtrGet(&pGMM->pGlobalSharedModuleTree, GCBaseAddr);
3512	if (!pGlobalModule)
3513	{
3514	Assert(fNewModule);
3515	Assert(!pRecVM->fCollision);
3516
3517	pGlobalModule = (PGMMSHAREDMODULE)RTMemAllocZ(RT_OFFSETOF(GMMSHAREDMODULE, aRegions[cRegions]));
3518	if (!pGlobalModule)
3519	{
3520	AssertFailed();
3521	rc = VERR_NO_MEMORY;
3522	goto end;
3523	}
3524
3525	pGlobalModule->Core.Key = GCBaseAddr;
3526	pGlobalModule->cbModule = cbModule;
3527	/* Input limit already safe; no need to check again. */
3528	/** todo replace with RTStrCopy */
3529	strcpy(pGlobalModule->szName, pszModuleName);
3530	strcpy(pGlobalModule->szVersion, pszVersion);
3531
3532	pGlobalModule->enmGuestOS = enmGuestOS;
3533	pGlobalModule->cRegions = cRegions;
3534
3535	for (unsigned i = 0; i < cRegions; i++)
3536	{
3537	pGlobalModule->aRegions[i].GCRegionAddr = pRegions[i].GCRegionAddr;
3538	pGlobalModule->aRegions[i].cbRegion = pRegions[i].cbRegion;
3539	pGlobalModule->aRegions[i].u32Alignment = 0;
3540	pGlobalModule->aRegions[i].paHCPhysPageID = NULL; /* uninitialized. */
3541	}
3542
3543	/* Save reference. */
3544	pRecVM->pGlobalModule = pGlobalModule;
3545	pRecVM->fCollision = false;
3546	pGlobalModule->cUsers++;
3547	rc = VINF_SUCCESS;
3548
3549	bool ret = RTAvlGCPtrInsert(&pGMM->pGlobalSharedModuleTree, &pGlobalModule->Core);
3550	Assert(ret);
3551
3552	Log(("GMMR0RegisterSharedModule: new global module %s\n", pszModuleName));
3553	}
3554	else
3555	{
3556	Assert(pGlobalModule->cUsers > 0);
3557
3558	/* Make sure the name and version are identical. */
3559	/** todo replace with RTStrNCmp */
3560	if ( !strcmp(pGlobalModule->szName, pszModuleName)
3561	&& !strcmp(pGlobalModule->szVersion, pszVersion))
3562	{
3563	/* Save reference. */
3564	pRecVM->pGlobalModule = pGlobalModule;
3565	if ( fNewModule
3566	\|\| pRecVM->fCollision == true) /* colliding module unregistered and new one registerd since the last check */
3567	{
3568	pGlobalModule->cUsers++;
3569	Log(("GMMR0RegisterSharedModule: using existing module %s cUser=%d!\n", pszModuleName, pGlobalModule->cUsers));
3570	}
3571	pRecVM->fCollision = false;
3572	rc = VINF_SUCCESS;
3573	}
3574	else
3575	{
3576	Log(("GMMR0RegisterSharedModule: module %s collision!\n", pszModuleName));
3577	pRecVM->fCollision = true;
3578	rc = VINF_PGM_SHARED_MODULE_COLLISION;
3579	goto end;
3580	}
3581	}
3582
3583	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3584	}
3585	else
3586	rc = VERR_INTERNAL_ERROR_5;
3587
3588	end:
3589	RTSemFastMutexRelease(pGMM->Mtx);
3590	return rc;
3591	#else
3592	return VERR_NOT_IMPLEMENTED;
3593	#endif
3594	}
3595
3596
3597	/**
3598	* VMMR0 request wrapper for GMMR0RegisterSharedModule.
3599	*
3600	* @returns see GMMR0RegisterSharedModule.
3601	* @param pVM Pointer to the shared VM structure.
3602	* @param idCpu VCPU id
3603	* @param pReq The request packet.
3604	*/
3605	GMMR0DECL(int) GMMR0RegisterSharedModuleReq(PVM pVM, VMCPUID idCpu, PGMMREGISTERSHAREDMODULEREQ pReq)
3606	{
3607	/*
3608	* Validate input and pass it on.
3609	*/
3610	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3611	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3612	AssertMsgReturn(pReq->Hdr.cbReq >= sizeof(pReq) && pReq->Hdr.cbReq == RT_UOFFSETOF(GMMREGISTERSHAREDMODULEREQ, aRegions[pReq->cRegions]), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
3613
3614	return GMMR0RegisterSharedModule(pVM, idCpu, pReq->enmGuestOS, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule, pReq->cRegions, pReq->aRegions);
3615	}
3616
3617	/**
3618	* Unregisters a shared module for the VM
3619	*
3620	* @returns VBox status code.
3621	* @param pVM VM handle
3622	* @param idCpu VCPU id
3623	* @param pszModuleName Module name
3624	* @param pszVersion Module version
3625	* @param GCBaseAddr Module base address
3626	* @param cbModule Module size
3627	*/
3628	GMMR0DECL(int) GMMR0UnregisterSharedModule(PVM pVM, VMCPUID idCpu, char pszModuleName, char pszVersion, RTGCPTR GCBaseAddr, uint32_t cbModule)
3629	{
3630	#ifdef VBOX_WITH_PAGE_SHARING
3631	/*
3632	* Validate input and get the basics.
3633	*/
3634	PGMM pGMM;
3635	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3636	PGVM pGVM;
3637	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3638	if (RT_FAILURE(rc))
3639	return rc;
3640
3641	Log(("GMMR0UnregisterSharedModule %s %s base=%RGv size %x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule));
3642
3643	/*
3644	* Take the sempahore and do some more validations.
3645	*/
3646	rc = RTSemFastMutexRequest(pGMM->Mtx);
3647	AssertRC(rc);
3648	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3649	{
3650	PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
3651	if (!pRecVM)
3652	{
3653	rc = VERR_PGM_SHARED_MODULE_NOT_FOUND;
3654	goto end;
3655	}
3656	/* Remove reference to global shared module. */
3657	if (!pRecVM->fCollision)
3658	{
3659	PGMMSHAREDMODULE pRec = pRecVM->pGlobalModule;
3660	Assert(pRec);
3661
3662	if (pRec) /* paranoia */
3663	{
3664	Assert(pRec->cUsers);
3665	pRec->cUsers--;
3666	if (pRec->cUsers == 0)
3667	{
3668	/* Free the ranges, but leave the pages intact as there might still be references; they will be cleared by the COW mechanism. */
3669	for (unsigned i = 0; i < pRec->cRegions; i++)
3670	if (pRec->aRegions[i].paHCPhysPageID)
3671	RTMemFree(pRec->aRegions[i].paHCPhysPageID);
3672
3673	/* Remove from the tree and free memory. */
3674	RTAvlGCPtrRemove(&pGMM->pGlobalSharedModuleTree, GCBaseAddr);
3675	RTMemFree(pRec);
3676	}
3677	}
3678	else
3679	rc = VERR_PGM_SHARED_MODULE_REGISTRATION_INCONSISTENCY;
3680	}
3681	else
3682	Assert(!pRecVM->pGlobalModule);
3683
3684	/* Remove from the tree and free memory. */
3685	RTAvlGCPtrRemove(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
3686	RTMemFree(pRecVM);
3687
3688	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3689	}
3690	else
3691	rc = VERR_INTERNAL_ERROR_5;
3692
3693	end:
3694	RTSemFastMutexRelease(pGMM->Mtx);
3695	return rc;
3696	#else
3697	return VERR_NOT_IMPLEMENTED;
3698	#endif
3699	}
3700
3701	/**
3702	* VMMR0 request wrapper for GMMR0UnregisterSharedModule.
3703	*
3704	* @returns see GMMR0UnregisterSharedModule.
3705	* @param pVM Pointer to the shared VM structure.
3706	* @param idCpu VCPU id
3707	* @param pReq The request packet.
3708	*/
3709	GMMR0DECL(int) GMMR0UnregisterSharedModuleReq(PVM pVM, VMCPUID idCpu, PGMMUNREGISTERSHAREDMODULEREQ pReq)
3710	{
3711	/*
3712	* Validate input and pass it on.
3713	*/
3714	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3715	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3716	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
3717
3718	return GMMR0UnregisterSharedModule(pVM, idCpu, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule);
3719	}
3720
3721
3722	#ifdef VBOX_WITH_PAGE_SHARING
3723	/**
3724	* Checks specified shared module range for changes
3725	*
3726	* Performs the following tasks:
3727	* - if a shared page is new, then it changes the GMM page type to shared and returns it in the paPageDesc array
3728	* - if a shared page already exists, then it checks if the VM page is identical and if so frees the VM page and returns the shared page in the paPageDesc array
3729	*
3730	* Note: assumes the caller has acquired the GMM semaphore!!
3731	*
3732	* @returns VBox status code.
3733	* @param pGMM Pointer to the GMM instance data.
3734	* @param pGVM Pointer to the GVM instance data.
3735	* @param pModule Module description
3736	* @param idxRegion Region index
3737	* @param cPages Number of entries in the paPageDesc array
3738	* @param paPageDesc Page descriptor array (in/out)
3739	*/
3740	GMMR0DECL(int) GMMR0SharedModuleCheckRange(PGVM pGVM, PGMMSHAREDMODULE pModule, unsigned idxRegion, unsigned cPages, PGMMSHAREDPAGEDESC paPageDesc)
3741	{
3742	int rc = VINF_SUCCESS;
3743	PGMM pGMM;
3744	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3745
3746	AssertReturn(idxRegion < pModule->cRegions, VERR_INVALID_PARAMETER);
3747	AssertReturn(cPages == (pModule->aRegions[idxRegion].cbRegion >> PAGE_SHIFT), VERR_INVALID_PARAMETER);
3748
3749	Log(("GMMR0SharedModuleCheckRange %s base %RGv region %d cPages %d\n", pModule->szName, pModule->Core.Key, idxRegion, cPages));
3750
3751	PGMMSHAREDREGIONDESC pGlobalRegion = &pModule->aRegions[idxRegion];
3752
3753	if (!pGlobalRegion->paHCPhysPageID)
3754	{
3755	/* First time; create a page descriptor array. */
3756	Log(("Allocate page descriptor array for %d pages\n", cPages));
3757	pGlobalRegion->paHCPhysPageID = (uint32_t )RTMemAlloc(cPages sizeof(*pGlobalRegion->paHCPhysPageID));
3758	if (!pGlobalRegion->paHCPhysPageID)
3759	{
3760	AssertFailed();
3761	rc = VERR_NO_MEMORY;
3762	goto end;
3763	}
3764	/* Invalidate all descriptors. */
3765	for (unsigned i = 0; i < cPages; i++)
3766	pGlobalRegion->paHCPhysPageID[i] = NIL_GMM_PAGEID;
3767	}
3768
3769	/* Check all pages in the region. */
3770	for (unsigned i = 0; i < cPages; i++)
3771	{
3772	/* Valid page present? */
3773	if (paPageDesc[i].uHCPhysPageId != NIL_GMM_PAGEID)
3774	{
3775	/* We've seen this shared page for the first time? */
3776	if (pGlobalRegion->paHCPhysPageID[i] == NIL_GMM_PAGEID)
3777	{
3778	/* Easy case: just change the internal page type. */
3779	PGMMPAGE pPage = gmmR0GetPage(pGMM, paPageDesc[i].uHCPhysPageId);
3780	if (!pPage)
3781	{
3782	AssertFailed();
3783	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
3784	goto end;
3785	}
3786	Log(("New shared page guest %RGp host %RHp\n", paPageDesc[i].GCPhys, paPageDesc[i].HCPhys));
3787
3788	AssertMsg(paPageDesc[i].GCPhys == (pPage->Private.pfn << 12), ("desc %RGp gmm %RGp\n", paPageDesc[i].HCPhys, (pPage->Private.pfn << 12)));
3789
3790	gmmR0ConvertToSharedPage(pGMM, pGVM, paPageDesc[i].HCPhys, paPageDesc[i].uHCPhysPageId, pPage);
3791
3792	/* Keep track of these references. */
3793	pGlobalRegion->paHCPhysPageID[i] = paPageDesc[i].uHCPhysPageId;
3794	}
3795	else
3796	{
3797	uint8_t pbLocalPage, pbSharedPage;
3798	uint8_t *pbChunk;
3799	PGMMCHUNK pChunk;
3800
3801	Assert(paPageDesc[i].uHCPhysPageId != pGlobalRegion->paHCPhysPageID[i]);
3802
3803	/* Get the shared page source. */
3804	PGMMPAGE pPage = gmmR0GetPage(pGMM, pGlobalRegion->paHCPhysPageID[i]);
3805	if (!pPage)
3806	{
3807	AssertFailed();
3808	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
3809	goto end;
3810	}
3811	Assert(pPage->Common.u2State == GMM_PAGE_STATE_SHARED);
3812
3813	Log(("Replace existing page guest %RGp host %RHp -> %RHp\n", paPageDesc[i].GCPhys, paPageDesc[i].HCPhys, pPage->Shared.pfn << PAGE_SHIFT));
3814
3815	/* Calculate the virtual address of the local page. */
3816	pChunk = gmmR0GetChunk(pGMM, paPageDesc[i].uHCPhysPageId >> GMM_CHUNKID_SHIFT);
3817	if (pChunk)
3818	{
3819	if (!gmmR0IsChunkMapped(pGVM, pChunk, (PRTR3PTR)&pbChunk))
3820	{
3821	AssertFailed();
3822	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
3823	goto end;
3824	}
3825	pbLocalPage = pbChunk + ((paPageDesc[i].uHCPhysPageId & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
3826	}
3827	else
3828	{
3829	AssertFailed();
3830	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
3831	goto end;
3832	}
3833
3834	/* Calculate the virtual address of the shared page. */
3835	pChunk = gmmR0GetChunk(pGMM, pGlobalRegion->paHCPhysPageID[i] >> GMM_CHUNKID_SHIFT);
3836	Assert(pChunk); /* can't fail as gmmR0GetPage succeeded. */
3837
3838	/* Get the virtual address of the physical page; map the chunk into the VM process if not already done. */
3839	if (!gmmR0IsChunkMapped(pGVM, pChunk, (PRTR3PTR)&pbChunk))
3840	{
3841	rc = gmmR0MapChunk(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk);
3842	if (rc != VINF_SUCCESS)
3843	{
3844	AssertRC(rc);
3845	goto end;
3846	}
3847	}
3848	pbSharedPage = pbChunk + ((pGlobalRegion->paHCPhysPageID[i] & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
3849
3850	/** todo write ASMMemComparePage. */
3851	if (memcmp(pbSharedPage, pbLocalPage, PAGE_SIZE))
3852	{
3853	Log(("Unexpected differences found between local and shared page; skip\n"));
3854	/* Signal to the caller that this one hasn't changed. */
3855	paPageDesc[i].uHCPhysPageId = NIL_GMM_PAGEID;
3856	continue;
3857	}
3858
3859	/* Free the old local page. */
3860	GMMFREEPAGEDESC PageDesc;
3861
3862	PageDesc.idPage = paPageDesc[i].uHCPhysPageId;
3863	rc = gmmR0FreePages(pGMM, pGVM, 1, &PageDesc, GMMACCOUNT_BASE);
3864	AssertRC(rc);
3865
3866	gmmR0UseSharedPage(pGMM, pGVM, pPage);
3867
3868	/* Pass along the new physical address & page id. */
3869	paPageDesc[i].HCPhys = pPage->Shared.pfn << PAGE_SHIFT;
3870	paPageDesc[i].uHCPhysPageId = pGlobalRegion->paHCPhysPageID[i];
3871	}
3872	}
3873	}
3874	end:
3875	return rc;
3876	}
3877
3878	/**
3879	* RTAvlU32Destroy callback.
3880	*
3881	* @returns 0
3882	* @param pNode The node to destroy.
3883	* @param pvGVM The GVM handle.
3884	*/
3885	static DECLCALLBACK(int) gmmR0CleanupSharedModule(PAVLGCPTRNODECORE pNode, void *pvGVM)
3886	{
3887	PGVM pGVM = (PGVM)pvGVM;
3888	PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)pNode;
3889
3890	Assert(pRecVM->pGlobalModule);
3891	if (pRecVM->pGlobalModule)
3892	{
3893	PGMMSHAREDMODULE pRec = pRecVM->pGlobalModule;
3894	Assert(pRec);
3895	Assert(pRec->cUsers);
3896
3897	Log(("gmmR0CleanupSharedModule: %s %s cUsers=%d\n", pRec->szName, pRec->szVersion, pRec->cUsers));
3898	pRec->cUsers--;
3899	if (pRec->cUsers == 0)
3900	{
3901	for (unsigned i = 0; i < pRec->cRegions; i++)
3902	if (pRec->aRegions[i].paHCPhysPageID)
3903	RTMemFree(pRec->aRegions[i].paHCPhysPageID);
3904
3905	RTMemFree(pRec);
3906	}
3907	}
3908	RTMemFree(pRecVM);
3909	return 0;
3910	}
3911	#endif
3912
3913	/**
3914	* Removes all shared modules for the specified VM
3915	*
3916	* @returns VBox status code.
3917	* @param pVM VM handle
3918	* @param idCpu VCPU id
3919	*/
3920	GMMR0DECL(int) GMMR0ResetSharedModules(PVM pVM, VMCPUID idCpu)
3921	{
3922	#ifdef VBOX_WITH_PAGE_SHARING
3923	/*
3924	* Validate input and get the basics.
3925	*/
3926	PGMM pGMM;
3927	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3928	PGVM pGVM;
3929	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3930	if (RT_FAILURE(rc))
3931	return rc;
3932
3933	/*
3934	* Take the sempahore and do some more validations.
3935	*/
3936	rc = RTSemFastMutexRequest(pGMM->Mtx);
3937	AssertRC(rc);
3938	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3939	{
3940	Log(("GMMR0ResetSharedModules\n"));
3941	RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, pGVM);
3942
3943	rc = VINF_SUCCESS;
3944	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3945	}
3946	else
3947	rc = VERR_INTERNAL_ERROR_5;
3948
3949	RTSemFastMutexRelease(pGMM->Mtx);
3950	return rc;
3951	#else
3952	return VERR_NOT_IMPLEMENTED;
3953	#endif
3954	}
3955
3956	#ifdef VBOX_WITH_PAGE_SHARING
3957	typedef struct
3958	{
3959	PGVM pGVM;
3960	VMCPUID idCpu;
3961	} GMMCHECKSHAREDMODULEINFO, *PGMMCHECKSHAREDMODULEINFO;
3962
3963	/**
3964	* Tree enumeration callback for checking a shared module.
3965	*/
3966	DECLCALLBACK(int) gmmR0CheckSharedModule(PAVLGCPTRNODECORE pNode, void *pvUser)
3967	{
3968	PGMMCHECKSHAREDMODULEINFO pInfo = (PGMMCHECKSHAREDMODULEINFO)pvUser;
3969	PGMMSHAREDMODULEPERVM pLocalModule = (PGMMSHAREDMODULEPERVM)pNode;
3970	PGMMSHAREDMODULE pGlobalModule = pLocalModule->pGlobalModule;
3971
3972	Log(("gmmR0CheckSharedModule: check %s %s base=%RGv size=%x collision=%d\n", pGlobalModule->szName, pGlobalModule->szVersion, pGlobalModule->Core.Key, pGlobalModule->cbModule, pLocalModule->fCollision));
3973	if (!pLocalModule->fCollision)
3974	{
3975	PGMR0SharedModuleCheckRegion(pInfo->pGVM->pVM, pInfo->idCpu, pGlobalModule, pInfo->pGVM);
3976	}
3977	return 0;
3978	}
3979	#endif
3980
3981	/**
3982	* Check all shared modules for the specified VM
3983	*
3984	* @returns VBox status code.
3985	* @param pVM VM handle
3986	* @param idCpu VCPU id
3987	*/
3988	GMMR0DECL(int) GMMR0CheckSharedModules(PVM pVM, VMCPUID idCpu)
3989	{
3990	#ifdef VBOX_WITH_PAGE_SHARING
3991	/*
3992	* Validate input and get the basics.
3993	*/
3994	PGMM pGMM;
3995	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3996	PGVM pGVM;
3997	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3998	if (RT_FAILURE(rc))
3999	return rc;
4000
4001	/*
4002	* Take the sempahore and do some more validations.
4003	*/
4004	rc = RTSemFastMutexRequest(pGMM->Mtx);
4005	AssertRC(rc);
4006	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4007	{
4008	GMMCHECKSHAREDMODULEINFO Info;
4009
4010	Log(("GMMR0CheckSharedModules\n"));
4011	Info.pGVM = pGVM;
4012	Info.idCpu = idCpu;
4013
4014	RTAvlGCPtrDoWithAll(&pGVM->gmm.s.pSharedModuleTree, true /* fFromLeft */, gmmR0CheckSharedModule, &Info);
4015
4016	rc = VINF_SUCCESS;
4017	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4018	}
4019	else
4020	rc = VERR_INTERNAL_ERROR_5;
4021
4022	RTSemFastMutexRelease(pGMM->Mtx);
4023	return rc;
4024	#else
4025	return VERR_NOT_IMPLEMENTED;
4026	#endif
4027	}

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/VMMR0/GMMR0.cpp@ 29509

Download in other formats: