GMMR0.cpp@ 37214

Last change on this file since 37214 was 37214, checked in by vboxsync, 14 years ago
GMMR0: Added a dedicated list for empty chunks.
Property svn:eol-style set to `native` Property svn:keywords set to `Id`
File size: 169.4 KB

Line
1	/* $Id: GMMR0.cpp 37214 2011-05-25 14:38:28Z vboxsync $ */
2	/** @file
3	* GMM - Global Memory Manager.
4	*/
5
6	/*
7	* Copyright (C) 2007-2011 Oracle Corporation
8	*
9	* This file is part of VirtualBox Open Source Edition (OSE), as
10	* available from http://www.virtualbox.org. This file is free software;
11	* you can redistribute it and/or modify it under the terms of the GNU
12	* General Public License (GPL) as published by the Free Software
13	* Foundation, in version 2 as it comes in the "COPYING" file of the
14	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	*/
17
18
19	/** @page pg_gmm GMM - The Global Memory Manager
20	*
21	* As the name indicates, this component is responsible for global memory
22	* management. Currently only guest RAM is allocated from the GMM, but this
23	* may change to include shadow page tables and other bits later.
24	*
25	* Guest RAM is managed as individual pages, but allocated from the host OS
26	* in chunks for reasons of portability / efficiency. To minimize the memory
27	* footprint all tracking structure must be as small as possible without
28	* unnecessary performance penalties.
29	*
30	* The allocation chunks have a fixed size, which is defined at compile time
31	* by the #GMM_CHUNK_SIZE \#define.
32	*
33	* Each chunk is given a unique ID. Each page also has a unique ID. The
34	* relationship between the two IDs is:
35	* @code
36	* GMM_CHUNK_SHIFT = log2(GMM_CHUNK_SIZE / PAGE_SIZE);
37	* idPage = (idChunk << GMM_CHUNK_SHIFT) | iPage;
38	* @endcode
39	* Where iPage is the index of the page within the chunk. This ID scheme
40	* permits for efficient chunk and page lookup, but it relies on the chunk size
41	* to be set at compile time. The chunks are organized in an AVL tree with their
42	* IDs being the keys.
43	*
44	* The physical address of each page in an allocation chunk is maintained by
45	* the #RTR0MEMOBJ and obtained using #RTR0MemObjGetPagePhysAddr. There is no
46	* need to duplicate this information (it'll cost 8-bytes per page if we did).
47	*
48	* So what do we need to track per page? Most importantly we need to know
49	* which state the page is in:
50	* - Private - Allocated for (eventually) backing one particular VM page.
51	* - Shared - Readonly page that is used by one or more VMs and treated
52	* as COW by PGM.
53	* - Free - Not used by anyone.
54	*
55	* For the page replacement operations (sharing, defragmenting and freeing)
56	* to be somewhat efficient, private pages need to be associated with a
57	* particular page in a particular VM.
58	*
59	* Tracking the usage of shared pages is impractical and expensive, so we'll
60	* settle for a reference counting system instead.
61	*
62	* Free pages will be chained on LIFOs
63	*
64	* On 64-bit systems we will use a 64-bit bitfield per page, while on 32-bit
65	* systems a 32-bit bitfield will have to suffice because of address space
66	* limitations. The #GMMPAGE structure shows the details.
67	*
68	*
69	* @section sec_gmm_alloc_strat Page Allocation Strategy
70	*
71	* The strategy for allocating pages has to take fragmentation and shared
72	* pages into account, or we may end up with 2000 chunks with only
73	* a few pages in each. Shared pages cannot easily be reallocated because
74	* of the inaccurate usage accounting (see above). Private pages can be
75	* reallocated by a defragmentation thread in the same manner that sharing
76	* is done.
77	*
78	* The first approach is to manage the free pages in two sets depending on
79	* whether they are mainly for the allocation of shared or private pages.
80	* In the initial implementation there will be almost no possibility for
81	* mixing shared and private pages in the same chunk (only if we're really
82	* stressed on memory), but when we implement forking of VMs and have to
83	* deal with lots of COW pages it'll start getting kind of interesting.
84	*
85	* The sets are lists of chunks with approximately the same number of
86	* free pages. Say the chunk size is 1MB, meaning 256 pages, and a set
87	* consists of 16 lists. So, the first list will contain the chunks with
88	* 1-15 free pages, the second covers 16-31, and so on. The chunks will be
89	* moved between the lists as pages are freed up or allocated.
90	*
91	*
92	* @section sec_gmm_costs Costs
93	*
94	* The per page cost in kernel space is 32-bit plus whatever RTR0MEMOBJ
95	* entails. In addition there is the chunk cost of approximately
96	* (sizeof(RTR0MEMOBJ) + sizeof(CHUNK)) / 2^CHUNK_SHIFT bytes per page.
97	*
98	* On Windows the per page #RTR0MEMOBJ cost is 32-bit on 32-bit windows
99	* and 64-bit on 64-bit windows (a PFN_NUMBER in the MDL). So, 64-bit per page.
100	* The cost on Linux is identical, but here it's because of sizeof(struct page *).
101	*
102	*
103	* @section sec_gmm_legacy Legacy Mode for Non-Tier-1 Platforms
104	*
105	* In legacy mode the page source is locked user pages and not
106	* #RTR0MemObjAllocPhysNC, this means that a page can only be allocated
107	* by the VM that locked it. We will make no attempt at implementing
108	* page sharing on these systems, just do enough to make it all work.
109	*
110	*
111	* @subsection sub_gmm_locking Serializing
112	*
113	* One simple fast mutex will be employed in the initial implementation, not
114	* two as mentioned in @ref subsec_pgmPhys_Serializing.
115	*
116	* @see @ref subsec_pgmPhys_Serializing
117	*
118	*
119	* @section sec_gmm_overcommit Memory Over-Commitment Management
120	*
121	* The GVM will have to do the system wide memory over-commitment
122	* management. My current ideas are:
123	* - Per VM oc policy that indicates how much to initially commit
124	* to it and what to do in an out-of-memory situation.
125	* - Prevent overtaxing the host.
126	*
127	* There are some challenges here, the main ones are configurability and
128	* security. Should we for instance permit anyone to request 100% memory
129	* commitment? Who should be allowed to do runtime adjustments of the
130	* config. And how to prevent these settings from being lost when the last
131	* VM process exits? The solution is probably to have an optional root
132	* daemon that will keep VMMR0.r0 in memory and enable the security measures.
133	*
134	*
135	*
136	* @section sec_gmm_numa NUMA
137	*
138	* NUMA considerations will be designed and implemented a bit later.
139	*
140	* The preliminary guess is that we will have to try to allocate memory as
141	* close as possible to the CPUs the VM is executed on (EMT and additional CPU
142	* threads). Which means it's mostly about allocation and sharing policies.
143	* Both the scheduler and allocator interface will have to supply some NUMA info
144	* and we'll need to have a way to calc access costs.
145	*
146	*/
147
148
149	/*******************************************************************************
150	* Header Files *
151	*******************************************************************************/
152	#define LOG_GROUP LOG_GROUP_GMM
153	#include <VBox/rawpci.h>
154	#include <VBox/vmm/vm.h>
155	#include <VBox/vmm/gmm.h>
156	#include "GMMR0Internal.h"
157	#include <VBox/vmm/gvm.h>
158	#include <VBox/vmm/pgm.h>
159	#include <VBox/log.h>
160	#include <VBox/param.h>
161	#include <VBox/err.h>
162	#include <iprt/asm.h>
163	#include <iprt/avl.h>
164	#include <iprt/list.h>
165	#include <iprt/mem.h>
166	#include <iprt/memobj.h>
167	#include <iprt/mp.h>
168	#include <iprt/semaphore.h>
169	#include <iprt/string.h>
170	#include <iprt/time.h>
171
172
173	/*******************************************************************************
174	* Structures and Typedefs *
175	*******************************************************************************/
/** Pointer to a GMMCHUNKFREESET, i.e. a set of free chunks (forward declaration). */
typedef struct GMMCHUNKFREESET *PGMMCHUNKFREESET;

/** Pointer to a GMMCHUNK, i.e. a GMM allocation chunk (forward declaration). */
typedef struct GMMCHUNK        *PGMMCHUNK;
181
182	/**
183	* The per-page tracking structure employed by the GMM.
184	*
185	* On 32-bit hosts we'll some trickery is necessary to compress all
186	* the information into 32-bits. When the fSharedFree member is set,
187	* the 30th bit decides whether it's a free page or not.
188	*
189	* Because of the different layout on 32-bit and 64-bit hosts, macros
190	* are used to get and set some of the data.
191	*/
192	typedef union GMMPAGE
193	{
194	#if HC_ARCH_BITS == 64
195	/** Unsigned integer view. */
196	uint64_t u;
197
198	/** The common view. */
199	struct GMMPAGECOMMON
200	{
201	uint32_t uStuff1 : 32;
202	uint32_t uStuff2 : 30;
203	/** The page state. */
204	uint32_t u2State : 2;
205	} Common;
206
207	/** The view of a private page. */
208	struct GMMPAGEPRIVATE
209	{
210	/** The guest page frame number. (Max addressable: 2 ^ 44 - 16) */
211	uint32_t pfn;
212	/** The GVM handle. (64K VMs) */
213	uint32_t hGVM : 16;
214	/** Reserved. */
215	uint32_t u16Reserved : 14;
216	/** The page state. */
217	uint32_t u2State : 2;
218	} Private;
219
220	/** The view of a shared page. */
221	struct GMMPAGESHARED
222	{
223	/** The host page frame number. (Max addressable: 2 ^ 44 - 16) */
224	uint32_t pfn;
225	/** The reference count (64K VMs). */
226	uint32_t cRefs : 16;
227	/** Reserved. Checksum or something? Two hGVMs for forking? */
228	uint32_t u14Reserved : 14;
229	/** The page state. */
230	uint32_t u2State : 2;
231	} Shared;
232
233	/** The view of a free page. */
234	struct GMMPAGEFREE
235	{
236	/** The index of the next page in the free list. UINT16_MAX is NIL. */
237	uint16_t iNext;
238	/** Reserved. Checksum or something? */
239	uint16_t u16Reserved0;
240	/** Reserved. Checksum or something? */
241	uint32_t u30Reserved1 : 30;
242	/** The page state. */
243	uint32_t u2State : 2;
244	} Free;
245
246	#else /* 32-bit */
247	/** Unsigned integer view. */
248	uint32_t u;
249
250	/** The common view. */
251	struct GMMPAGECOMMON
252	{
253	uint32_t uStuff : 30;
254	/** The page state. */
255	uint32_t u2State : 2;
256	} Common;
257
258	/** The view of a private page. */
259	struct GMMPAGEPRIVATE
260	{
261	/** The guest page frame number. (Max addressable: 2 ^ 36) */
262	uint32_t pfn : 24;
263	/** The GVM handle. (127 VMs) */
264	uint32_t hGVM : 7;
265	/** The top page state bit, MBZ. */
266	uint32_t fZero : 1;
267	} Private;
268
269	/** The view of a shared page. */
270	struct GMMPAGESHARED
271	{
272	/** The reference count. */
273	uint32_t cRefs : 30;
274	/** The page state. */
275	uint32_t u2State : 2;
276	} Shared;
277
278	/** The view of a free page. */
279	struct GMMPAGEFREE
280	{
281	/** The index of the next page in the free list. UINT16_MAX is NIL. */
282	uint32_t iNext : 16;
283	/** Reserved. Checksum or something? */
284	uint32_t u14Reserved : 14;
285	/** The page state. */
286	uint32_t u2State : 2;
287	} Free;
288	#endif
289	} GMMPAGE;
290	AssertCompileSize(GMMPAGE, sizeof(RTHCUINTPTR));
291	/** Pointer to a GMMPAGE. */
292	typedef GMMPAGE *PGMMPAGE;
293
294
/** @name The Page States.
 * Values stored in the u2State member of the GMMPAGE views.
 * @{ */
/** The page is private. */
#define GMM_PAGE_STATE_PRIVATE          0
/** The page is private - second encoding, needed by the 32-bit layout only.
 * This will never be used on 64-bit hosts. */
#define GMM_PAGE_STATE_PRIVATE_32       1
/** The page is shared. */
#define GMM_PAGE_STATE_SHARED           2
/** The page is free. */
#define GMM_PAGE_STATE_FREE             3
/** @} */


/** @def GMM_PAGE_IS_PRIVATE
 * Tests whether a page is private.  On 64-bit hosts this compares the full
 * state, on 32-bit hosts only the fZero bit of the private view is examined.
 *
 * @returns true if private, false if not.
 * @param   pPage       The GMM page.
 */
#if HC_ARCH_BITS == 64
# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_PRIVATE )
#else
# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Private.fZero == 0 )
#endif

/** @def GMM_PAGE_IS_SHARED
 * Tests whether a page is shared.
 *
 * @returns true if shared, false if not.
 * @param   pPage       The GMM page.
 */
#define GMM_PAGE_IS_SHARED(pPage)   ( (pPage)->Common.u2State == GMM_PAGE_STATE_SHARED )

/** @def GMM_PAGE_IS_FREE
 * Tests whether a page is free.
 *
 * @returns true if free, false if not.
 * @param   pPage       The GMM page.
 */
#define GMM_PAGE_IS_FREE(pPage)     ( (pPage)->Common.u2State == GMM_PAGE_STATE_FREE )
333
334	/** @def GMM_PAGE_PFN_LAST
335	* The last valid guest pfn range.
336	* @remark Some of the values outside the range has special meaning,
337	* see GMM_PAGE_PFN_UNSHAREABLE.
338	*/
339	#if HC_ARCH_BITS == 64
340	# define GMM_PAGE_PFN_LAST UINT32_C(0xfffffff0)
341	#else
342	# define GMM_PAGE_PFN_LAST UINT32_C(0x00fffff0)
343	#endif
344	AssertCompile(GMM_PAGE_PFN_LAST == (GMM_GCPHYS_LAST >> PAGE_SHIFT));
345
346	/** @def GMM_PAGE_PFN_UNSHAREABLE
347	* Indicates that this page isn't used for normal guest memory and thus isn't shareable.
348	*/
349	#if HC_ARCH_BITS == 64
350	# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0xfffffff1)
351	#else
352	# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0x00fffff1)
353	#endif
354	AssertCompile(GMM_PAGE_PFN_UNSHAREABLE == (GMM_GCPHYS_UNSHAREABLE >> PAGE_SHIFT));
355
356
357	/**
358	* A GMM allocation chunk ring-3 mapping record.
359	*
360	* This should really be associated with a session and not a VM, but
361	* it's simpler to associated with a VM and cleanup with the VM object
362	* is destroyed.
363	*/
364	typedef struct GMMCHUNKMAP
365	{
366	/** The mapping object. */
367	RTR0MEMOBJ hMapObj;
368	/** The VM owning the mapping. */
369	PGVM pGVM;
370	} GMMCHUNKMAP;
371	/** Pointer to a GMM allocation chunk mapping. */
372	typedef struct GMMCHUNKMAP *PGMMCHUNKMAP;
373
374
375	/**
376	* A GMM allocation chunk.
377	*/
378	typedef struct GMMCHUNK
379	{
380	/** The AVL node core.
381	* The Key is the chunk ID. (Giant mtx.) */
382	AVLU32NODECORE Core;
383	/** The memory object.
384	* Either from RTR0MemObjAllocPhysNC or RTR0MemObjLockUser depending on
385	* what the host can dish up with. (Chunk mtx protects mapping accesses
386	* and related frees.) */
387	RTR0MEMOBJ hMemObj;
388	/** Pointer to the next chunk in the free list. (Giant mtx.) */
389	PGMMCHUNK pFreeNext;
390	/** Pointer to the previous chunk in the free list. (Giant mtx.) */
391	PGMMCHUNK pFreePrev;
392	/** Pointer to the free set this chunk belongs to. NULL for
393	* chunks with no free pages. (Giant mtx.) */
394	PGMMCHUNKFREESET pSet;
395	/** List node in the chunk list (GMM::ChunkList). (Giant mtx.) */
396	RTLISTNODE ListNode;
397	/** Pointer to an array of mappings. (Chunk mtx.) */
398	PGMMCHUNKMAP paMappingsX;
399	/** The number of mappings. (Chunk mtx.) */
400	uint16_t cMappingsX;
401	/** The mapping lock this chunk is using using. UINT16_MAX if nobody is
402	* mapping or freeing anything. (Giant mtx.) */
403	uint8_t volatile iChunkMtx;
404	/** Flags field reserved for future use (like eliminating enmType).
405	* (Giant mtx.) */
406	uint8_t fFlags;
407	/** The head of the list of free pages. UINT16_MAX is the NIL value.
408	* (Giant mtx.) */
409	uint16_t iFreeHead;
410	/** The number of free pages. (Giant mtx.) */
411	uint16_t cFree;
412	/** The GVM handle of the VM that first allocated pages from this chunk, this
413	* is used as a preference when there are several chunks to choose from.
414	* When in bound memory mode this isn't a preference any longer. (Giant
415	* mtx.) */
416	uint16_t hGVM;
417	/** The ID of the NUMA node the memory mostly resides on. (Reserved for
418	* future use.) (Giant mtx.) */
419	uint16_t idNumaNode;
420	/** The number of private pages. (Giant mtx.) */
421	uint16_t cPrivate;
422	/** The number of shared pages. (Giant mtx.) */
423	uint16_t cShared;
424	/** The pages. (Giant mtx.) */
425	GMMPAGE aPages[GMM_CHUNK_SIZE >> PAGE_SHIFT];
426	} GMMCHUNK;
427
/** GMMCHUNK::idNumaNode value indicating that the NUMA properties of the
 * memory are unknown. */
#define GMM_CHUNK_NUMA_ID_UNKNOWN       UINT16_C(0xfffe)

/** @name GMM_CHUNK_FLAGS_XXX - chunk flags.
 * NOTE(review): declared with UINT16_C although GMMCHUNK::fFlags is a
 * uint8_t; the value fits, but the widths disagree.
 * @{ */
/** The chunk is a large page (2MB). */
#define GMM_CHUNK_FLAGS_LARGE_PAGE      UINT16_C(0x0001)
/** @} */
436
437
438	/**
439	* An allocation chunk TLB entry.
440	*/
441	typedef struct GMMCHUNKTLBE
442	{
443	/** The chunk id. */
444	uint32_t idChunk;
445	/** Pointer to the chunk. */
446	PGMMCHUNK pChunk;
447	} GMMCHUNKTLBE;
448	/** Pointer to an allocation chunk TLB entry. */
449	typedef GMMCHUNKTLBE *PGMMCHUNKTLBE;
450
451
452	/** The number of entries tin the allocation chunk TLB. */
453	#define GMM_CHUNKTLB_ENTRIES 32
454	/** Gets the TLB entry index for the given Chunk ID. */
455	#define GMM_CHUNKTLB_IDX(idChunk) ( (idChunk) & (GMM_CHUNKTLB_ENTRIES - 1) )
456
457	/**
458	* An allocation chunk TLB.
459	*/
460	typedef struct GMMCHUNKTLB
461	{
462	/** The TLB entries. */
463	GMMCHUNKTLBE aEntries[GMM_CHUNKTLB_ENTRIES];
464	} GMMCHUNKTLB;
465	/** Pointer to an allocation chunk TLB. */
466	typedef GMMCHUNKTLB *PGMMCHUNKTLB;
467
468
469	/** The GMMCHUNK::cFree shift count employed by gmmR0SelectFreeSetList. */
470	#define GMM_CHUNK_FREE_SET_SHIFT 4
471	/** Index of the list containing completely unused chunks.
472	* The code ASSUMES this is the last list. */
473	#define GMM_CHUNK_FREE_SET_UNUSED_LIST (GMM_CHUNK_NUM_PAGES >> GMM_CHUNK_FREE_SET_SHIFT)
474
475
476
477	/**
478	* A set of free chunks.
479	*/
480	typedef struct GMMCHUNKFREESET
481	{
482	/** The number of free pages in the set. */
483	uint64_t cFreePages;
484	/** The generation ID for the set. This is incremented whenever
485	* something is linked or unlinked from this set. */
486	uint64_t idGeneration;
487	/** Chunks ordered by increasing number of free pages.
488	* In the final list the chunks are completely unused. */
489	PGMMCHUNK apLists[GMM_CHUNK_FREE_SET_UNUSED_LIST + 1];
490	} GMMCHUNKFREESET;
491
492
493	/**
494	* The GMM instance data.
495	*/
496	typedef struct GMM
497	{
498	/** Magic / eye catcher. GMM_MAGIC */
499	uint32_t u32Magic;
500	/** The number of threads waiting on the mutex. */
501	uint32_t cMtxContenders;
502	/** The fast mutex protecting the GMM.
503	* More fine grained locking can be implemented later if necessary. */
504	RTSEMFASTMUTEX hMtx;
505	#ifdef VBOX_STRICT
506	/** The current mutex owner. */
507	RTNATIVETHREAD hMtxOwner;
508	#endif
509	/** The chunk tree. */
510	PAVLU32NODECORE pChunks;
511	/** The chunk TLB. */
512	GMMCHUNKTLB ChunkTLB;
513	/** The private free set. */
514	GMMCHUNKFREESET Private;
515	/** The shared free set. */
516	GMMCHUNKFREESET Shared;
517
518	/** Shared module tree (global). */
519	/** @todo separate trees for distinctly different guest OSes. */
520	PAVLGCPTRNODECORE pGlobalSharedModuleTree;
521
522	/** The chunk list. For simplifying the cleanup process. */
523	RTLISTNODE ChunkList;
524
525	/** The maximum number of pages we're allowed to allocate.
526	* @gcfgm 64-bit GMM/MaxPages Direct.
527	* @gcfgm 32-bit GMM/PctPages Relative to the number of host pages. */
528	uint64_t cMaxPages;
529	/** The number of pages that has been reserved.
530	* The deal is that cReservedPages - cOverCommittedPages <= cMaxPages. */
531	uint64_t cReservedPages;
532	/** The number of pages that we have over-committed in reservations. */
533	uint64_t cOverCommittedPages;
534	/** The number of actually allocated (committed if you like) pages. */
535	uint64_t cAllocatedPages;
536	/** The number of pages that are shared. A subset of cAllocatedPages. */
537	uint64_t cSharedPages;
538	/** The number of pages that are actually shared between VMs. */
539	uint64_t cDuplicatePages;
540	/** The number of pages that are shared that has been left behind by
541	* VMs not doing proper cleanups. */
542	uint64_t cLeftBehindSharedPages;
543	/** The number of allocation chunks.
544	* (The number of pages we've allocated from the host can be derived from this.) */
545	uint32_t cChunks;
546	/** The number of current ballooned pages. */
547	uint64_t cBalloonedPages;
548
549	/** The legacy allocation mode indicator.
550	* This is determined at initialization time. */
551	bool fLegacyAllocationMode;
552	/** The bound memory mode indicator.
553	* When set, the memory will be bound to a specific VM and never
554	* shared. This is always set if fLegacyAllocationMode is set.
555	* (Also determined at initialization time.) */
556	bool fBoundMemoryMode;
557	/** The number of registered VMs. */
558	uint16_t cRegisteredVMs;
559
560	/** The number of freed chunks ever. This is used a list generation to
561	* avoid restarting the cleanup scanning when the list wasn't modified. */
562	uint32_t cFreedChunks;
563	/** The previous allocated Chunk ID.
564	* Used as a hint to avoid scanning the whole bitmap. */
565	uint32_t idChunkPrev;
566	/** Chunk ID allocation bitmap.
567	* Bits of allocated IDs are set, free ones are clear.
568	* The NIL id (0) is marked allocated. */
569	uint32_t bmChunkId[(GMM_CHUNKID_LAST + 1 + 31) / 32];
570
571	/** The index of the next mutex to use. */
572	uint32_t iNextChunkMtx;
573	/** Chunk locks for reducing lock contention without having to allocate
574	* one lock per chunk. */
575	struct
576	{
577	/** The mutex */
578	RTSEMFASTMUTEX hMtx;
579	/** The number of threads currently using this mutex. */
580	uint32_t volatile cUsers;
581	} aChunkMtx[64];
582	} GMM;
583	/** Pointer to the GMM instance. */
584	typedef GMM *PGMM;
585
586	/** The value of GMM::u32Magic (Katsuhiro Otomo). */
587	#define GMM_MAGIC UINT32_C(0x19540414)
588
589
590	/**
591	* GMM chunk mutex state.
592	*
593	* This is returned by gmmR0ChunkMutexAcquire and is used by the other
594	* gmmR0ChunkMutex* methods.
595	*/
596	typedef struct GMMR0CHUNKMTXSTATE
597	{
598	PGMM pGMM;
599	/** The index of the chunk mutex. */
600	uint8_t iChunkMtx;
601	/** The relevant flags (GMMR0CHUNK_MTX_XXX). */
602	uint8_t fFlags;
603	} GMMR0CHUNKMTXSTATE;
604	/** Pointer to a chunk mutex state. */
605	typedef GMMR0CHUNKMTXSTATE *PGMMR0CHUNKMTXSTATE;
606
607	/** @name GMMR0CHUNK_MTX_XXX
608	* @{ */
609	#define GMMR0CHUNK_MTX_INVALID UINT32_C(0)
610	#define GMMR0CHUNK_MTX_KEEP_GIANT UINT32_C(1)
611	#define GMMR0CHUNK_MTX_RETAKE_GIANT UINT32_C(2)
612	#define GMMR0CHUNK_MTX_DROP_GIANT UINT32_C(3)
613	#define GMMR0CHUNK_MTX_END UINT32_C(4)
614	/** @} */
615
616
617	/**
618	* Page allocation strategy sketches.
619	*/
620	typedef struct GMMR0ALLOCPAGESTRATEGY
621	{
622	uint32_t cTries;
623	#if 0
624	typedef enum GMMR0ALLOCPAGESTRATEGY
625	{
626	kGMMR0AllocPageStrategy_Invalid = 0,
627	kGMMR0AllocPageStrategy_VM,
628	kGMMR0AllocPageStrategy_NumaNode,
629	kGMMR0AllocPageStrategy_AnythingGoes,
630	kGMMR0AllocPageStrategy_End
631	} GMMR0ALLOCPAGESTRATEGY;
632	#endif
633	} GMMR0ALLOCPAGESTRATEGY;
634	/** Pointer to a page allocation strategy structure. */
635	typedef GMMR0ALLOCPAGESTRATEGY *PGMMR0ALLOCPAGESTRATEGY;
636
637
638	/*******************************************************************************
639	* Global Variables *
640	*******************************************************************************/
641	/** Pointer to the GMM instance data. */
642	static PGMM g_pGMM = NULL;
643
644	/** Macro for obtaining and validating the g_pGMM pointer.
645	* On failure it will return from the invoking function with the specified return value.
646	*
647	* @param pGMM The name of the pGMM variable.
648	* @param rc The return value on failure. Use VERR_INTERNAL_ERROR for
649	* VBox status codes.
650	*/
651	#define GMM_GET_VALID_INSTANCE(pGMM, rc) \
652	do { \
653	(pGMM) = g_pGMM; \
654	AssertPtrReturn((pGMM), (rc)); \
655	AssertMsgReturn((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic), (rc)); \
656	} while (0)
657
658	/** Macro for obtaining and validating the g_pGMM pointer, void function variant.
659	* On failure it will return from the invoking function.
660	*
661	* @param pGMM The name of the pGMM variable.
662	*/
663	#define GMM_GET_VALID_INSTANCE_VOID(pGMM) \
664	do { \
665	(pGMM) = g_pGMM; \
666	AssertPtrReturnVoid((pGMM)); \
667	AssertMsgReturnVoid((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic)); \
668	} while (0)
669
670
/** @def GMM_CHECK_SANITY_UPON_ENTERING
 * Checks the sanity of the GMM instance data before making changes.
 *
 * @returns true if sane, false if not.
 * @param   pGMM    The name of the pGMM variable.
 */
/** @def GMM_CHECK_SANITY_UPON_LEAVING
 * Checks the sanity of the GMM instance data after making changes.
 *
 * @returns true if sane, false if not.
 * @param   pGMM    The name of the pGMM variable.
 */
/** @def GMM_CHECK_SANITY_IN_LOOPS
 * Checks the sanity of the GMM instance in the allocation loops.
 *
 * @returns true if sane, false if not.
 * @param   pGMM    The name of the pGMM variable.
 */
/* All three are stubs that evaluate to true by default; to enable the real
   checks, edit the '&& 0' below by hand in a VBOX_STRICT build. */
#if defined(VBOX_STRICT) && 0
# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM)   (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM)    (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
# define GMM_CHECK_SANITY_IN_LOOPS(pGMM)        (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
#else
# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM)   (true)
# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM)    (true)
# define GMM_CHECK_SANITY_IN_LOOPS(pGMM)        (true)
#endif
712
713
714	/*******************************************************************************
715	* Internal Functions *
716	*******************************************************************************/
717	static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM);
718	static bool gmmR0CleanupVMScanChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
719	DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet);
720	DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk);
721	static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo);
722	static bool gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem);
723	static void gmmR0FreeSharedPage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage);
724	static int gmmR0UnmapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
725	static void gmmR0SharedModuleCleanup(PGMM pGMM, PGVM pGVM);
726
727
728
729	/**
730	* Initializes the GMM component.
731	*
732	* This is called when the VMMR0.r0 module is loaded and protected by the
733	* loader semaphore.
734	*
735	* @returns VBox status code.
736	*/
737	GMMR0DECL(int) GMMR0Init(void)
738	{
739	LogFlow(("GMMInit:\n"));
740
741	/*
742	* Allocate the instance data and the locks.
743	*/
744	PGMM pGMM = (PGMM)RTMemAllocZ(sizeof(*pGMM));
745	if (!pGMM)
746	return VERR_NO_MEMORY;
747
748	pGMM->u32Magic = GMM_MAGIC;
749	for (unsigned i = 0; i < RT_ELEMENTS(pGMM->ChunkTLB.aEntries); i++)
750	pGMM->ChunkTLB.aEntries[i].idChunk = NIL_GMM_CHUNKID;
751	RTListInit(&pGMM->ChunkList);
752	ASMBitSet(&pGMM->bmChunkId[0], NIL_GMM_CHUNKID);
753
754	int rc = RTSemFastMutexCreate(&pGMM->hMtx);
755	if (RT_SUCCESS(rc))
756	{
757	unsigned iMtx;
758	for (iMtx = 0; iMtx < RT_ELEMENTS(pGMM->aChunkMtx); iMtx++)
759	{
760	rc = RTSemFastMutexCreate(&pGMM->aChunkMtx[iMtx].hMtx);
761	if (RT_FAILURE(rc))
762	break;
763	}
764	if (RT_SUCCESS(rc))
765	{
766	/*
767	* Check and see if RTR0MemObjAllocPhysNC works.
768	*/
769	#if 0 /* later, see #3170. */
770	RTR0MEMOBJ MemObj;
771	rc = RTR0MemObjAllocPhysNC(&MemObj, _64K, NIL_RTHCPHYS);
772	if (RT_SUCCESS(rc))
773	{
774	rc = RTR0MemObjFree(MemObj, true);
775	AssertRC(rc);
776	}
777	else if (rc == VERR_NOT_SUPPORTED)
778	pGMM->fLegacyAllocationMode = pGMM->fBoundMemoryMode = true;
779	else
780	SUPR0Printf("GMMR0Init: RTR0MemObjAllocPhysNC(,64K,Any) -> %d!\n", rc);
781	#else
782	# if defined(RT_OS_WINDOWS) \|\| (defined(RT_OS_SOLARIS) && ARCH_BITS == 64) \|\| defined(RT_OS_LINUX) \|\| defined(RT_OS_FREEBSD)
783	pGMM->fLegacyAllocationMode = false;
784	# if ARCH_BITS == 32
785	/* Don't reuse possibly partial chunks because of the virtual
786	address space limitation. */
787	pGMM->fBoundMemoryMode = true;
788	# else
789	pGMM->fBoundMemoryMode = false;
790	# endif
791	# else
792	pGMM->fLegacyAllocationMode = true;
793	pGMM->fBoundMemoryMode = true;
794	# endif
795	#endif
796
797	/*
798	* Query system page count and guess a reasonable cMaxPages value.
799	*/
800	pGMM->cMaxPages = UINT32_MAX; /** @todo IPRT function for query ram size and such. */
801
802	g_pGMM = pGMM;
803	LogFlow(("GMMInit: pGMM=%p fLegacyAllocationMode=%RTbool fBoundMemoryMode=%RTbool\n", pGMM, pGMM->fLegacyAllocationMode, pGMM->fBoundMemoryMode));
804	return VINF_SUCCESS;
805	}
806
807	/*
808	* Bail out.
809	*/
810	while (iMtx-- > 0)
811	RTSemFastMutexDestroy(pGMM->aChunkMtx[iMtx].hMtx);
812	RTSemFastMutexDestroy(pGMM->hMtx);
813	}
814
815	pGMM->u32Magic = 0;
816	RTMemFree(pGMM);
817	SUPR0Printf("GMMR0Init: failed! rc=%d\n", rc);
818	return rc;
819	}
820
821
822	/**
823	* Terminates the GMM component.
824	*/
825	GMMR0DECL(void) GMMR0Term(void)
826	{
827	LogFlow(("GMMTerm:\n"));
828
829	/*
830	* Take care / be paranoid...
831	*/
832	PGMM pGMM = g_pGMM;
833	if (!VALID_PTR(pGMM))
834	return;
835	if (pGMM->u32Magic != GMM_MAGIC)
836	{
837	SUPR0Printf("GMMR0Term: u32Magic=%#x\n", pGMM->u32Magic);
838	return;
839	}
840
841	/*
842	* Undo what init did and free all the resources we've acquired.
843	*/
844	/* Destroy the fundamentals. */
845	g_pGMM = NULL;
846	pGMM->u32Magic = ~GMM_MAGIC;
847	RTSemFastMutexDestroy(pGMM->hMtx);
848	pGMM->hMtx = NIL_RTSEMFASTMUTEX;
849
850	/* Free any chunks still hanging around. */
851	RTAvlU32Destroy(&pGMM->pChunks, gmmR0TermDestroyChunk, pGMM);
852
853	/* Destroy the chunk locks. */
854	for (unsigned iMtx = 0; iMtx++ < RT_ELEMENTS(pGMM->aChunkMtx); iMtx++)
855	{
856	Assert(pGMM->aChunkMtx[iMtx].cUsers == 0);
857	RTSemFastMutexDestroy(pGMM->aChunkMtx[iMtx].hMtx);
858	pGMM->aChunkMtx[iMtx].hMtx = NIL_RTSEMFASTMUTEX;
859	}
860
861	/* Finally the instance data itself. */
862	RTMemFree(pGMM);
863	LogFlow(("GMMTerm: done\n"));
864	}
865
866
867	/**
868	* RTAvlU32Destroy callback.
869	*
870	* @returns 0
871	* @param pNode The node to destroy.
872	* @param pvGMM The GMM handle.
873	*/
874	static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM)
875	{
876	PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
877
878	if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
879	SUPR0Printf("GMMR0Term: %p/%#x: cFree=%d cPrivate=%d cShared=%d cMappings=%d\n", pChunk,
880	pChunk->Core.Key, pChunk->cFree, pChunk->cPrivate, pChunk->cShared, pChunk->cMappingsX);
881
882	int rc = RTR0MemObjFree(pChunk->hMemObj, true /* fFreeMappings */);
883	if (RT_FAILURE(rc))
884	{
885	SUPR0Printf("GMMR0Term: %p/%#x: RTRMemObjFree(%p,true) -> %d (cMappings=%d)\n", pChunk,
886	pChunk->Core.Key, pChunk->hMemObj, rc, pChunk->cMappingsX);
887	AssertRC(rc);
888	}
889	pChunk->hMemObj = NIL_RTR0MEMOBJ;
890
891	RTMemFree(pChunk->paMappingsX);
892	pChunk->paMappingsX = NULL;
893
894	RTMemFree(pChunk);
895	NOREF(pvGMM);
896	return 0;
897	}
898
899
900	/**
901	* Initializes the per-VM data for the GMM.
902	*
903	* This is called from within the GVMM lock (from GVMMR0CreateVM)
904	* and should only initialize the data members so GMMR0CleanupVM
905	* can deal with them. We reserve no memory or anything here,
906	* that's done later in GMMR0InitVM.
907	*
908	* @param pGVM Pointer to the Global VM structure.
909	*/
910	GMMR0DECL(void) GMMR0InitPerVMData(PGVM pGVM)
911	{
912	AssertCompile(RT_SIZEOFMEMB(GVM,gmm.s) <= RT_SIZEOFMEMB(GVM,gmm.padding));
913
914	pGVM->gmm.s.enmPolicy = GMMOCPOLICY_INVALID;
915	pGVM->gmm.s.enmPriority = GMMPRIORITY_INVALID;
916	pGVM->gmm.s.fMayAllocate = false;
917	}
918
919
920	/**
921	* Acquires the GMM giant lock.
922	*
923	* @returns Assert status code from RTSemFastMutexRequest.
924	* @param pGMM Pointer to the GMM instance.
925	*/
926	static int gmmR0MutexAcquire(PGMM pGMM)
927	{
928	ASMAtomicIncU32(&pGMM->cMtxContenders);
929	int rc = RTSemFastMutexRequest(pGMM->hMtx);
930	ASMAtomicDecU32(&pGMM->cMtxContenders);
931	AssertRC(rc);
932	#ifdef VBOX_STRICT
933	pGMM->hMtxOwner = RTThreadNativeSelf();
934	#endif
935	return rc;
936	}
937
938
939	/**
940	* Releases the GMM giant lock.
941	*
942	* @returns Assert status code from RTSemFastMutexRequest.
943	* @param pGMM Pointer to the GMM instance.
944	*/
945	static int gmmR0MutexRelease(PGMM pGMM)
946	{
947	#ifdef VBOX_STRICT
948	pGMM->hMtxOwner = NIL_RTNATIVETHREAD;
949	#endif
950	int rc = RTSemFastMutexRelease(pGMM->hMtx);
951	AssertRC(rc);
952	return rc;
953	}
954
955
956	/**
957	* Yields the GMM giant lock if there is contention and a certain minimum time
958	* has elapsed since we took it.
959	*
960	* @returns @c true if the mutex was yielded, @c false if not.
961	* @param pGMM Pointer to the GMM instance.
962	* @param puLockNanoTS Where the lock acquisition time stamp is kept
963	* (in/out).
964	*/
965	static bool gmmR0MutexYield(PGMM pGMM, uint64_t *puLockNanoTS)
966	{
967	/*
968	* If nobody is contending the mutex, don't bother checking the time.
969	*/
970	if (ASMAtomicReadU32(&pGMM->cMtxContenders) == 0)
971	return false;
972
973	/*
974	* Don't yield if we haven't executed for at least 2 milliseconds.
975	*/
976	uint64_t uNanoNow = RTTimeSystemNanoTS();
977	if (uNanoNow - *puLockNanoTS < UINT32_C(2000000))
978	return false;
979
980	/*
981	* Yield the mutex.
982	*/
983	#ifdef VBOX_STRICT
984	pGMM->hMtxOwner = NIL_RTNATIVETHREAD;
985	#endif
986	ASMAtomicIncU32(&pGMM->cMtxContenders);
987	int rc1 = RTSemFastMutexRelease(pGMM->hMtx); AssertRC(rc1);
988
989	RTThreadYield();
990
991	int rc2 = RTSemFastMutexRequest(pGMM->hMtx); AssertRC(rc2);
992	*puLockNanoTS = RTTimeSystemNanoTS();
993	ASMAtomicDecU32(&pGMM->cMtxContenders);
994	#ifdef VBOX_STRICT
995	pGMM->hMtxOwner = RTThreadNativeSelf();
996	#endif
997
998	return true;
999	}
1000
1001
1002	/**
1003	* Acquires a chunk lock.
1004	*
1005	* The caller must own the giant lock.
1006	*
1007	* @returns Assert status code from RTSemFastMutexRequest.
1008	* @param pMtxState The chunk mutex state info. (Avoids
1009	* passing the same flags and stuff around
1010	* for subsequent release and drop-giant
1011	* calls.)
1012	* @param pGMM Pointer to the GMM instance.
1013	* @param pChunk Pointer to the chunk.
1014	* @param fFlags Flags regarding the giant lock, GMMR0CHUNK_MTX_XXX.
1015	*/
1016	static int gmmR0ChunkMutexAcquire(PGMMR0CHUNKMTXSTATE pMtxState, PGMM pGMM, PGMMCHUNK pChunk, uint32_t fFlags)
1017	{
1018	Assert(fFlags > GMMR0CHUNK_MTX_INVALID && fFlags < GMMR0CHUNK_MTX_END);
1019	Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
1020
1021	pMtxState->pGMM = pGMM;
1022	pMtxState->fFlags = (uint8_t)fFlags;
1023
1024	/*
1025	* Get the lock index and reference the lock.
1026	*/
1027	Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
1028	uint32_t iChunkMtx = pChunk->iChunkMtx;
1029	if (iChunkMtx == UINT8_MAX)
1030	{
1031	iChunkMtx = pGMM->iNextChunkMtx++;
1032	iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
1033
1034	/* Try get an unused one... */
1035	if (pGMM->aChunkMtx[iChunkMtx].cUsers)
1036	{
1037	iChunkMtx = pGMM->iNextChunkMtx++;
1038	iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
1039	if (pGMM->aChunkMtx[iChunkMtx].cUsers)
1040	{
1041	iChunkMtx = pGMM->iNextChunkMtx++;
1042	iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
1043	if (pGMM->aChunkMtx[iChunkMtx].cUsers)
1044	{
1045	iChunkMtx = pGMM->iNextChunkMtx++;
1046	iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
1047	}
1048	}
1049	}
1050
1051	pChunk->iChunkMtx = iChunkMtx;
1052	}
1053	AssertCompile(RT_ELEMENTS(pGMM->aChunkMtx) < UINT8_MAX);
1054	pMtxState->iChunkMtx = (uint8_t)iChunkMtx;
1055	ASMAtomicIncU32(&pGMM->aChunkMtx[iChunkMtx].cUsers);
1056
1057	/*
1058	* Drop the giant?
1059	*/
1060	if (fFlags != GMMR0CHUNK_MTX_KEEP_GIANT)
1061	{
1062	/** @todo GMM life cycle cleanup (we may race someone
1063	* destroying and cleaning up GMM)? */
1064	gmmR0MutexRelease(pGMM);
1065	}
1066
1067	/*
1068	* Take the chunk mutex.
1069	*/
1070	int rc = RTSemFastMutexRequest(pGMM->aChunkMtx[iChunkMtx].hMtx);
1071	AssertRC(rc);
1072	return rc;
1073	}
1074
1075
1076	/**
1077	* Releases the GMM giant lock.
1078	*
1079	* @returns Assert status code from RTSemFastMutexRequest.
1080	* @param pGMM Pointer to the GMM instance.
1081	* @param pChunk Pointer to the chunk if it's still
1082	* alive, NULL if it isn't. This is used to deassociate
1083	* the chunk from the mutex on the way out so a new one
1084	* can be selected next time, thus avoiding contented
1085	* mutexes.
1086	*/
1087	static int gmmR0ChunkMutexRelease(PGMMR0CHUNKMTXSTATE pMtxState, PGMMCHUNK pChunk)
1088	{
1089	PGMM pGMM = pMtxState->pGMM;
1090
1091	/*
1092	* Release the chunk mutex and reacquire the giant if requested.
1093	*/
1094	int rc = RTSemFastMutexRelease(pGMM->aChunkMtx[pMtxState->iChunkMtx].hMtx);
1095	AssertRC(rc);
1096	if (pMtxState->fFlags == GMMR0CHUNK_MTX_RETAKE_GIANT)
1097	rc = gmmR0MutexAcquire(pGMM);
1098	else
1099	Assert((pMtxState->fFlags != GMMR0CHUNK_MTX_DROP_GIANT) == (pGMM->hMtxOwner == RTThreadNativeSelf()));
1100
1101	/*
1102	* Drop the chunk mutex user reference and deassociate it from the chunk
1103	* when possible.
1104	*/
1105	if ( ASMAtomicDecU32(&pGMM->aChunkMtx[pMtxState->iChunkMtx].cUsers) == 0
1106	&& pChunk
1107	&& RT_SUCCESS(rc) )
1108	{
1109	if (pMtxState->fFlags != GMMR0CHUNK_MTX_DROP_GIANT)
1110	pChunk->iChunkMtx = UINT8_MAX;
1111	else
1112	{
1113	rc = gmmR0MutexAcquire(pGMM);
1114	if (RT_SUCCESS(rc))
1115	{
1116	if (pGMM->aChunkMtx[pMtxState->iChunkMtx].cUsers == 0)
1117	pChunk->iChunkMtx = UINT8_MAX;
1118	rc = gmmR0MutexRelease(pGMM);
1119	}
1120	}
1121	}
1122
1123	pMtxState->pGMM = NULL;
1124	return rc;
1125	}
1126
1127
1128	/**
1129	* Drops the giant GMM lock we kept in gmmR0ChunkMutexAcquire while keeping the
1130	* chunk locked.
1131	*
1132	* This only works if gmmR0ChunkMutexAcquire was called with
1133	* GMMR0CHUNK_MTX_KEEP_GIANT. gmmR0ChunkMutexRelease will retake the giant
1134	* mutex, i.e. behave as if GMMR0CHUNK_MTX_RETAKE_GIANT was used.
1135	*
1136	* @returns VBox status code (assuming success is ok).
1137	* @param pMtxState Pointer to the chunk mutex state.
1138	*/
1139	static int gmmR0ChunkMutexDropGiant(PGMMR0CHUNKMTXSTATE pMtxState)
1140	{
1141	AssertReturn(pMtxState->fFlags == GMMR0CHUNK_MTX_KEEP_GIANT, VERR_INTERNAL_ERROR_2);
1142	Assert(pMtxState->pGMM->hMtxOwner == RTThreadNativeSelf());
1143	pMtxState->fFlags = GMMR0CHUNK_MTX_RETAKE_GIANT;
1144	/** @todo GMM life cycle cleanup (we may race someone
1145	* destroying and cleaning up GMM)? */
1146	return gmmR0MutexRelease(pMtxState->pGMM);
1147	}
1148
1149
1150	/**
1151	* For experimenting with NUMA affinity and such.
1152	*
1153	* @returns The current NUMA Node ID.
1154	*/
1155	static uint16_t gmmR0GetCurrentNumaNodeId(void)
1156	{
1157	#if 1
1158	return GMM_CHUNK_NUMA_ID_UNKNOWN;
1159	#else
1160	return RTMpCpuId() / 16;
1161	#endif
1162	}
1163
1164
1165
1166	/**
1167	* Cleans up when a VM is terminating.
1168	*
1169	* @param pGVM Pointer to the Global VM structure.
1170	*/
1171	GMMR0DECL(void) GMMR0CleanupVM(PGVM pGVM)
1172	{
1173	LogFlow(("GMMR0CleanupVM: pGVM=%p:{.pVM=%p, .hSelf=%#x}\n", pGVM, pGVM->pVM, pGVM->hSelf));
1174
1175	PGMM pGMM;
1176	GMM_GET_VALID_INSTANCE_VOID(pGMM);
1177
1178	#ifdef VBOX_WITH_PAGE_SHARING
1179	/*
1180	* Clean up all registered shared modules first.
1181	*/
1182	gmmR0SharedModuleCleanup(pGMM, pGVM);
1183	#endif
1184
1185	gmmR0MutexAcquire(pGMM);
1186	uint64_t uLockNanoTS = RTTimeSystemNanoTS();
1187	GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
1188
1189	/*
1190	* The policy is 'INVALID' until the initial reservation
1191	* request has been serviced.
1192	*/
1193	if ( pGVM->gmm.s.enmPolicy > GMMOCPOLICY_INVALID
1194	&& pGVM->gmm.s.enmPolicy < GMMOCPOLICY_END)
1195	{
1196	/*
1197	* If it's the last VM around, we can skip walking all the chunk looking
1198	* for the pages owned by this VM and instead flush the whole shebang.
1199	*
1200	* This takes care of the eventuality that a VM has left shared page
1201	* references behind (shouldn't happen of course, but you never know).
1202	*/
1203	Assert(pGMM->cRegisteredVMs);
1204	pGMM->cRegisteredVMs--;
1205
1206	/*
1207	* Walk the entire pool looking for pages that belong to this VM
1208	* and leftover mappings. (This'll only catch private pages,
1209	* shared pages will be 'left behind'.)
1210	*/
1211	uint64_t cPrivatePages = pGVM->gmm.s.cPrivatePages; /* save */
1212
1213	unsigned iCountDown = 64;
1214	bool fRedoFromStart;
1215	PGMMCHUNK pChunk;
1216	do
1217	{
1218	fRedoFromStart = false;
1219	RTListForEachReverse(&pGMM->ChunkList, pChunk, GMMCHUNK, ListNode)
1220	{
1221	uint32_t const cFreeChunksOld = pGMM->cFreedChunks;
1222	if (gmmR0CleanupVMScanChunk(pGMM, pGVM, pChunk))
1223	{
1224	/* We left the giant mutex, so reset the yield counters. */
1225	uLockNanoTS = RTTimeSystemNanoTS();
1226	iCountDown = 64;
1227	}
1228	else
1229	{
1230	/* Didn't leave it, so do normal yielding. */
1231	if (!iCountDown)
1232	gmmR0MutexYield(pGMM, &uLockNanoTS);
1233	else
1234	iCountDown--;
1235	}
1236	if (pGMM->cFreedChunks != cFreeChunksOld)
1237	break;
1238	}
1239	} while (fRedoFromStart);
1240
1241	if (pGVM->gmm.s.cPrivatePages)
1242	SUPR0Printf("GMMR0CleanupVM: hGVM=%#x has %#x private pages that cannot be found!\n", pGVM->hSelf, pGVM->gmm.s.cPrivatePages);
1243
1244	pGMM->cAllocatedPages -= cPrivatePages;
1245
1246	/*
1247	* Free empty chunks.
1248	*/
1249	do
1250	{
1251	fRedoFromStart = false;
1252	iCountDown = 10240;
1253	pChunk = pGMM->Private.apLists[GMM_CHUNK_FREE_SET_UNUSED_LIST];
1254	while (pChunk)
1255	{
1256	PGMMCHUNK pNext = pChunk->pFreeNext;
1257	Assert(pChunk->cFree == GMM_CHUNK_NUM_PAGES);
1258	if ( !pGMM->fBoundMemoryMode
1259	\|\| pChunk->hGVM == pGVM->hSelf)
1260	{
1261	uint64_t const idGenerationOld = pGMM->Private.idGeneration;
1262	if (gmmR0FreeChunk(pGMM, pGVM, pChunk, true /fRelaxedSem/))
1263	{
1264	/* We've left the giant mutex, restart? (+1 for our unlink) */
1265	fRedoFromStart = pGMM->Private.idGeneration != idGenerationOld + 1;
1266	if (fRedoFromStart)
1267	break;
1268	uLockNanoTS = RTTimeSystemNanoTS();
1269	iCountDown = 10240;
1270	}
1271	}
1272
1273	/* Advance and maybe yield the lock. */
1274	pChunk = pNext;
1275	if (--iCountDown == 0)
1276	{
1277	uint64_t const idGenerationOld = pGMM->Private.idGeneration;
1278	fRedoFromStart = gmmR0MutexYield(pGMM, &uLockNanoTS)
1279	&& pGMM->Private.idGeneration != idGenerationOld;
1280	if (fRedoFromStart)
1281	break;
1282	iCountDown = 10240;
1283	}
1284	}
1285	} while (fRedoFromStart);
1286
1287	/*
1288	* Account for shared pages that weren't freed.
1289	*/
1290	if (pGVM->gmm.s.cSharedPages)
1291	{
1292	Assert(pGMM->cSharedPages >= pGVM->gmm.s.cSharedPages);
1293	SUPR0Printf("GMMR0CleanupVM: hGVM=%#x left %#x shared pages behind!\n", pGVM->hSelf, pGVM->gmm.s.cSharedPages);
1294	pGMM->cLeftBehindSharedPages += pGVM->gmm.s.cSharedPages;
1295	}
1296
1297	/*
1298	* Clean up balloon statistics in case the VM process crashed.
1299	*/
1300	Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.cBalloonedPages);
1301	pGMM->cBalloonedPages -= pGVM->gmm.s.cBalloonedPages;
1302
1303	/*
1304	* Update the over-commitment management statistics.
1305	*/
1306	pGMM->cReservedPages -= pGVM->gmm.s.Reserved.cBasePages
1307	+ pGVM->gmm.s.Reserved.cFixedPages
1308	+ pGVM->gmm.s.Reserved.cShadowPages;
1309	switch (pGVM->gmm.s.enmPolicy)
1310	{
1311	case GMMOCPOLICY_NO_OC:
1312	break;
1313	default:
1314	/** @todo Update GMM->cOverCommittedPages */
1315	break;
1316	}
1317	}
1318
1319	/* zap the GVM data. */
1320	pGVM->gmm.s.enmPolicy = GMMOCPOLICY_INVALID;
1321	pGVM->gmm.s.enmPriority = GMMPRIORITY_INVALID;
1322	pGVM->gmm.s.fMayAllocate = false;
1323
1324	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1325	gmmR0MutexRelease(pGMM);
1326
1327	LogFlow(("GMMR0CleanupVM: returns\n"));
1328	}
1329
1330
1331	/**
1332	* Scan one chunk for private pages belonging to the specified VM.
1333	*
1334	* @note This function may drop the gian mutex!
1335	*
1336	* @returns @c true if we've temporarily dropped the giant mutex, @c false if
1337	* we didn't.
1338	* @param pGMM Pointer to the GMM instance.
1339	* @param pGVM The global VM handle.
1340	* @param pChunk The chunk to scan.
1341	*/
1342	static bool gmmR0CleanupVMScanChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
1343	{
1344	/*
1345	* Look for pages belonging to the VM.
1346	* (Perform some internal checks while we're scanning.)
1347	*/
1348	#ifndef VBOX_STRICT
1349	if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
1350	#endif
1351	{
1352	unsigned cPrivate = 0;
1353	unsigned cShared = 0;
1354	unsigned cFree = 0;
1355
1356	gmmR0UnlinkChunk(pChunk); /* avoiding cFreePages updates. */
1357
1358	uint16_t hGVM = pGVM->hSelf;
1359	unsigned iPage = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
1360	while (iPage-- > 0)
1361	if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
1362	{
1363	if (pChunk->aPages[iPage].Private.hGVM == hGVM)
1364	{
1365	/*
1366	* Free the page.
1367	*
1368	* The reason for not using gmmR0FreePrivatePage here is that we
1369	* must not cause the chunk to be freed from under us - we're in
1370	* an AVL tree walk here.
1371	*/
1372	pChunk->aPages[iPage].u = 0;
1373	pChunk->aPages[iPage].Free.iNext = pChunk->iFreeHead;
1374	pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
1375	pChunk->iFreeHead = iPage;
1376	pChunk->cPrivate--;
1377	pChunk->cFree++;
1378	pGVM->gmm.s.cPrivatePages--;
1379	cFree++;
1380	}
1381	else
1382	cPrivate++;
1383	}
1384	else if (GMM_PAGE_IS_FREE(&pChunk->aPages[iPage]))
1385	cFree++;
1386	else
1387	cShared++;
1388
1389	gmmR0LinkChunk(pChunk, pChunk->cShared ? &g_pGMM->Shared : &g_pGMM->Private);
1390
1391	/*
1392	* Did it add up?
1393	*/
1394	if (RT_UNLIKELY( pChunk->cFree != cFree
1395	\|\| pChunk->cPrivate != cPrivate
1396	\|\| pChunk->cShared != cShared))
1397	{
1398	SUPR0Printf("gmmR0CleanupVMScanChunk: Chunk %p/%#x has bogus stats - free=%d/%d private=%d/%d shared=%d/%d\n",
1399	pChunk->cFree, cFree, pChunk->cPrivate, cPrivate, pChunk->cShared, cShared);
1400	pChunk->cFree = cFree;
1401	pChunk->cPrivate = cPrivate;
1402	pChunk->cShared = cShared;
1403	}
1404	}
1405
1406	/*
1407	* If not in bound memory mode, we should reset the hGVM field
1408	* if it has our handle in it.
1409	*/
1410	if (pChunk->hGVM == pGVM->hSelf)
1411	{
1412	if (!g_pGMM->fBoundMemoryMode)
1413	pChunk->hGVM = NIL_GVM_HANDLE;
1414	else if (pChunk->cFree != GMM_CHUNK_NUM_PAGES)
1415	{
1416	SUPR0Printf("gmmR0CleanupVMScanChunk: %p/%#x: cFree=%#x - it should be 0 in bound mode!\n",
1417	pChunk, pChunk->Core.Key, pChunk->cFree);
1418	AssertMsgFailed(("%p/%#x: cFree=%#x - it should be 0 in bound mode!\n", pChunk, pChunk->Core.Key, pChunk->cFree));
1419
1420	gmmR0UnlinkChunk(pChunk);
1421	pChunk->cFree = GMM_CHUNK_NUM_PAGES;
1422	gmmR0LinkChunk(pChunk, pChunk->cShared ? &g_pGMM->Shared : &g_pGMM->Private);
1423	}
1424	}
1425
1426	/*
1427	* Look for a mapping belonging to the terminating VM.
1428	*/
1429	GMMR0CHUNKMTXSTATE MtxState;
1430	gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
1431	unsigned cMappings = pChunk->cMappingsX;
1432	for (unsigned i = 0; i < cMappings; i++)
1433	if (pChunk->paMappingsX[i].pGVM == pGVM)
1434	{
1435	gmmR0ChunkMutexDropGiant(&MtxState);
1436
1437	RTR0MEMOBJ hMemObj = pChunk->paMappingsX[i].hMapObj;
1438
1439	cMappings--;
1440	if (i < cMappings)
1441	pChunk->paMappingsX[i] = pChunk->paMappingsX[cMappings];
1442	pChunk->paMappingsX[cMappings].pGVM = NULL;
1443	pChunk->paMappingsX[cMappings].hMapObj = NIL_RTR0MEMOBJ;
1444	Assert(pChunk->cMappingsX - 1U == cMappings);
1445	pChunk->cMappingsX = cMappings;
1446
1447	int rc = RTR0MemObjFree(hMemObj, false /* fFreeMappings (NA) */);
1448	if (RT_FAILURE(rc))
1449	{
1450	SUPR0Printf("gmmR0CleanupVMScanChunk: %p/%#x: mapping #%x: RTRMemObjFree(%p,false) -> %d \n",
1451	pChunk, pChunk->Core.Key, i, hMemObj, rc);
1452	AssertRC(rc);
1453	}
1454
1455	gmmR0ChunkMutexRelease(&MtxState, pChunk);
1456	return true;
1457	}
1458
1459	gmmR0ChunkMutexRelease(&MtxState, pChunk);
1460	return false;
1461	}
1462
1463
1464	/**
1465	* The initial resource reservations.
1466	*
1467	* This will make memory reservations according to policy and priority. If there aren't
1468	* sufficient resources available to sustain the VM this function will fail and all
1469	* future allocations requests will fail as well.
1470	*
1471	* These are just the initial reservations made very very early during the VM creation
1472	* process and will be adjusted later in the GMMR0UpdateReservation call after the
1473	* ring-3 init has completed.
1474	*
1475	* @returns VBox status code.
1476	* @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1477	* @retval VERR_GMM_
1478	*
1479	* @param pVM Pointer to the shared VM structure.
1480	* @param idCpu VCPU id
1481	* @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1482	* This does not include MMIO2 and similar.
1483	* @param cShadowPages The number of pages that may be allocated for shadow paging structures.
1484	* @param cFixedPages The number of pages that may be allocated for fixed objects like the
1485	* hyper heap, MMIO2 and similar.
1486	* @param enmPolicy The OC policy to use on this VM.
1487	* @param enmPriority The priority in an out-of-memory situation.
1488	*
1489	* @thread The creator thread / EMT.
1490	*/
1491	GMMR0DECL(int) GMMR0InitialReservation(PVM pVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages,
1492	GMMOCPOLICY enmPolicy, GMMPRIORITY enmPriority)
1493	{
1494	LogFlow(("GMMR0InitialReservation: pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x enmPolicy=%d enmPriority=%d\n",
1495	pVM, cBasePages, cShadowPages, cFixedPages, enmPolicy, enmPriority));
1496
1497	/*
1498	* Validate, get basics and take the semaphore.
1499	*/
1500	PGMM pGMM;
1501	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1502	PGVM pGVM;
1503	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
1504	if (RT_FAILURE(rc))
1505	return rc;
1506
1507	AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1508	AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1509	AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1510	AssertReturn(enmPolicy > GMMOCPOLICY_INVALID && enmPolicy < GMMOCPOLICY_END, VERR_INVALID_PARAMETER);
1511	AssertReturn(enmPriority > GMMPRIORITY_INVALID && enmPriority < GMMPRIORITY_END, VERR_INVALID_PARAMETER);
1512
1513	gmmR0MutexAcquire(pGMM);
1514	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1515	{
1516	if ( !pGVM->gmm.s.Reserved.cBasePages
1517	&& !pGVM->gmm.s.Reserved.cFixedPages
1518	&& !pGVM->gmm.s.Reserved.cShadowPages)
1519	{
1520	/*
1521	* Check if we can accommodate this.
1522	*/
1523	/* ... later ... */
1524	if (RT_SUCCESS(rc))
1525	{
1526	/*
1527	* Update the records.
1528	*/
1529	pGVM->gmm.s.Reserved.cBasePages = cBasePages;
1530	pGVM->gmm.s.Reserved.cFixedPages = cFixedPages;
1531	pGVM->gmm.s.Reserved.cShadowPages = cShadowPages;
1532	pGVM->gmm.s.enmPolicy = enmPolicy;
1533	pGVM->gmm.s.enmPriority = enmPriority;
1534	pGVM->gmm.s.fMayAllocate = true;
1535
1536	pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1537	pGMM->cRegisteredVMs++;
1538	}
1539	}
1540	else
1541	rc = VERR_WRONG_ORDER;
1542	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1543	}
1544	else
1545	rc = VERR_INTERNAL_ERROR_5;
1546	gmmR0MutexRelease(pGMM);
1547	LogFlow(("GMMR0InitialReservation: returns %Rrc\n", rc));
1548	return rc;
1549	}
1550
1551
1552	/**
1553	* VMMR0 request wrapper for GMMR0InitialReservation.
1554	*
1555	* @returns see GMMR0InitialReservation.
1556	* @param pVM Pointer to the shared VM structure.
1557	* @param idCpu VCPU id
1558	* @param pReq The request packet.
1559	*/
1560	GMMR0DECL(int) GMMR0InitialReservationReq(PVM pVM, VMCPUID idCpu, PGMMINITIALRESERVATIONREQ pReq)
1561	{
1562	/*
1563	* Validate input and pass it on.
1564	*/
1565	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1566	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1567	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
1568
1569	return GMMR0InitialReservation(pVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages, pReq->enmPolicy, pReq->enmPriority);
1570	}
1571
1572
1573	/**
1574	* This updates the memory reservation with the additional MMIO2 and ROM pages.
1575	*
1576	* @returns VBox status code.
1577	* @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1578	*
1579	* @param pVM Pointer to the shared VM structure.
1580	* @param idCpu VCPU id
1581	* @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1582	* This does not include MMIO2 and similar.
1583	* @param cShadowPages The number of pages that may be allocated for shadow paging structures.
1584	* @param cFixedPages The number of pages that may be allocated for fixed objects like the
1585	* hyper heap, MMIO2 and similar.
1586	*
1587	* @thread EMT.
1588	*/
1589	GMMR0DECL(int) GMMR0UpdateReservation(PVM pVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages)
1590	{
1591	LogFlow(("GMMR0UpdateReservation: pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x\n",
1592	pVM, cBasePages, cShadowPages, cFixedPages));
1593
1594	/*
1595	* Validate, get basics and take the semaphore.
1596	*/
1597	PGMM pGMM;
1598	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1599	PGVM pGVM;
1600	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
1601	if (RT_FAILURE(rc))
1602	return rc;
1603
1604	AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1605	AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1606	AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1607
1608	gmmR0MutexAcquire(pGMM);
1609	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1610	{
1611	if ( pGVM->gmm.s.Reserved.cBasePages
1612	&& pGVM->gmm.s.Reserved.cFixedPages
1613	&& pGVM->gmm.s.Reserved.cShadowPages)
1614	{
1615	/*
1616	* Check if we can accommodate this.
1617	*/
1618	/* ... later ... */
1619	if (RT_SUCCESS(rc))
1620	{
1621	/*
1622	* Update the records.
1623	*/
1624	pGMM->cReservedPages -= pGVM->gmm.s.Reserved.cBasePages
1625	+ pGVM->gmm.s.Reserved.cFixedPages
1626	+ pGVM->gmm.s.Reserved.cShadowPages;
1627	pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1628
1629	pGVM->gmm.s.Reserved.cBasePages = cBasePages;
1630	pGVM->gmm.s.Reserved.cFixedPages = cFixedPages;
1631	pGVM->gmm.s.Reserved.cShadowPages = cShadowPages;
1632	}
1633	}
1634	else
1635	rc = VERR_WRONG_ORDER;
1636	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1637	}
1638	else
1639	rc = VERR_INTERNAL_ERROR_5;
1640	gmmR0MutexRelease(pGMM);
1641	LogFlow(("GMMR0UpdateReservation: returns %Rrc\n", rc));
1642	return rc;
1643	}
1644
1645
1646	/**
1647	* VMMR0 request wrapper for GMMR0UpdateReservation.
1648	*
1649	* @returns see GMMR0UpdateReservation.
1650	* @param pVM Pointer to the shared VM structure.
1651	* @param idCpu VCPU id
1652	* @param pReq The request packet.
1653	*/
1654	GMMR0DECL(int) GMMR0UpdateReservationReq(PVM pVM, VMCPUID idCpu, PGMMUPDATERESERVATIONREQ pReq)
1655	{
1656	/*
1657	* Validate input and pass it on.
1658	*/
1659	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1660	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1661	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
1662
1663	return GMMR0UpdateReservation(pVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages);
1664	}
1665
1666
1667	/**
1668	* Performs sanity checks on a free set.
1669	*
1670	* @returns Error count.
1671	*
1672	* @param pGMM Pointer to the GMM instance.
1673	* @param pSet Pointer to the set.
1674	* @param pszSetName The set name.
1675	* @param pszFunction The function from which it was called.
1676	* @param uLine The line number.
1677	*/
1678	static uint32_t gmmR0SanityCheckSet(PGMM pGMM, PGMMCHUNKFREESET pSet, const char *pszSetName,
1679	const char *pszFunction, unsigned uLineNo)
1680	{
1681	uint32_t cErrors = 0;
1682
1683	/*
1684	* Count the free pages in all the chunks and match it against pSet->cFreePages.
1685	*/
1686	uint32_t cPages = 0;
1687	for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1688	{
1689	for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1690	{
1691	/** @todo check that the chunk is hash into the right set. */
1692	cPages += pCur->cFree;
1693	}
1694	}
1695	if (RT_UNLIKELY(cPages != pSet->cFreePages))
1696	{
1697	SUPR0Printf("GMM insanity: found %#x pages in the %s set, expected %#x. (%s, line %u)\n",
1698	cPages, pszSetName, pSet->cFreePages, pszFunction, uLineNo);
1699	cErrors++;
1700	}
1701
1702	return cErrors;
1703	}
1704
1705
1706	/**
1707	* Performs some sanity checks on the GMM while owning lock.
1708	*
1709	* @returns Error count.
1710	*
1711	* @param pGMM Pointer to the GMM instance.
1712	* @param pszFunction The function from which it is called.
1713	* @param uLineNo The line number.
1714	*/
1715	static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo)
1716	{
1717	uint32_t cErrors = 0;
1718
1719	cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->Private, "private", pszFunction, uLineNo);
1720	cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->Shared, "shared", pszFunction, uLineNo);
1721	/** @todo add more sanity checks. */
1722
1723	return cErrors;
1724	}
1725
1726
1727	/**
1728	* Looks up a chunk in the tree and fill in the TLB entry for it.
1729	*
1730	* This is not expected to fail and will bitch if it does.
1731	*
1732	* @returns Pointer to the allocation chunk, NULL if not found.
1733	* @param pGMM Pointer to the GMM instance.
1734	* @param idChunk The ID of the chunk to find.
1735	* @param pTlbe Pointer to the TLB entry.
1736	*/
1737	static PGMMCHUNK gmmR0GetChunkSlow(PGMM pGMM, uint32_t idChunk, PGMMCHUNKTLBE pTlbe)
1738	{
1739	PGMMCHUNK pChunk = (PGMMCHUNK)RTAvlU32Get(&pGMM->pChunks, idChunk);
1740	AssertMsgReturn(pChunk, ("Chunk %#x not found!\n", idChunk), NULL);
1741	pTlbe->idChunk = idChunk;
1742	pTlbe->pChunk = pChunk;
1743	return pChunk;
1744	}
1745
1746
1747	/**
1748	* Finds a allocation chunk.
1749	*
1750	* This is not expected to fail and will bitch if it does.
1751	*
1752	* @returns Pointer to the allocation chunk, NULL if not found.
1753	* @param pGMM Pointer to the GMM instance.
1754	* @param idChunk The ID of the chunk to find.
1755	*/
1756	DECLINLINE(PGMMCHUNK) gmmR0GetChunk(PGMM pGMM, uint32_t idChunk)
1757	{
1758	/*
1759	* Do a TLB lookup, branch if not in the TLB.
1760	*/
1761	PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(idChunk)];
1762	if ( pTlbe->idChunk != idChunk
1763	\|\| !pTlbe->pChunk)
1764	return gmmR0GetChunkSlow(pGMM, idChunk, pTlbe);
1765	return pTlbe->pChunk;
1766	}
1767
1768
1769	/**
1770	* Finds a page.
1771	*
1772	* This is not expected to fail and will bitch if it does.
1773	*
1774	* @returns Pointer to the page, NULL if not found.
1775	* @param pGMM Pointer to the GMM instance.
1776	* @param idPage The ID of the page to find.
1777	*/
1778	DECLINLINE(PGMMPAGE) gmmR0GetPage(PGMM pGMM, uint32_t idPage)
1779	{
1780	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
1781	if (RT_LIKELY(pChunk))
1782	return &pChunk->aPages[idPage & GMM_PAGEID_IDX_MASK];
1783	return NULL;
1784	}
1785
1786
1787	/**
1788	* Gets the host physical address for a page given by it's ID.
1789	*
1790	* @returns The host physical address or NIL_RTHCPHYS.
1791	* @param pGMM Pointer to the GMM instance.
1792	* @param idPage The ID of the page to find.
1793	*/
1794	DECLINLINE(RTHCPHYS) gmmR0GetPageHCPhys(PGMM pGMM, uint32_t idPage)
1795	{
1796	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
1797	if (RT_LIKELY(pChunk))
1798	return RTR0MemObjGetPagePhysAddr(pChunk->hMemObj, idPage & GMM_PAGEID_IDX_MASK);
1799	return NIL_RTHCPHYS;
1800	}
1801
1802
1803	/**
1804	* Selects the appropriate free list given the number of free pages.
1805	*
1806	* @returns Free list index.
1807	* @param cFree The number of free pages in the chunk.
1808	*/
1809	DECLINLINE(unsigned) gmmR0SelectFreeSetList(unsigned cFree)
1810	{
1811	unsigned iList = cFree >> GMM_CHUNK_FREE_SET_SHIFT;
1812	AssertMsg(iList < RT_SIZEOFMEMB(GMMCHUNKFREESET, apLists) / RT_SIZEOFMEMB(GMMCHUNKFREESET, apLists[0]),
1813	("%d (%u)\n", iList, cFree));
1814	return iList;
1815	}
1816
1817
1818	/**
1819	* Unlinks the chunk from the free list it's currently on (if any).
1820	*
1821	* @param pChunk The allocation chunk.
1822	*/
1823	DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk)
1824	{
1825	PGMMCHUNKFREESET pSet = pChunk->pSet;
1826	if (RT_LIKELY(pSet))
1827	{
1828	pSet->cFreePages -= pChunk->cFree;
1829	pSet->idGeneration++;
1830
1831	PGMMCHUNK pPrev = pChunk->pFreePrev;
1832	PGMMCHUNK pNext = pChunk->pFreeNext;
1833	if (pPrev)
1834	pPrev->pFreeNext = pNext;
1835	else
1836	pSet->apLists[gmmR0SelectFreeSetList(pChunk->cFree)] = pNext;
1837	if (pNext)
1838	pNext->pFreePrev = pPrev;
1839
1840	pChunk->pSet = NULL;
1841	pChunk->pFreeNext = NULL;
1842	pChunk->pFreePrev = NULL;
1843	}
1844	else
1845	{
1846	Assert(!pChunk->pFreeNext);
1847	Assert(!pChunk->pFreePrev);
1848	Assert(!pChunk->cFree);
1849	}
1850	}
1851
1852
1853	/**
1854	* Links the chunk onto the appropriate free list in the specified free set.
1855	*
1856	* If no free entries, it's not linked into any list.
1857	*
1858	* @param pChunk The allocation chunk.
1859	* @param pSet The free set.
1860	*/
1861	DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet)
1862	{
1863	Assert(!pChunk->pSet);
1864	Assert(!pChunk->pFreeNext);
1865	Assert(!pChunk->pFreePrev);
1866
1867	if (pChunk->cFree > 0)
1868	{
1869	pChunk->pSet = pSet;
1870	pChunk->pFreePrev = NULL;
1871	unsigned const iList = gmmR0SelectFreeSetList(pChunk->cFree);
1872	pChunk->pFreeNext = pSet->apLists[iList];
1873	if (pChunk->pFreeNext)
1874	pChunk->pFreeNext->pFreePrev = pChunk;
1875	pSet->apLists[iList] = pChunk;
1876
1877	pSet->cFreePages += pChunk->cFree;
1878	pSet->idGeneration++;
1879	}
1880	}
1881
1882
1883	/**
1884	* Frees a Chunk ID.
1885	*
1886	* @param pGMM Pointer to the GMM instance.
1887	* @param idChunk The Chunk ID to free.
1888	*/
1889	static void gmmR0FreeChunkId(PGMM pGMM, uint32_t idChunk)
1890	{
1891	AssertReturnVoid(idChunk != NIL_GMM_CHUNKID);
1892	AssertMsg(ASMBitTest(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk));
1893	ASMAtomicBitClear(&pGMM->bmChunkId[0], idChunk);
1894	}
1895
1896
1897	/**
1898	* Allocates a new Chunk ID.
1899	*
1900	* @returns The Chunk ID.
1901	* @param pGMM Pointer to the GMM instance.
1902	*/
1903	static uint32_t gmmR0AllocateChunkId(PGMM pGMM)
1904	{
1905	AssertCompile(!((GMM_CHUNKID_LAST + 1) & 31)); /* must be a multiple of 32 */
1906	AssertCompile(NIL_GMM_CHUNKID == 0);
1907
1908	/*
1909	* Try the next sequential one.
1910	*/
1911	int32_t idChunk = ++pGMM->idChunkPrev;
1912	#if 0 /** @todo enable this code */
1913	if ( idChunk <= GMM_CHUNKID_LAST
1914	&& idChunk > NIL_GMM_CHUNKID
1915	&& !ASMAtomicBitTestAndSet(&pVMM->bmChunkId[0], idChunk))
1916	return idChunk;
1917	#endif
1918
1919	/*
1920	* Scan sequentially from the last one.
1921	*/
1922	if ( (uint32_t)idChunk < GMM_CHUNKID_LAST
1923	&& idChunk > NIL_GMM_CHUNKID)
1924	{
1925	idChunk = ASMBitNextClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1, idChunk);
1926	if (idChunk > NIL_GMM_CHUNKID)
1927	{
1928	AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
1929	return pGMM->idChunkPrev = idChunk;
1930	}
1931	}
1932
1933	/*
1934	* Ok, scan from the start.
1935	* We're not racing anyone, so there is no need to expect failures or have restart loops.
1936	*/
1937	idChunk = ASMBitFirstClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1);
1938	AssertMsgReturn(idChunk > NIL_GMM_CHUNKID, ("%#x\n", idChunk), NIL_GVM_HANDLE);
1939	AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
1940
1941	return pGMM->idChunkPrev = idChunk;
1942	}
1943
1944
1945	/**
1946	* Registers a new chunk of memory.
1947	*
1948	* This is called by both gmmR0AllocateOneChunk and GMMR0SeedChunk.
1949	*
1950	* @returns VBox status code. On success, the giant GMM lock will be held, the
1951	* caller must release it (ugly).
1952	* @param pGMM Pointer to the GMM instance.
1953	* @param pSet Pointer to the set.
1954	* @param MemObj The memory object for the chunk.
1955	* @param hGVM The affinity of the chunk. NIL_GVM_HANDLE for no
1956	* affinity.
1957	* @param fChunkFlags The chunk flags, GMM_CHUNK_FLAGS_XXX.
1958	* @param ppChunk Chunk address (out). Optional.
1959	*
1960	* @remarks The caller must not own the giant GMM mutex.
1961	* The giant GMM mutex will be acquired and returned acquired in
1962	* the success path. On failure, no locks will be held.
1963	*/
1964	static int gmmR0RegisterChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, RTR0MEMOBJ MemObj, uint16_t hGVM, uint16_t fChunkFlags,
1965	PGMMCHUNK *ppChunk)
1966	{
1967	Assert(pGMM->hMtxOwner != RTThreadNativeSelf());
1968	Assert(hGVM != NIL_GVM_HANDLE \|\| pGMM->fBoundMemoryMode);
1969	Assert(fChunkFlags == 0 \|\| fChunkFlags == GMM_CHUNK_FLAGS_LARGE_PAGE);
1970
1971	int rc;
1972	PGMMCHUNK pChunk = (PGMMCHUNK)RTMemAllocZ(sizeof(*pChunk));
1973	if (pChunk)
1974	{
1975	/*
1976	* Initialize it.
1977	*/
1978	pChunk->hMemObj = MemObj;
1979	pChunk->cFree = GMM_CHUNK_NUM_PAGES;
1980	pChunk->hGVM = hGVM;
1981	/pChunk->iFreeHead = 0;/
1982	pChunk->idNumaNode = gmmR0GetCurrentNumaNodeId();
1983	pChunk->iChunkMtx = UINT8_MAX;
1984	pChunk->fFlags = fChunkFlags;
1985	for (unsigned iPage = 0; iPage < RT_ELEMENTS(pChunk->aPages) - 1; iPage++)
1986	{
1987	pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
1988	pChunk->aPages[iPage].Free.iNext = iPage + 1;
1989	}
1990	pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.u2State = GMM_PAGE_STATE_FREE;
1991	pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.iNext = UINT16_MAX;
1992
1993	/*
1994	* Allocate a Chunk ID and insert it into the tree.
1995	* This has to be done behind the mutex of course.
1996	*/
1997	rc = gmmR0MutexAcquire(pGMM);
1998	if (RT_SUCCESS(rc))
1999	{
2000	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2001	{
2002	pChunk->Core.Key = gmmR0AllocateChunkId(pGMM);
2003	if ( pChunk->Core.Key != NIL_GMM_CHUNKID
2004	&& pChunk->Core.Key <= GMM_CHUNKID_LAST
2005	&& RTAvlU32Insert(&pGMM->pChunks, &pChunk->Core))
2006	{
2007	pGMM->cChunks++;
2008	RTListAppend(&pGMM->ChunkList, &pChunk->ListNode);
2009	gmmR0LinkChunk(pChunk, pSet);
2010	LogFlow(("gmmR0RegisterChunk: pChunk=%p id=%#x cChunks=%d\n", pChunk, pChunk->Core.Key, pGMM->cChunks));
2011
2012	if (ppChunk)
2013	*ppChunk = pChunk;
2014	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2015	return VINF_SUCCESS;
2016	}
2017
2018	/* bail out */
2019	rc = VERR_INTERNAL_ERROR;
2020	}
2021	else
2022	rc = VERR_INTERNAL_ERROR_5;
2023	gmmR0MutexRelease(pGMM);
2024	}
2025
2026	RTMemFree(pChunk);
2027	}
2028	else
2029	rc = VERR_NO_MEMORY;
2030	return rc;
2031	}
2032
2033
2034	/**
2035	* Allocate one new chunk and add it to the specified free set.
2036	*
2037	* @returns VBox status code.
2038	* @param pGMM Pointer to the GMM instance.
2039	* @param pSet Pointer to the set.
2040	* @param hGVM The affinity of the new chunk.
2041	*
2042	* @remarks The giant mutex will be temporarily abandond during the allocation.
2043	*/
2044	static int gmmR0AllocateOneChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, uint16_t hGVM)
2045	{
2046	/*
2047	* Allocate the memory.
2048	*
2049	* Note! We leave the giant GMM lock temporarily as the allocation might
2050	* take a long time. gmmR0RegisterChunk reacquires it (ugly).
2051	*/
2052	gmmR0MutexRelease(pGMM);
2053
2054	RTR0MEMOBJ hMemObj;
2055	int rc = RTR0MemObjAllocPhysNC(&hMemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS);
2056	if (RT_SUCCESS(rc))
2057	{
2058	rc = gmmR0RegisterChunk(pGMM, pSet, hMemObj, hGVM, 0 /fChunkFlags/, NULL);
2059	if (RT_SUCCESS(rc))
2060	return rc;
2061
2062	RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
2063	}
2064
2065	int rc2 = gmmR0MutexAcquire(pGMM);
2066	AssertRCReturn(rc2, RT_FAILURE(rc) ? rc : rc2);
2067	return rc;
2068	}
2069
2070
2071	/**
2072	* Attempts to allocate more pages until the requested amount is met.
2073	*
2074	* @returns VBox status code.
2075	* @param pGMM Pointer to the GMM instance data.
2076	* @param pGVM The calling VM.
2077	* @param pSet Pointer to the free set to grow.
2078	* @param cPages The number of pages needed.
2079	* @param pStrategy Pointer to the allocation strategy data. This is input
2080	* and output.
2081	*
2082	* @remarks Called owning the mutex, but will leave it temporarily while
2083	* allocating the memory!
2084	*/
2085	static int gmmR0AllocateMoreChunks(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet, uint32_t cPages,
2086	PGMMR0ALLOCPAGESTRATEGY pStrategy)
2087	{
2088	Assert(!pGMM->fLegacyAllocationMode);
2089
2090	if (!GMM_CHECK_SANITY_IN_LOOPS(pGMM))
2091	return VERR_INTERNAL_ERROR_4;
2092
2093	if (!pGMM->fBoundMemoryMode)
2094	{
2095	/*
2096	* Try steal free chunks from the other set first. (Only take 100% free chunks.)
2097	*/
2098	PGMMCHUNKFREESET pOtherSet = pSet == &pGMM->Private ? &pGMM->Shared : &pGMM->Private;
2099	while ( pSet->cFreePages < cPages
2100	&& pOtherSet->cFreePages >= GMM_CHUNK_NUM_PAGES)
2101	{
2102	PGMMCHUNK pChunk = pOtherSet->apLists[RT_ELEMENTS(pOtherSet->apLists) - 1];
2103	while ( pChunk
2104	&& pChunk->cFree != GMM_CHUNK_NUM_PAGES)
2105	pChunk = pChunk->pFreeNext;
2106	if (!pChunk)
2107	break;
2108
2109	gmmR0UnlinkChunk(pChunk);
2110	gmmR0LinkChunk(pChunk, pSet);
2111	}
2112
2113	/*
2114	* If we need still more pages, allocate new chunks.
2115	* Note! We will leave the mutex while doing the allocation,
2116	*/
2117	while (pSet->cFreePages < cPages)
2118	{
2119	int rc = gmmR0AllocateOneChunk(pGMM, pSet, pGVM->hSelf);
2120	if (RT_FAILURE(rc))
2121	return rc;
2122	if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2123	return VERR_INTERNAL_ERROR_5;
2124	}
2125	}
2126	else
2127	{
2128	/*
2129	* The memory is bound to the VM allocating it, so we have to count
2130	* the free pages carefully as well as making sure we brand them with
2131	* our VM handle.
2132	*
2133	* Note! We will leave the mutex while doing the allocation,
2134	*/
2135	uint16_t const hGVM = pGVM->hSelf;
2136	for (;;)
2137	{
2138	/* Count and see if we've reached the goal. */
2139	uint32_t cPagesFound = 0;
2140	for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
2141	for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
2142	if (pCur->hGVM == hGVM)
2143	{
2144	cPagesFound += pCur->cFree;
2145	if (cPagesFound >= cPages)
2146	break;
2147	}
2148	if (cPagesFound >= cPages)
2149	break;
2150
2151	/* Allocate more. */
2152	int rc = gmmR0AllocateOneChunk(pGMM, pSet, hGVM);
2153	if (RT_FAILURE(rc))
2154	return rc;
2155	if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2156	return VERR_INTERNAL_ERROR_5;
2157	}
2158	}
2159
2160	return VINF_SUCCESS;
2161	}
2162
2163
2164	/**
2165	* Allocates one private page.
2166	*
2167	* Worker for gmmR0AllocatePages.
2168	*
2169	* @param pGMM Pointer to the GMM instance data.
2170	* @param hGVM The GVM handle of the VM requesting memory.
2171	* @param pChunk The chunk to allocate it from.
2172	* @param pPageDesc The page descriptor.
2173	*/
2174	static void gmmR0AllocatePage(PGMM pGMM, uint32_t hGVM, PGMMCHUNK pChunk, PGMMPAGEDESC pPageDesc)
2175	{
2176	/* update the chunk stats. */
2177	if (pChunk->hGVM == NIL_GVM_HANDLE)
2178	pChunk->hGVM = hGVM;
2179	Assert(pChunk->cFree);
2180	pChunk->cFree--;
2181	pChunk->cPrivate++;
2182
2183	/* unlink the first free page. */
2184	const uint32_t iPage = pChunk->iFreeHead;
2185	AssertReleaseMsg(iPage < RT_ELEMENTS(pChunk->aPages), ("%d\n", iPage));
2186	PGMMPAGE pPage = &pChunk->aPages[iPage];
2187	Assert(GMM_PAGE_IS_FREE(pPage));
2188	pChunk->iFreeHead = pPage->Free.iNext;
2189	Log3(("A pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x iNext=%#x\n",
2190	pPage, iPage, (pChunk->Core.Key << GMM_CHUNKID_SHIFT) \| iPage,
2191	pPage->Common.u2State, pChunk->iFreeHead, pPage->Free.iNext));
2192
2193	/* make the page private. */
2194	pPage->u = 0;
2195	AssertCompile(GMM_PAGE_STATE_PRIVATE == 0);
2196	pPage->Private.hGVM = hGVM;
2197	AssertCompile(NIL_RTHCPHYS >= GMM_GCPHYS_LAST);
2198	AssertCompile(GMM_GCPHYS_UNSHAREABLE >= GMM_GCPHYS_LAST);
2199	if (pPageDesc->HCPhysGCPhys <= GMM_GCPHYS_LAST)
2200	pPage->Private.pfn = pPageDesc->HCPhysGCPhys >> PAGE_SHIFT;
2201	else
2202	pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE; /* unshareable / unassigned - same thing. */
2203
2204	/* update the page descriptor. */
2205	pPageDesc->HCPhysGCPhys = RTR0MemObjGetPagePhysAddr(pChunk->hMemObj, iPage);
2206	Assert(pPageDesc->HCPhysGCPhys != NIL_RTHCPHYS);
2207	pPageDesc->idPage = (pChunk->Core.Key << GMM_CHUNKID_SHIFT) \| iPage;
2208	pPageDesc->idSharedPage = NIL_GMM_PAGEID;
2209	}
2210
2211
2212	/**
2213	* Common worker for GMMR0AllocateHandyPages and GMMR0AllocatePages.
2214	*
2215	* @returns VBox status code:
2216	* @retval VINF_SUCCESS on success.
2217	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk or
2218	* gmmR0AllocateMoreChunks is necessary.
2219	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2220	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2221	* that is we're trying to allocate more than we've reserved.
2222	*
2223	* @param pGMM Pointer to the GMM instance data.
2224	* @param pGVM Pointer to the shared VM structure.
2225	* @param cPages The number of pages to allocate.
2226	* @param paPages Pointer to the page descriptors.
2227	* See GMMPAGEDESC for details on what is expected on input.
2228	* @param enmAccount The account to charge.
2229	* @param pStrategy Pointer to the allocation strategy data. This
2230	* is input and output.
2231	*/
2232	static int gmmR0AllocatePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount,
2233	PGMMR0ALLOCPAGESTRATEGY pStrategy)
2234	{
2235	/*
2236	* Check allocation limits.
2237	*/
2238	if (RT_UNLIKELY(pGMM->cAllocatedPages + cPages > pGMM->cMaxPages))
2239	return VERR_GMM_HIT_GLOBAL_LIMIT;
2240
2241	switch (enmAccount)
2242	{
2243	case GMMACCOUNT_BASE:
2244	if (RT_UNLIKELY( pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cPages
2245	> pGVM->gmm.s.Reserved.cBasePages))
2246	{
2247	Log(("gmmR0AllocatePages:Base: Reserved=%#llx Allocated+Ballooned+Requested=%#llx+%#llx+%#x!\n",
2248	pGVM->gmm.s.Reserved.cBasePages, pGVM->gmm.s.Allocated.cBasePages, pGVM->gmm.s.cBalloonedPages, cPages));
2249	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2250	}
2251	break;
2252	case GMMACCOUNT_SHADOW:
2253	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cShadowPages + cPages > pGVM->gmm.s.Reserved.cShadowPages))
2254	{
2255	Log(("gmmR0AllocatePages:Shadow: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
2256	pGVM->gmm.s.Reserved.cShadowPages, pGVM->gmm.s.Allocated.cShadowPages, cPages));
2257	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2258	}
2259	break;
2260	case GMMACCOUNT_FIXED:
2261	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cFixedPages + cPages > pGVM->gmm.s.Reserved.cFixedPages))
2262	{
2263	Log(("gmmR0AllocatePages:Fixed: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
2264	pGVM->gmm.s.Reserved.cFixedPages, pGVM->gmm.s.Allocated.cFixedPages, cPages));
2265	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2266	}
2267	break;
2268	default:
2269	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2270	}
2271
2272	/*
2273	* Check if we need to allocate more memory or not. In bound memory mode this
2274	* is a bit extra work but it's easier to do it upfront than bailing out later.
2275	*/
2276	PGMMCHUNKFREESET pSet = &pGMM->Private;
2277	if (pSet->cFreePages < cPages)
2278	return VERR_GMM_SEED_ME;
2279
2280	/** @todo Rewrite this to use the page array for storing chunk IDs and other
2281	* state info needed to avoid the multipass sillyness. */
2282	if (pGMM->fBoundMemoryMode)
2283	{
2284	uint16_t hGVM = pGVM->hSelf;
2285	uint32_t cPagesFound = 0;
2286	for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
2287	for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
2288	if (pCur->hGVM == hGVM)
2289	{
2290	cPagesFound += pCur->cFree;
2291	if (cPagesFound >= cPages)
2292	break;
2293	}
2294	if (cPagesFound < cPages)
2295	return VERR_GMM_SEED_ME;
2296	}
2297
2298	/*
2299	* Pick the pages.
2300	* Try make some effort keeping VMs sharing private chunks.
2301	*/
2302	uint16_t hGVM = pGVM->hSelf;
2303	uint32_t iPage = 0;
2304
2305	/* first round, pick from chunks with an affinity to the VM. */
2306	for (unsigned i = 0; i < GMM_CHUNK_FREE_SET_UNUSED_LIST && iPage < cPages; i++)
2307	{
2308	PGMMCHUNK pCurFree = NULL;
2309	PGMMCHUNK pCur = pSet->apLists[i];
2310	while (pCur && iPage < cPages)
2311	{
2312	PGMMCHUNK pNext = pCur->pFreeNext;
2313
2314	if ( pCur->hGVM == hGVM
2315	&& pCur->cFree < GMM_CHUNK_NUM_PAGES)
2316	{
2317	gmmR0UnlinkChunk(pCur);
2318	for (; pCur->cFree && iPage < cPages; iPage++)
2319	gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
2320	gmmR0LinkChunk(pCur, pSet);
2321	}
2322
2323	pCur = pNext;
2324	}
2325	}
2326
2327	if (iPage < cPages)
2328	{
2329	/* second round, pick pages from the 100% empty chunks we just skipped above. */
2330	PGMMCHUNK pCurFree = NULL;
2331	PGMMCHUNK pCur = pSet->apLists[GMM_CHUNK_FREE_SET_UNUSED_LIST];
2332	while (pCur && iPage < cPages)
2333	{
2334	PGMMCHUNK pNext = pCur->pFreeNext;
2335	Assert(pCur->cFree == GMM_CHUNK_NUM_PAGES);
2336
2337	if ( pCur->hGVM == hGVM
2338	\|\| !pGMM->fBoundMemoryMode)
2339	{
2340	gmmR0UnlinkChunk(pCur);
2341	for (; pCur->cFree && iPage < cPages; iPage++)
2342	gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
2343	gmmR0LinkChunk(pCur, pSet);
2344	}
2345
2346	pCur = pNext;
2347	}
2348	}
2349
2350	if ( iPage < cPages
2351	&& !pGMM->fBoundMemoryMode)
2352	{
2353	/* third round, disregard affinity. */
2354	unsigned i = RT_ELEMENTS(pSet->apLists);
2355	while (i-- > 0 && iPage < cPages)
2356	{
2357	PGMMCHUNK pCurFree = NULL;
2358	PGMMCHUNK pCur = pSet->apLists[i];
2359	while (pCur && iPage < cPages)
2360	{
2361	PGMMCHUNK pNext = pCur->pFreeNext;
2362
2363	if ( pCur->cFree > GMM_CHUNK_NUM_PAGES / 2
2364	&& cPages >= GMM_CHUNK_NUM_PAGES / 2)
2365	pCur->hGVM = hGVM; /* change chunk affinity */
2366
2367	gmmR0UnlinkChunk(pCur);
2368	for (; pCur->cFree && iPage < cPages; iPage++)
2369	gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
2370	gmmR0LinkChunk(pCur, pSet);
2371
2372	pCur = pNext;
2373	}
2374	}
2375	}
2376
2377	/*
2378	* Update the account.
2379	*/
2380	switch (enmAccount)
2381	{
2382	case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages += iPage; break;
2383	case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages += iPage; break;
2384	case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages += iPage; break;
2385	default:
2386	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2387	}
2388	pGVM->gmm.s.cPrivatePages += iPage;
2389	pGMM->cAllocatedPages += iPage;
2390
2391	AssertMsgReturn(iPage == cPages, ("%u != %u\n", iPage, cPages), VERR_INTERNAL_ERROR);
2392
2393	/*
2394	* Check if we've reached some threshold and should kick one or two VMs and tell
2395	* them to inflate their balloons a bit more... later.
2396	*/
2397
2398	return VINF_SUCCESS;
2399	}
2400
2401
2402	/**
2403	* Determins the initial page allocation strategy and initializes the data
2404	* structure.
2405	*
2406	* @param pGMM Pointer to the GMM instance data.
2407	* @param pGVM Pointer to the shared VM structure.
2408	* @param pStrategy The data structure to initialize.
2409	*/
2410	static void gmmR0AllocatePagesInitStrategy(PGMM pGMM, PGVM pGVM, PGMMR0ALLOCPAGESTRATEGY pStrategy)
2411	{
2412	pStrategy->cTries = 0;
2413	}
2414
2415
2416	/**
2417	* Updates the previous allocations and allocates more pages.
2418	*
2419	* The handy pages are always taken from the 'base' memory account.
2420	* The allocated pages are not cleared and will contains random garbage.
2421	*
2422	* @returns VBox status code:
2423	* @retval VINF_SUCCESS on success.
2424	* @retval VERR_NOT_OWNER if the caller is not an EMT.
2425	* @retval VERR_GMM_PAGE_NOT_FOUND if one of the pages to update wasn't found.
2426	* @retval VERR_GMM_PAGE_NOT_PRIVATE if one of the pages to update wasn't a
2427	* private page.
2428	* @retval VERR_GMM_PAGE_NOT_SHARED if one of the pages to update wasn't a
2429	* shared page.
2430	* @retval VERR_GMM_NOT_PAGE_OWNER if one of the pages to be updated wasn't
2431	* owned by the VM.
2432	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2433	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2434	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2435	* that is we're trying to allocate more than we've reserved.
2436	*
2437	* @param pVM Pointer to the shared VM structure.
2438	* @param idCpu VCPU id
2439	* @param cPagesToUpdate The number of pages to update (starting from the head).
2440	* @param cPagesToAlloc The number of pages to allocate (starting from the head).
2441	* @param paPages The array of page descriptors.
2442	* See GMMPAGEDESC for details on what is expected on input.
2443	* @thread EMT.
2444	*/
2445	GMMR0DECL(int) GMMR0AllocateHandyPages(PVM pVM, VMCPUID idCpu, uint32_t cPagesToUpdate, uint32_t cPagesToAlloc, PGMMPAGEDESC paPages)
2446	{
2447	LogFlow(("GMMR0AllocateHandyPages: pVM=%p cPagesToUpdate=%#x cPagesToAlloc=%#x paPages=%p\n",
2448	pVM, cPagesToUpdate, cPagesToAlloc, paPages));
2449
2450	/*
2451	* Validate, get basics and take the semaphore.
2452	* (This is a relatively busy path, so make predictions where possible.)
2453	*/
2454	PGMM pGMM;
2455	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2456	PGVM pGVM;
2457	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2458	if (RT_FAILURE(rc))
2459	return rc;
2460
2461	AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2462	AssertMsgReturn( (cPagesToUpdate && cPagesToUpdate < 1024)
2463	\|\| (cPagesToAlloc && cPagesToAlloc < 1024),
2464	("cPagesToUpdate=%#x cPagesToAlloc=%#x\n", cPagesToUpdate, cPagesToAlloc),
2465	VERR_INVALID_PARAMETER);
2466
2467	unsigned iPage = 0;
2468	for (; iPage < cPagesToUpdate; iPage++)
2469	{
2470	AssertMsgReturn( ( paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
2471	&& !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK))
2472	\|\| paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
2473	\|\| paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE,
2474	("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys),
2475	VERR_INVALID_PARAMETER);
2476	AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2477	/\|\| paPages[iPage].idPage == NIL_GMM_PAGEID/,
2478	("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2479	AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2480	/\|\| paPages[iPage].idSharedPage == NIL_GMM_PAGEID/,
2481	("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2482	}
2483
2484	for (; iPage < cPagesToAlloc; iPage++)
2485	{
2486	AssertMsgReturn(paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS, ("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys), VERR_INVALID_PARAMETER);
2487	AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2488	AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2489	}
2490
2491	gmmR0MutexAcquire(pGMM);
2492	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2493	{
2494	/* No allocations before the initial reservation has been made! */
2495	if (RT_LIKELY( pGVM->gmm.s.Reserved.cBasePages
2496	&& pGVM->gmm.s.Reserved.cFixedPages
2497	&& pGVM->gmm.s.Reserved.cShadowPages))
2498	{
2499	/*
2500	* Perform the updates.
2501	* Stop on the first error.
2502	*/
2503	for (iPage = 0; iPage < cPagesToUpdate; iPage++)
2504	{
2505	if (paPages[iPage].idPage != NIL_GMM_PAGEID)
2506	{
2507	PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idPage);
2508	if (RT_LIKELY(pPage))
2509	{
2510	if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
2511	{
2512	if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
2513	{
2514	AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
2515	if (RT_LIKELY(paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST))
2516	pPage->Private.pfn = paPages[iPage].HCPhysGCPhys >> PAGE_SHIFT;
2517	else if (paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE)
2518	pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE;
2519	/* else: NIL_RTHCPHYS nothing */
2520
2521	paPages[iPage].idPage = NIL_GMM_PAGEID;
2522	paPages[iPage].HCPhysGCPhys = NIL_RTHCPHYS;
2523	}
2524	else
2525	{
2526	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not owner! hGVM=%#x hSelf=%#x\n",
2527	iPage, paPages[iPage].idPage, pPage->Private.hGVM, pGVM->hSelf));
2528	rc = VERR_GMM_NOT_PAGE_OWNER;
2529	break;
2530	}
2531	}
2532	else
2533	{
2534	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not private! %.Rhxs (type %d)\n", iPage, paPages[iPage].idPage, sizeof(pPage), pPage, pPage->Common.u2State));
2535	rc = VERR_GMM_PAGE_NOT_PRIVATE;
2536	break;
2537	}
2538	}
2539	else
2540	{
2541	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (private)\n", iPage, paPages[iPage].idPage));
2542	rc = VERR_GMM_PAGE_NOT_FOUND;
2543	break;
2544	}
2545	}
2546
2547	if (paPages[iPage].idSharedPage != NIL_GMM_PAGEID)
2548	{
2549	PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idSharedPage);
2550	if (RT_LIKELY(pPage))
2551	{
2552	if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
2553	{
2554	AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
2555	Assert(pPage->Shared.cRefs);
2556	Assert(pGVM->gmm.s.cSharedPages);
2557	Assert(pGVM->gmm.s.Allocated.cBasePages);
2558
2559	Log(("GMMR0AllocateHandyPages: free shared page %x cRefs=%d\n", paPages[iPage].idSharedPage, pPage->Shared.cRefs));
2560	pGVM->gmm.s.cSharedPages--;
2561	pGVM->gmm.s.Allocated.cBasePages--;
2562	if (!--pPage->Shared.cRefs)
2563	{
2564	gmmR0FreeSharedPage(pGMM, paPages[iPage].idSharedPage, pPage);
2565	}
2566	else
2567	{
2568	Assert(pGMM->cDuplicatePages);
2569	pGMM->cDuplicatePages--;
2570	}
2571
2572	paPages[iPage].idSharedPage = NIL_GMM_PAGEID;
2573	}
2574	else
2575	{
2576	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not shared!\n", iPage, paPages[iPage].idSharedPage));
2577	rc = VERR_GMM_PAGE_NOT_SHARED;
2578	break;
2579	}
2580	}
2581	else
2582	{
2583	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (shared)\n", iPage, paPages[iPage].idSharedPage));
2584	rc = VERR_GMM_PAGE_NOT_FOUND;
2585	break;
2586	}
2587	}
2588	}
2589
2590	/*
2591	* Join paths with GMMR0AllocatePages for the allocation.
2592	* Note! gmmR0AllocateMoreChunks may leave the protection of the mutex!
2593	*/
2594	GMMR0ALLOCPAGESTRATEGY Strategy;
2595	gmmR0AllocatePagesInitStrategy(pGMM, pGVM, &Strategy);
2596	while (RT_SUCCESS(rc))
2597	{
2598	rc = gmmR0AllocatePages(pGMM, pGVM, cPagesToAlloc, paPages, GMMACCOUNT_BASE, &Strategy);
2599	if ( rc != VERR_GMM_SEED_ME
2600	\|\| pGMM->fLegacyAllocationMode)
2601	break;
2602	rc = gmmR0AllocateMoreChunks(pGMM, pGVM, &pGMM->Private, cPagesToAlloc, &Strategy);
2603	}
2604	}
2605	else
2606	rc = VERR_WRONG_ORDER;
2607	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2608	}
2609	else
2610	rc = VERR_INTERNAL_ERROR_5;
2611	gmmR0MutexRelease(pGMM);
2612	LogFlow(("GMMR0AllocateHandyPages: returns %Rrc\n", rc));
2613	return rc;
2614	}
2615
2616
2617	/**
2618	* Allocate one or more pages.
2619	*
2620	* This is typically used for ROMs and MMIO2 (VRAM) during VM creation.
2621	* The allocated pages are not cleared and will contains random garbage.
2622	*
2623	* @returns VBox status code:
2624	* @retval VINF_SUCCESS on success.
2625	* @retval VERR_NOT_OWNER if the caller is not an EMT.
2626	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2627	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2628	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2629	* that is we're trying to allocate more than we've reserved.
2630	*
2631	* @param pVM Pointer to the shared VM structure.
2632	* @param idCpu VCPU id
2633	* @param cPages The number of pages to allocate.
2634	* @param paPages Pointer to the page descriptors.
2635	* See GMMPAGEDESC for details on what is expected on input.
2636	* @param enmAccount The account to charge.
2637	*
2638	* @thread EMT.
2639	*/
2640	GMMR0DECL(int) GMMR0AllocatePages(PVM pVM, VMCPUID idCpu, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
2641	{
2642	LogFlow(("GMMR0AllocatePages: pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pVM, cPages, paPages, enmAccount));
2643
2644	/*
2645	* Validate, get basics and take the semaphore.
2646	*/
2647	PGMM pGMM;
2648	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2649	PGVM pGVM;
2650	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2651	if (RT_FAILURE(rc))
2652	return rc;
2653
2654	AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2655	AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
2656	AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
2657
2658	for (unsigned iPage = 0; iPage < cPages; iPage++)
2659	{
2660	AssertMsgReturn( paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
2661	\|\| paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE
2662	\|\| ( enmAccount == GMMACCOUNT_BASE
2663	&& paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
2664	&& !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK)),
2665	("#%#x: %RHp enmAccount=%d\n", iPage, paPages[iPage].HCPhysGCPhys, enmAccount),
2666	VERR_INVALID_PARAMETER);
2667	AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2668	AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2669	}
2670
2671	gmmR0MutexAcquire(pGMM);
2672	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2673	{
2674
2675	/* No allocations before the initial reservation has been made! */
2676	if (RT_LIKELY( pGVM->gmm.s.Reserved.cBasePages
2677	&& pGVM->gmm.s.Reserved.cFixedPages
2678	&& pGVM->gmm.s.Reserved.cShadowPages))
2679	{
2680	/*
2681	* gmmR0AllocatePages seed loop.
2682	* Note! gmmR0AllocateMoreChunks may leave the protection of the mutex!
2683	*/
2684	GMMR0ALLOCPAGESTRATEGY Strategy;
2685	gmmR0AllocatePagesInitStrategy(pGMM, pGVM, &Strategy);
2686	while (RT_SUCCESS(rc))
2687	{
2688	rc = gmmR0AllocatePages(pGMM, pGVM, cPages, paPages, enmAccount, &Strategy);
2689	if ( rc != VERR_GMM_SEED_ME
2690	\|\| pGMM->fLegacyAllocationMode)
2691	break;
2692	rc = gmmR0AllocateMoreChunks(pGMM, pGVM, &pGMM->Private, cPages, &Strategy);
2693	}
2694	}
2695	else
2696	rc = VERR_WRONG_ORDER;
2697	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2698	}
2699	else
2700	rc = VERR_INTERNAL_ERROR_5;
2701	gmmR0MutexRelease(pGMM);
2702	LogFlow(("GMMR0AllocatePages: returns %Rrc\n", rc));
2703	return rc;
2704	}
2705
2706
2707	/**
2708	* VMMR0 request wrapper for GMMR0AllocatePages.
2709	*
2710	* @returns see GMMR0AllocatePages.
2711	* @param pVM Pointer to the shared VM structure.
2712	* @param idCpu VCPU id
2713	* @param pReq The request packet.
2714	*/
2715	GMMR0DECL(int) GMMR0AllocatePagesReq(PVM pVM, VMCPUID idCpu, PGMMALLOCATEPAGESREQ pReq)
2716	{
2717	/*
2718	* Validate input and pass it on.
2719	*/
2720	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2721	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2722	AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0]),
2723	("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0])),
2724	VERR_INVALID_PARAMETER);
2725	AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[pReq->cPages]),
2726	("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[pReq->cPages])),
2727	VERR_INVALID_PARAMETER);
2728
2729	return GMMR0AllocatePages(pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
2730	}
2731
2732
2733	/**
2734	* Allocate a large page to represent guest RAM
2735	*
2736	* The allocated pages are not cleared and will contains random garbage.
2737	*
2738	* @returns VBox status code:
2739	* @retval VINF_SUCCESS on success.
2740	* @retval VERR_NOT_OWNER if the caller is not an EMT.
2741	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2742	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2743	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2744	* that is we're trying to allocate more than we've reserved.
2745	* @returns see GMMR0AllocatePages.
2746	* @param pVM Pointer to the shared VM structure.
2747	* @param idCpu VCPU id
2748	* @param cbPage Large page size
2749	*/
2750	GMMR0DECL(int) GMMR0AllocateLargePage(PVM pVM, VMCPUID idCpu, uint32_t cbPage, uint32_t pIdPage, RTHCPHYS pHCPhys)
2751	{
2752	LogFlow(("GMMR0AllocateLargePage: pVM=%p cbPage=%x\n", pVM, cbPage));
2753
2754	AssertReturn(cbPage == GMM_CHUNK_SIZE, VERR_INVALID_PARAMETER);
2755	AssertPtrReturn(pIdPage, VERR_INVALID_PARAMETER);
2756	AssertPtrReturn(pHCPhys, VERR_INVALID_PARAMETER);
2757
2758	/*
2759	* Validate, get basics and take the semaphore.
2760	*/
2761	PGMM pGMM;
2762	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2763	PGVM pGVM;
2764	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2765	if (RT_FAILURE(rc))
2766	return rc;
2767
2768	/* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
2769	if (pGMM->fLegacyAllocationMode)
2770	return VERR_NOT_SUPPORTED;
2771
2772	*pHCPhys = NIL_RTHCPHYS;
2773	*pIdPage = NIL_GMM_PAGEID;
2774
2775	gmmR0MutexAcquire(pGMM);
2776	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2777	{
2778	const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
2779	if (RT_UNLIKELY( pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cPages
2780	> pGVM->gmm.s.Reserved.cBasePages))
2781	{
2782	Log(("GMMR0AllocateLargePage: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
2783	pGVM->gmm.s.Reserved.cBasePages, pGVM->gmm.s.Allocated.cBasePages, cPages));
2784	gmmR0MutexRelease(pGMM);
2785	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2786	}
2787
2788	/*
2789	* Allocate a new large page chunk.
2790	*
2791	* Note! We leave the giant GMM lock temporarily as the allocation might
2792	* take a long time. gmmR0RegisterChunk will retake it (ugly).
2793	*/
2794	AssertCompile(GMM_CHUNK_SIZE == _2M);
2795	gmmR0MutexRelease(pGMM);
2796
2797	RTR0MEMOBJ hMemObj;
2798	rc = RTR0MemObjAllocPhysEx(&hMemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS, GMM_CHUNK_SIZE);
2799	if (RT_SUCCESS(rc))
2800	{
2801	PGMMCHUNK pChunk;
2802	rc = gmmR0RegisterChunk(pGMM, &pGMM->Private, hMemObj, pGVM->hSelf, GMM_CHUNK_FLAGS_LARGE_PAGE, &pChunk);
2803	if (RT_SUCCESS(rc))
2804	{
2805	/*
2806	* Allocate all the pages in the chunk.
2807	*/
2808	/* Unlink the new chunk from the free list. */
2809	gmmR0UnlinkChunk(pChunk);
2810
2811	/** @todo rewrite this to skip the looping. */
2812	/* Allocate all pages. */
2813	GMMPAGEDESC PageDesc;
2814	gmmR0AllocatePage(pGMM, pGVM->hSelf, pChunk, &PageDesc);
2815
2816	/* Return the first page as we'll use the whole chunk as one big page. */
2817	*pIdPage = PageDesc.idPage;
2818	*pHCPhys = PageDesc.HCPhysGCPhys;
2819
2820	for (unsigned i = 1; i < cPages; i++)
2821	gmmR0AllocatePage(pGMM, pGVM->hSelf, pChunk, &PageDesc);
2822
2823	/* Update accounting. */
2824	pGVM->gmm.s.Allocated.cBasePages += cPages;
2825	pGVM->gmm.s.cPrivatePages += cPages;
2826	pGMM->cAllocatedPages += cPages;
2827
2828	gmmR0LinkChunk(pChunk, &pGMM->Private);
2829	gmmR0MutexRelease(pGMM);
2830	}
2831	else
2832	RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
2833	}
2834	}
2835	else
2836	{
2837	gmmR0MutexRelease(pGMM);
2838	rc = VERR_INTERNAL_ERROR_5;
2839	}
2840
2841	LogFlow(("GMMR0AllocateLargePage: returns %Rrc\n", rc));
2842	return rc;
2843	}
2844
2845
2846	/**
2847	* Free a large page
2848	*
2849	* @returns VBox status code:
2850	* @param pVM Pointer to the shared VM structure.
2851	* @param idCpu VCPU id
2852	* @param idPage Large page id
2853	*/
2854	GMMR0DECL(int) GMMR0FreeLargePage(PVM pVM, VMCPUID idCpu, uint32_t idPage)
2855	{
2856	LogFlow(("GMMR0FreeLargePage: pVM=%p idPage=%x\n", pVM, idPage));
2857
2858	/*
2859	* Validate, get basics and take the semaphore.
2860	*/
2861	PGMM pGMM;
2862	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2863	PGVM pGVM;
2864	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2865	if (RT_FAILURE(rc))
2866	return rc;
2867
2868	/* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
2869	if (pGMM->fLegacyAllocationMode)
2870	return VERR_NOT_SUPPORTED;
2871
2872	gmmR0MutexAcquire(pGMM);
2873	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2874	{
2875	const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
2876
2877	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages < cPages))
2878	{
2879	Log(("GMMR0FreeLargePage: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cBasePages, cPages));
2880	gmmR0MutexRelease(pGMM);
2881	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2882	}
2883
2884	PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
2885	if (RT_LIKELY( pPage
2886	&& GMM_PAGE_IS_PRIVATE(pPage)))
2887	{
2888	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2889	Assert(pChunk);
2890	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2891	Assert(pChunk->cPrivate > 0);
2892
2893	/* Release the memory immediately. */
2894	gmmR0FreeChunk(pGMM, NULL, pChunk, false /fRelaxedSem/); /** @todo this can be relaxed too! */
2895
2896	/* Update accounting. */
2897	pGVM->gmm.s.Allocated.cBasePages -= cPages;
2898	pGVM->gmm.s.cPrivatePages -= cPages;
2899	pGMM->cAllocatedPages -= cPages;
2900	}
2901	else
2902	rc = VERR_GMM_PAGE_NOT_FOUND;
2903	}
2904	else
2905	rc = VERR_INTERNAL_ERROR_5;
2906
2907	gmmR0MutexRelease(pGMM);
2908	LogFlow(("GMMR0FreeLargePage: returns %Rrc\n", rc));
2909	return rc;
2910	}
2911
2912
2913	/**
2914	* VMMR0 request wrapper for GMMR0FreeLargePage.
2915	*
2916	* @returns see GMMR0FreeLargePage.
2917	* @param pVM Pointer to the shared VM structure.
2918	* @param idCpu VCPU id
2919	* @param pReq The request packet.
2920	*/
2921	GMMR0DECL(int) GMMR0FreeLargePageReq(PVM pVM, VMCPUID idCpu, PGMMFREELARGEPAGEREQ pReq)
2922	{
2923	/*
2924	* Validate input and pass it on.
2925	*/
2926	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2927	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2928	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMFREEPAGESREQ),
2929	("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(GMMFREEPAGESREQ)),
2930	VERR_INVALID_PARAMETER);
2931
2932	return GMMR0FreeLargePage(pVM, idCpu, pReq->idPage);
2933	}
2934
2935
2936	/**
2937	* Frees a chunk, giving it back to the host OS.
2938	*
2939	* @param pGMM Pointer to the GMM instance.
2940	* @param pGVM This is set when called from GMMR0CleanupVM so we can
2941	* unmap and free the chunk in one go.
2942	* @param pChunk The chunk to free.
2943	* @param fRelaxedSem Whether we can release the semaphore while doing the
2944	* freeing (@c true) or not.
2945	*/
2946	static bool gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem)
2947	{
2948	Assert(pChunk->Core.Key != NIL_GMM_CHUNKID);
2949
2950	GMMR0CHUNKMTXSTATE MtxState;
2951	gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
2952
2953	/*
2954	* Cleanup hack! Unmap the chunk from the callers address space.
2955	* This shouldn't happen, so screw lock contention...
2956	*/
2957	if ( pChunk->cMappingsX
2958	&& !pGMM->fLegacyAllocationMode
2959	&& pGVM)
2960	gmmR0UnmapChunkLocked(pGMM, pGVM, pChunk);
2961
2962	/*
2963	* If there are current mappings of the chunk, then request the
2964	* VMs to unmap them. Reposition the chunk in the free list so
2965	* it won't be a likely candidate for allocations.
2966	*/
2967	if (pChunk->cMappingsX)
2968	{
2969	/** @todo R0 -> VM request */
2970	/* The chunk can be mapped by more than one VM if fBoundMemoryMode is false! */
2971	Log(("gmmR0FreeChunk: chunk still has %d/%d mappings; don't free!\n", pChunk->cMappingsX));
2972	gmmR0ChunkMutexRelease(&MtxState, pChunk);
2973	return false;
2974	}
2975
2976
2977	/*
2978	* Save and trash the handle.
2979	*/
2980	RTR0MEMOBJ const hMemObj = pChunk->hMemObj;
2981	pChunk->hMemObj = NIL_RTR0MEMOBJ;
2982
2983	/*
2984	* Unlink it from everywhere.
2985	*/
2986	gmmR0UnlinkChunk(pChunk);
2987
2988	RTListNodeRemove(&pChunk->ListNode);
2989
2990	PAVLU32NODECORE pCore = RTAvlU32Remove(&pGMM->pChunks, pChunk->Core.Key);
2991	Assert(pCore == &pChunk->Core); NOREF(pCore);
2992
2993	PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(pChunk->Core.Key)];
2994	if (pTlbe->pChunk == pChunk)
2995	{
2996	pTlbe->idChunk = NIL_GMM_CHUNKID;
2997	pTlbe->pChunk = NULL;
2998	}
2999
3000	Assert(pGMM->cChunks > 0);
3001	pGMM->cChunks--;
3002
3003	/*
3004	* Free the Chunk ID before dropping the locks and freeing the rest.
3005	*/
3006	gmmR0FreeChunkId(pGMM, pChunk->Core.Key);
3007	pChunk->Core.Key = NIL_GMM_CHUNKID;
3008
3009	pGMM->cFreedChunks++;
3010
3011	gmmR0ChunkMutexRelease(&MtxState, NULL);
3012	if (fRelaxedSem)
3013	gmmR0MutexRelease(pGMM);
3014
3015	RTMemFree(pChunk->paMappingsX);
3016	pChunk->paMappingsX = NULL;
3017
3018	RTMemFree(pChunk);
3019
3020	int rc = RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
3021	AssertLogRelRC(rc);
3022
3023	if (fRelaxedSem)
3024	gmmR0MutexAcquire(pGMM);
3025	return fRelaxedSem;
3026	}
3027
3028
3029	/**
3030	* Free page worker.
3031	*
3032	* The caller does all the statistic decrementing, we do all the incrementing.
3033	*
3034	* @param pGMM Pointer to the GMM instance data.
3035	* @param pChunk Pointer to the chunk this page belongs to.
3036	* @param idPage The Page ID.
3037	* @param pPage Pointer to the page.
3038	*/
3039	static void gmmR0FreePageWorker(PGMM pGMM, PGMMCHUNK pChunk, uint32_t idPage, PGMMPAGE pPage)
3040	{
3041	Log3(("F pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x\n",
3042	pPage, pPage - &pChunk->aPages[0], idPage, pPage->Common.u2State, pChunk->iFreeHead)); NOREF(idPage);
3043
3044	/*
3045	* Put the page on the free list.
3046	*/
3047	pPage->u = 0;
3048	pPage->Free.u2State = GMM_PAGE_STATE_FREE;
3049	Assert(pChunk->iFreeHead < RT_ELEMENTS(pChunk->aPages) \|\| pChunk->iFreeHead == UINT16_MAX);
3050	pPage->Free.iNext = pChunk->iFreeHead;
3051	pChunk->iFreeHead = pPage - &pChunk->aPages[0];
3052
3053	/*
3054	* Update statistics (the cShared/cPrivate stats are up to date already),
3055	* and relink the chunk if necessary.
3056	*/
3057	unsigned const cFree = pChunk->cFree;
3058	if ( !cFree
3059	\|\| gmmR0SelectFreeSetList(cFree) != gmmR0SelectFreeSetList(cFree + 1))
3060	{
3061	gmmR0UnlinkChunk(pChunk);
3062	pChunk->cFree++;
3063	gmmR0LinkChunk(pChunk, pChunk->cShared ? &pGMM->Shared : &pGMM->Private);
3064	}
3065	else
3066	{
3067	pChunk->cFree = cFree + 1;
3068	pChunk->pSet->cFreePages++;
3069	}
3070
3071	/*
3072	* If the chunk becomes empty, consider giving memory back to the host OS.
3073	*
3074	* The current strategy is to try give it back if there are other chunks
3075	* in this free list, meaning if there are at least 240 free pages in this
3076	* category. Note that since there are probably mappings of the chunk,
3077	* it won't be freed up instantly, which probably screws up this logic
3078	* a bit...
3079	*/
3080	/** @todo Do this on the way out. */
3081	if (RT_UNLIKELY( pChunk->cFree == GMM_CHUNK_NUM_PAGES
3082	&& pChunk->pFreeNext
3083	&& pChunk->pFreePrev /** @todo this is probably misfiring, see reset... */
3084	&& !pGMM->fLegacyAllocationMode))
3085	gmmR0FreeChunk(pGMM, NULL, pChunk, false);
3086
3087	}
3088
3089
3090	/**
3091	* Frees a shared page, the page is known to exist and be valid and such.
3092	*
3093	* @param pGMM Pointer to the GMM instance.
3094	* @param idPage The Page ID
3095	* @param pPage The page structure.
3096	*/
3097	DECLINLINE(void) gmmR0FreeSharedPage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage)
3098	{
3099	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
3100	Assert(pChunk);
3101	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
3102	Assert(pChunk->cShared > 0);
3103	Assert(pGMM->cSharedPages > 0);
3104	Assert(pGMM->cAllocatedPages > 0);
3105	Assert(!pPage->Shared.cRefs);
3106
3107	pChunk->cShared--;
3108	pGMM->cAllocatedPages--;
3109	pGMM->cSharedPages--;
3110	gmmR0FreePageWorker(pGMM, pChunk, idPage, pPage);
3111	}
3112
3113	#ifdef VBOX_WITH_PAGE_SHARING
3114
3115	/**
3116	* Converts a private page to a shared page, the page is known to exist and be valid and such.
3117	*
3118	* @param pGMM Pointer to the GMM instance.
3119	* @param pGVM Pointer to the GVM instance.
3120	* @param HCPhys Host physical address
3121	* @param idPage The Page ID
3122	* @param pPage The page structure.
3123	*/
3124	DECLINLINE(void) gmmR0ConvertToSharedPage(PGMM pGMM, PGVM pGVM, RTHCPHYS HCPhys, uint32_t idPage, PGMMPAGE pPage)
3125	{
3126	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
3127	Assert(pChunk);
3128	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
3129	Assert(GMM_PAGE_IS_PRIVATE(pPage));
3130
3131	pChunk->cPrivate--;
3132	pChunk->cShared++;
3133
3134	pGMM->cSharedPages++;
3135
3136	pGVM->gmm.s.cSharedPages++;
3137	pGVM->gmm.s.cPrivatePages--;
3138
3139	/* Modify the page structure. */
3140	pPage->Shared.pfn = (uint32_t)(uint64_t)(HCPhys >> PAGE_SHIFT);
3141	pPage->Shared.cRefs = 1;
3142	pPage->Common.u2State = GMM_PAGE_STATE_SHARED;
3143	}
3144
3145
3146	/**
3147	* Increase the use count of a shared page, the page is known to exist and be valid and such.
3148	*
3149	* @param pGMM Pointer to the GMM instance.
3150	* @param pGVM Pointer to the GVM instance.
3151	* @param pPage The page structure.
3152	*/
3153	DECLINLINE(void) gmmR0UseSharedPage(PGMM pGMM, PGVM pGVM, PGMMPAGE pPage)
3154	{
3155	Assert(pGMM->cSharedPages > 0);
3156	Assert(pGMM->cAllocatedPages > 0);
3157
3158	pGMM->cDuplicatePages++;
3159
3160	pPage->Shared.cRefs++;
3161	pGVM->gmm.s.cSharedPages++;
3162	pGVM->gmm.s.Allocated.cBasePages++;
3163	}
3164
3165	#endif /* VBOX_WITH_PAGE_SHARING */
3166
3167	/**
3168	* Frees a private page, the page is known to exist and be valid and such.
3169	*
3170	* @param pGMM Pointer to the GMM instance.
3171	* @param idPage The Page ID
3172	* @param pPage The page structure.
3173	*/
3174	DECLINLINE(void) gmmR0FreePrivatePage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage)
3175	{
3176	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
3177	Assert(pChunk);
3178	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
3179	Assert(pChunk->cPrivate > 0);
3180	Assert(pGMM->cAllocatedPages > 0);
3181
3182	pChunk->cPrivate--;
3183	pGMM->cAllocatedPages--;
3184	gmmR0FreePageWorker(pGMM, pChunk, idPage, pPage);
3185	}
3186
3187
3188	/**
3189	* Common worker for GMMR0FreePages and GMMR0BalloonedPages.
3190	*
3191	* @returns VBox status code:
3192	* @retval xxx
3193	*
3194	* @param pGMM Pointer to the GMM instance data.
3195	* @param pGVM Pointer to the shared VM structure.
3196	* @param cPages The number of pages to free.
3197	* @param paPages Pointer to the page descriptors.
3198	* @param enmAccount The account this relates to.
3199	*/
3200	static int gmmR0FreePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
3201	{
3202	/*
3203	* Check that the request isn't impossible wrt to the account status.
3204	*/
3205	switch (enmAccount)
3206	{
3207	case GMMACCOUNT_BASE:
3208	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages < cPages))
3209	{
3210	Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cBasePages, cPages));
3211	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3212	}
3213	break;
3214	case GMMACCOUNT_SHADOW:
3215	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cShadowPages < cPages))
3216	{
3217	Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cShadowPages, cPages));
3218	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3219	}
3220	break;
3221	case GMMACCOUNT_FIXED:
3222	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cFixedPages < cPages))
3223	{
3224	Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cFixedPages, cPages));
3225	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3226	}
3227	break;
3228	default:
3229	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
3230	}
3231
3232	/*
3233	* Walk the descriptors and free the pages.
3234	*
3235	* Statistics (except the account) are being updated as we go along,
3236	* unlike the alloc code. Also, stop on the first error.
3237	*/
3238	int rc = VINF_SUCCESS;
3239	uint32_t iPage;
3240	for (iPage = 0; iPage < cPages; iPage++)
3241	{
3242	uint32_t idPage = paPages[iPage].idPage;
3243	PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
3244	if (RT_LIKELY(pPage))
3245	{
3246	if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
3247	{
3248	if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
3249	{
3250	Assert(pGVM->gmm.s.cPrivatePages);
3251	pGVM->gmm.s.cPrivatePages--;
3252	gmmR0FreePrivatePage(pGMM, idPage, pPage);
3253	}
3254	else
3255	{
3256	Log(("gmmR0AllocatePages: #%#x/%#x: not owner! hGVM=%#x hSelf=%#x\n", iPage, idPage,
3257	pPage->Private.hGVM, pGVM->hSelf));
3258	rc = VERR_GMM_NOT_PAGE_OWNER;
3259	break;
3260	}
3261	}
3262	else if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
3263	{
3264	Assert(pGVM->gmm.s.cSharedPages);
3265	pGVM->gmm.s.cSharedPages--;
3266	Assert(pPage->Shared.cRefs);
3267	if (!--pPage->Shared.cRefs)
3268	gmmR0FreeSharedPage(pGMM, idPage, pPage);
3269	else
3270	{
3271	Assert(pGMM->cDuplicatePages);
3272	pGMM->cDuplicatePages--;
3273	}
3274	}
3275	else
3276	{
3277	Log(("gmmR0AllocatePages: #%#x/%#x: already free!\n", iPage, idPage));
3278	rc = VERR_GMM_PAGE_ALREADY_FREE;
3279	break;
3280	}
3281	}
3282	else
3283	{
3284	Log(("gmmR0AllocatePages: #%#x/%#x: not found!\n", iPage, idPage));
3285	rc = VERR_GMM_PAGE_NOT_FOUND;
3286	break;
3287	}
3288	paPages[iPage].idPage = NIL_GMM_PAGEID;
3289	}
3290
3291	/*
3292	* Update the account.
3293	*/
3294	switch (enmAccount)
3295	{
3296	case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages -= iPage; break;
3297	case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages -= iPage; break;
3298	case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages -= iPage; break;
3299	default:
3300	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
3301	}
3302
3303	/*
3304	* Any threshold stuff to be done here?
3305	*/
3306
3307	return rc;
3308	}
3309
3310
3311	/**
3312	* Free one or more pages.
3313	*
3314	* This is typically used at reset time or power off.
3315	*
3316	* @returns VBox status code:
3317	* @retval xxx
3318	*
3319	* @param pVM Pointer to the shared VM structure.
3320	* @param idCpu VCPU id
3321	* @param cPages The number of pages to allocate.
3322	* @param paPages Pointer to the page descriptors containing the Page IDs for each page.
3323	* @param enmAccount The account this relates to.
3324	* @thread EMT.
3325	*/
3326	GMMR0DECL(int) GMMR0FreePages(PVM pVM, VMCPUID idCpu, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
3327	{
3328	LogFlow(("GMMR0FreePages: pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pVM, cPages, paPages, enmAccount));
3329
3330	/*
3331	* Validate input and get the basics.
3332	*/
3333	PGMM pGMM;
3334	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3335	PGVM pGVM;
3336	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3337	if (RT_FAILURE(rc))
3338	return rc;
3339
3340	AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
3341	AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
3342	AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
3343
3344	for (unsigned iPage = 0; iPage < cPages; iPage++)
3345	AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
3346	/\|\| paPages[iPage].idPage == NIL_GMM_PAGEID/,
3347	("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
3348
3349	/*
3350	* Take the semaphore and call the worker function.
3351	*/
3352	gmmR0MutexAcquire(pGMM);
3353	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3354	{
3355	rc = gmmR0FreePages(pGMM, pGVM, cPages, paPages, enmAccount);
3356	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3357	}
3358	else
3359	rc = VERR_INTERNAL_ERROR_5;
3360	gmmR0MutexRelease(pGMM);
3361	LogFlow(("GMMR0FreePages: returns %Rrc\n", rc));
3362	return rc;
3363	}
3364
3365
3366	/**
3367	* VMMR0 request wrapper for GMMR0FreePages.
3368	*
3369	* @returns see GMMR0FreePages.
3370	* @param pVM Pointer to the shared VM structure.
3371	* @param idCpu VCPU id
3372	* @param pReq The request packet.
3373	*/
3374	GMMR0DECL(int) GMMR0FreePagesReq(PVM pVM, VMCPUID idCpu, PGMMFREEPAGESREQ pReq)
3375	{
3376	/*
3377	* Validate input and pass it on.
3378	*/
3379	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3380	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3381	AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0]),
3382	("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0])),
3383	VERR_INVALID_PARAMETER);
3384	AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[pReq->cPages]),
3385	("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[pReq->cPages])),
3386	VERR_INVALID_PARAMETER);
3387
3388	return GMMR0FreePages(pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
3389	}
3390
3391
3392	/**
3393	* Report back on a memory ballooning request.
3394	*
3395	* The request may or may not have been initiated by the GMM. If it was initiated
3396	* by the GMM it is important that this function is called even if no pages were
3397	* ballooned.
3398	*
3399	* @returns VBox status code:
3400	* @retval VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH
3401	* @retval VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH
3402	* @retval VERR_GMM_OVERCOMMITTED_TRY_AGAIN_IN_A_BIT - reset condition
3403	* indicating that we won't necessarily have sufficient RAM to boot
3404	* the VM again and that it should pause until this changes (we'll try
3405	* balloon some other VM). (For standard deflate we have little choice
3406	* but to hope the VM won't use the memory that was returned to it.)
3407	*
3408	* @param pVM Pointer to the shared VM structure.
3409	* @param idCpu VCPU id
3410	* @param enmAction Inflate/deflate/reset
3411	* @param cBalloonedPages The number of pages that was ballooned.
3412	*
3413	* @thread EMT.
3414	*/
3415	GMMR0DECL(int) GMMR0BalloonedPages(PVM pVM, VMCPUID idCpu, GMMBALLOONACTION enmAction, uint32_t cBalloonedPages)
3416	{
3417	LogFlow(("GMMR0BalloonedPages: pVM=%p enmAction=%d cBalloonedPages=%#x\n",
3418	pVM, enmAction, cBalloonedPages));
3419
3420	AssertMsgReturn(cBalloonedPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cBalloonedPages), VERR_INVALID_PARAMETER);
3421
3422	/*
3423	* Validate input and get the basics.
3424	*/
3425	PGMM pGMM;
3426	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3427	PGVM pGVM;
3428	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3429	if (RT_FAILURE(rc))
3430	return rc;
3431
3432	/*
3433	* Take the semaphore and do some more validations.
3434	*/
3435	gmmR0MutexAcquire(pGMM);
3436	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3437	{
3438	switch (enmAction)
3439	{
3440	case GMMBALLOONACTION_INFLATE:
3441	{
3442	if (RT_LIKELY(pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cBalloonedPages <= pGVM->gmm.s.Reserved.cBasePages))
3443	{
3444	/*
3445	* Record the ballooned memory.
3446	*/
3447	pGMM->cBalloonedPages += cBalloonedPages;
3448	if (pGVM->gmm.s.cReqBalloonedPages)
3449	{
3450	/* Codepath never taken. Might be interesting in the future to request ballooned memory from guests in low memory conditions.. */
3451	AssertFailed();
3452
3453	pGVM->gmm.s.cBalloonedPages += cBalloonedPages;
3454	pGVM->gmm.s.cReqActuallyBalloonedPages += cBalloonedPages;
3455	Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx Req=%#llx Actual=%#llx (pending)\n", cBalloonedPages,
3456	pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqBalloonedPages, pGVM->gmm.s.cReqActuallyBalloonedPages));
3457	}
3458	else
3459	{
3460	pGVM->gmm.s.cBalloonedPages += cBalloonedPages;
3461	Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx (user)\n",
3462	cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages));
3463	}
3464	}
3465	else
3466	{
3467	Log(("GMMR0BalloonedPages: cBasePages=%#llx Total=%#llx cBalloonedPages=%#llx Reserved=%#llx\n",
3468	pGVM->gmm.s.Allocated.cBasePages, pGVM->gmm.s.cBalloonedPages, cBalloonedPages, pGVM->gmm.s.Reserved.cBasePages));
3469	rc = VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3470	}
3471	break;
3472	}
3473
3474	case GMMBALLOONACTION_DEFLATE:
3475	{
3476	/* Deflate. */
3477	if (pGVM->gmm.s.cBalloonedPages >= cBalloonedPages)
3478	{
3479	/*
3480	* Record the ballooned memory.
3481	*/
3482	Assert(pGMM->cBalloonedPages >= cBalloonedPages);
3483	pGMM->cBalloonedPages -= cBalloonedPages;
3484	pGVM->gmm.s.cBalloonedPages -= cBalloonedPages;
3485	if (pGVM->gmm.s.cReqDeflatePages)
3486	{
3487	AssertFailed(); /* This is path is for later. */
3488	Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx Req=%#llx\n",
3489	cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqDeflatePages));
3490
3491	/*
3492	* Anything we need to do here now when the request has been completed?
3493	*/
3494	pGVM->gmm.s.cReqDeflatePages = 0;
3495	}
3496	else
3497	Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx (user)\n",
3498	cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages));
3499	}
3500	else
3501	{
3502	Log(("GMMR0BalloonedPages: Total=%#llx cBalloonedPages=%#llx\n", pGVM->gmm.s.cBalloonedPages, cBalloonedPages));
3503	rc = VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH;
3504	}
3505	break;
3506	}
3507
3508	case GMMBALLOONACTION_RESET:
3509	{
3510	/* Reset to an empty balloon. */
3511	Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.cBalloonedPages);
3512
3513	pGMM->cBalloonedPages -= pGVM->gmm.s.cBalloonedPages;
3514	pGVM->gmm.s.cBalloonedPages = 0;
3515	break;
3516	}
3517
3518	default:
3519	rc = VERR_INVALID_PARAMETER;
3520	break;
3521	}
3522	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3523	}
3524	else
3525	rc = VERR_INTERNAL_ERROR_5;
3526
3527	gmmR0MutexRelease(pGMM);
3528	LogFlow(("GMMR0BalloonedPages: returns %Rrc\n", rc));
3529	return rc;
3530	}
3531
3532
3533	/**
3534	* VMMR0 request wrapper for GMMR0BalloonedPages.
3535	*
3536	* @returns see GMMR0BalloonedPages.
3537	* @param pVM Pointer to the shared VM structure.
3538	* @param idCpu VCPU id
3539	* @param pReq The request packet.
3540	*/
3541	GMMR0DECL(int) GMMR0BalloonedPagesReq(PVM pVM, VMCPUID idCpu, PGMMBALLOONEDPAGESREQ pReq)
3542	{
3543	/*
3544	* Validate input and pass it on.
3545	*/
3546	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3547	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3548	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMBALLOONEDPAGESREQ),
3549	("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMBALLOONEDPAGESREQ)),
3550	VERR_INVALID_PARAMETER);
3551
3552	return GMMR0BalloonedPages(pVM, idCpu, pReq->enmAction, pReq->cBalloonedPages);
3553	}
3554
3555	/**
3556	* Return memory statistics for the hypervisor
3557	*
3558	* @returns VBox status code:
3559	* @param pVM Pointer to the shared VM structure.
3560	* @param pReq The request packet.
3561	*/
3562	GMMR0DECL(int) GMMR0QueryHypervisorMemoryStatsReq(PVM pVM, PGMMMEMSTATSREQ pReq)
3563	{
3564	/*
3565	* Validate input and pass it on.
3566	*/
3567	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3568	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3569	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
3570	("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
3571	VERR_INVALID_PARAMETER);
3572
3573	/*
3574	* Validate input and get the basics.
3575	*/
3576	PGMM pGMM;
3577	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3578	pReq->cAllocPages = pGMM->cAllocatedPages;
3579	pReq->cFreePages = (pGMM->cChunks << (GMM_CHUNK_SHIFT- PAGE_SHIFT)) - pGMM->cAllocatedPages;
3580	pReq->cBalloonedPages = pGMM->cBalloonedPages;
3581	pReq->cMaxPages = pGMM->cMaxPages;
3582	pReq->cSharedPages = pGMM->cDuplicatePages;
3583	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3584
3585	return VINF_SUCCESS;
3586	}
3587
3588	/**
3589	* Return memory statistics for the VM
3590	*
3591	* @returns VBox status code:
3592	* @param pVM Pointer to the shared VM structure.
3593	* @parma idCpu Cpu id.
3594	* @param pReq The request packet.
3595	*/
3596	GMMR0DECL(int) GMMR0QueryMemoryStatsReq(PVM pVM, VMCPUID idCpu, PGMMMEMSTATSREQ pReq)
3597	{
3598	/*
3599	* Validate input and pass it on.
3600	*/
3601	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3602	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3603	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
3604	("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
3605	VERR_INVALID_PARAMETER);
3606
3607	/*
3608	* Validate input and get the basics.
3609	*/
3610	PGMM pGMM;
3611	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3612	PGVM pGVM;
3613	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3614	if (RT_FAILURE(rc))
3615	return rc;
3616
3617	/*
3618	* Take the semaphore and do some more validations.
3619	*/
3620	gmmR0MutexAcquire(pGMM);
3621	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3622	{
3623	pReq->cAllocPages = pGVM->gmm.s.Allocated.cBasePages;
3624	pReq->cBalloonedPages = pGVM->gmm.s.cBalloonedPages;
3625	pReq->cMaxPages = pGVM->gmm.s.Reserved.cBasePages;
3626	pReq->cFreePages = pReq->cMaxPages - pReq->cAllocPages;
3627	}
3628	else
3629	rc = VERR_INTERNAL_ERROR_5;
3630
3631	gmmR0MutexRelease(pGMM);
3632	LogFlow(("GMMR3QueryVMMemoryStats: returns %Rrc\n", rc));
3633	return rc;
3634	}
3635
3636
3637	/**
3638	* Worker for gmmR0UnmapChunk and gmmr0FreeChunk.
3639	*
3640	* Don't call this in legacy allocation mode!
3641	*
3642	* @returns VBox status code.
3643	* @param pGMM Pointer to the GMM instance data.
3644	* @param pGVM Pointer to the Global VM structure.
3645	* @param pChunk Pointer to the chunk to be unmapped.
3646	*/
3647	static int gmmR0UnmapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
3648	{
3649	Assert(!pGMM->fLegacyAllocationMode);
3650
3651	/*
3652	* Find the mapping and try unmapping it.
3653	*/
3654	uint32_t cMappings = pChunk->cMappingsX;
3655	for (uint32_t i = 0; i < cMappings; i++)
3656	{
3657	Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
3658	if (pChunk->paMappingsX[i].pGVM == pGVM)
3659	{
3660	/* unmap */
3661	int rc = RTR0MemObjFree(pChunk->paMappingsX[i].hMapObj, false /* fFreeMappings (NA) */);
3662	if (RT_SUCCESS(rc))
3663	{
3664	/* update the record. */
3665	cMappings--;
3666	if (i < cMappings)
3667	pChunk->paMappingsX[i] = pChunk->paMappingsX[cMappings];
3668	pChunk->paMappingsX[cMappings].hMapObj = NIL_RTR0MEMOBJ;
3669	pChunk->paMappingsX[cMappings].pGVM = NULL;
3670	Assert(pChunk->cMappingsX - 1U == cMappings);
3671	pChunk->cMappingsX = cMappings;
3672	}
3673
3674	return rc;
3675	}
3676	}
3677
3678	Log(("gmmR0UnmapChunk: Chunk %#x is not mapped into pGVM=%p/%#x\n", pChunk->Core.Key, pGVM, pGVM->hSelf));
3679	return VERR_GMM_CHUNK_NOT_MAPPED;
3680	}
3681
3682
3683	/**
3684	* Unmaps a chunk previously mapped into the address space of the current process.
3685	*
3686	* @returns VBox status code.
3687	* @param pGMM Pointer to the GMM instance data.
3688	* @param pGVM Pointer to the Global VM structure.
3689	* @param pChunk Pointer to the chunk to be unmapped.
3690	*/
3691	static int gmmR0UnmapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem)
3692	{
3693	if (!pGMM->fLegacyAllocationMode)
3694	{
3695	/*
3696	* Lock the chunk and if possible leave the giant GMM lock.
3697	*/
3698	GMMR0CHUNKMTXSTATE MtxState;
3699	int rc = gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk,
3700	fRelaxedSem ? GMMR0CHUNK_MTX_RETAKE_GIANT : GMMR0CHUNK_MTX_KEEP_GIANT);
3701	if (RT_SUCCESS(rc))
3702	{
3703	rc = gmmR0UnmapChunkLocked(pGMM, pGVM, pChunk);
3704	gmmR0ChunkMutexRelease(&MtxState, pChunk);
3705	}
3706	return rc;
3707	}
3708
3709	if (pChunk->hGVM == pGVM->hSelf)
3710	return VINF_SUCCESS;
3711
3712	Log(("gmmR0UnmapChunk: Chunk %#x is not mapped into pGVM=%p/%#x (legacy)\n", pChunk->Core.Key, pGVM, pGVM->hSelf));
3713	return VERR_GMM_CHUNK_NOT_MAPPED;
3714	}
3715
3716
3717	/**
3718	* Worker for gmmR0MapChunk.
3719	*
3720	* @returns VBox status code.
3721	* @param pGMM Pointer to the GMM instance data.
3722	* @param pGVM Pointer to the Global VM structure.
3723	* @param pChunk Pointer to the chunk to be mapped.
3724	* @param ppvR3 Where to store the ring-3 address of the mapping.
3725	* In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will be
3726	* contain the address of the existing mapping.
3727	*/
3728	static int gmmR0MapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
3729	{
3730	/*
3731	* If we're in legacy mode this is simple.
3732	*/
3733	if (pGMM->fLegacyAllocationMode)
3734	{
3735	if (pChunk->hGVM != pGVM->hSelf)
3736	{
3737	Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
3738	return VERR_GMM_CHUNK_NOT_FOUND;
3739	}
3740
3741	*ppvR3 = RTR0MemObjAddressR3(pChunk->hMemObj);
3742	return VINF_SUCCESS;
3743	}
3744
3745	/*
3746	* Check to see if the chunk is already mapped.
3747	*/
3748	for (uint32_t i = 0; i < pChunk->cMappingsX; i++)
3749	{
3750	Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
3751	if (pChunk->paMappingsX[i].pGVM == pGVM)
3752	{
3753	*ppvR3 = RTR0MemObjAddressR3(pChunk->paMappingsX[i].hMapObj);
3754	Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
3755	#ifdef VBOX_WITH_PAGE_SHARING
3756	/* The ring-3 chunk cache can be out of sync; don't fail. */
3757	return VINF_SUCCESS;
3758	#else
3759	return VERR_GMM_CHUNK_ALREADY_MAPPED;
3760	#endif
3761	}
3762	}
3763
3764	/*
3765	* Do the mapping.
3766	*/
3767	RTR0MEMOBJ hMapObj;
3768	int rc = RTR0MemObjMapUser(&hMapObj, pChunk->hMemObj, (RTR3PTR)-1, 0, RTMEM_PROT_READ \| RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
3769	if (RT_SUCCESS(rc))
3770	{
3771	/* reallocate the array? assumes few users per chunk (usually one). */
3772	unsigned iMapping = pChunk->cMappingsX;
3773	if ( iMapping <= 3
3774	\|\| (iMapping & 3) == 0)
3775	{
3776	unsigned cNewSize = iMapping <= 3
3777	? iMapping + 1
3778	: iMapping + 4;
3779	Assert(cNewSize < 4 \|\| RT_ALIGN_32(cNewSize, 4) == cNewSize);
3780	if (RT_UNLIKELY(cNewSize > UINT16_MAX))
3781	{
3782	rc = RTR0MemObjFree(hMapObj, false /* fFreeMappings (NA) */); AssertRC(rc);
3783	return VERR_GMM_TOO_MANY_CHUNK_MAPPINGS;
3784	}
3785
3786	void pvMappings = RTMemRealloc(pChunk->paMappingsX, cNewSize sizeof(pChunk->paMappingsX[0]));
3787	if (RT_UNLIKELY(!pvMappings))
3788	{
3789	rc = RTR0MemObjFree(hMapObj, false /* fFreeMappings (NA) */); AssertRC(rc);
3790	return VERR_NO_MEMORY;
3791	}
3792	pChunk->paMappingsX = (PGMMCHUNKMAP)pvMappings;
3793	}
3794
3795	/* insert new entry */
3796	pChunk->paMappingsX[iMapping].hMapObj = hMapObj;
3797	pChunk->paMappingsX[iMapping].pGVM = pGVM;
3798	Assert(pChunk->cMappingsX == iMapping);
3799	pChunk->cMappingsX = iMapping + 1;
3800
3801	*ppvR3 = RTR0MemObjAddressR3(hMapObj);
3802	}
3803
3804	return rc;
3805	}
3806
3807
3808	/**
3809	* Maps a chunk into the user address space of the current process.
3810	*
3811	* @returns VBox status code.
3812	* @param pGMM Pointer to the GMM instance data.
3813	* @param pGVM Pointer to the Global VM structure.
3814	* @param pChunk Pointer to the chunk to be mapped.
3815	* @param fRelaxedSem Whether we can release the semaphore while doing the
3816	* mapping (@c true) or not.
3817	* @param ppvR3 Where to store the ring-3 address of the mapping.
3818	* In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will be
3819	* contain the address of the existing mapping.
3820	*/
3821	static int gmmR0MapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem, PRTR3PTR ppvR3)
3822	{
3823	/*
3824	* Take the chunk lock and leave the giant GMM lock when possible, then
3825	* call the worker function.
3826	*/
3827	GMMR0CHUNKMTXSTATE MtxState;
3828	int rc = gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk,
3829	fRelaxedSem ? GMMR0CHUNK_MTX_RETAKE_GIANT : GMMR0CHUNK_MTX_KEEP_GIANT);
3830	if (RT_SUCCESS(rc))
3831	{
3832	rc = gmmR0MapChunkLocked(pGMM, pGVM, pChunk, ppvR3);
3833	gmmR0ChunkMutexRelease(&MtxState, pChunk);
3834	}
3835
3836	return rc;
3837	}
3838
3839
3840
3841	/**
3842	* Check if a chunk is mapped into the specified VM
3843	*
3844	* @returns mapped yes/no
3845	* @param pGMM Pointer to the GMM instance.
3846	* @param pGVM Pointer to the Global VM structure.
3847	* @param pChunk Pointer to the chunk to be mapped.
3848	* @param ppvR3 Where to store the ring-3 address of the mapping.
3849	*/
3850	static int gmmR0IsChunkMapped(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
3851	{
3852	GMMR0CHUNKMTXSTATE MtxState;
3853	gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
3854	for (uint32_t i = 0; i < pChunk->cMappingsX; i++)
3855	{
3856	Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
3857	if (pChunk->paMappingsX[i].pGVM == pGVM)
3858	{
3859	*ppvR3 = RTR0MemObjAddressR3(pChunk->paMappingsX[i].hMapObj);
3860	gmmR0ChunkMutexRelease(&MtxState, pChunk);
3861	return true;
3862	}
3863	}
3864	*ppvR3 = NULL;
3865	gmmR0ChunkMutexRelease(&MtxState, pChunk);
3866	return false;
3867	}
3868
3869
3870	/**
3871	* Map a chunk and/or unmap another chunk.
3872	*
3873	* The mapping and unmapping applies to the current process.
3874	*
3875	* This API does two things because it saves a kernel call per mapping when
3876	* when the ring-3 mapping cache is full.
3877	*
3878	* @returns VBox status code.
3879	* @param pVM The VM.
3880	* @param idChunkMap The chunk to map. NIL_GMM_CHUNKID if nothing to map.
3881	* @param idChunkUnmap The chunk to unmap. NIL_GMM_CHUNKID if nothing to unmap.
3882	* @param ppvR3 Where to store the address of the mapped chunk. NULL is ok if nothing to map.
3883	* @thread EMT
3884	*/
3885	GMMR0DECL(int) GMMR0MapUnmapChunk(PVM pVM, uint32_t idChunkMap, uint32_t idChunkUnmap, PRTR3PTR ppvR3)
3886	{
3887	LogFlow(("GMMR0MapUnmapChunk: pVM=%p idChunkMap=%#x idChunkUnmap=%#x ppvR3=%p\n",
3888	pVM, idChunkMap, idChunkUnmap, ppvR3));
3889
3890	/*
3891	* Validate input and get the basics.
3892	*/
3893	PGMM pGMM;
3894	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3895	PGVM pGVM;
3896	int rc = GVMMR0ByVM(pVM, &pGVM);
3897	if (RT_FAILURE(rc))
3898	return rc;
3899
3900	AssertCompile(NIL_GMM_CHUNKID == 0);
3901	AssertMsgReturn(idChunkMap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkMap), VERR_INVALID_PARAMETER);
3902	AssertMsgReturn(idChunkUnmap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkUnmap), VERR_INVALID_PARAMETER);
3903
3904	if ( idChunkMap == NIL_GMM_CHUNKID
3905	&& idChunkUnmap == NIL_GMM_CHUNKID)
3906	return VERR_INVALID_PARAMETER;
3907
3908	if (idChunkMap != NIL_GMM_CHUNKID)
3909	{
3910	AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3911	*ppvR3 = NIL_RTR3PTR;
3912	}
3913
3914	/*
3915	* Take the semaphore and do the work.
3916	*
3917	* The unmapping is done last since it's easier to undo a mapping than
3918	* undoing an unmapping. The ring-3 mapping cache cannot not be so big
3919	* that it pushes the user virtual address space to within a chunk of
3920	* it it's limits, so, no problem here.
3921	*/
3922	gmmR0MutexAcquire(pGMM);
3923	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3924	{
3925	PGMMCHUNK pMap = NULL;
3926	if (idChunkMap != NIL_GVM_HANDLE)
3927	{
3928	pMap = gmmR0GetChunk(pGMM, idChunkMap);
3929	if (RT_LIKELY(pMap))
3930	rc = gmmR0MapChunk(pGMM, pGVM, pMap, true /fRelaxedSem/, ppvR3);
3931	else
3932	{
3933	Log(("GMMR0MapUnmapChunk: idChunkMap=%#x\n", idChunkMap));
3934	rc = VERR_GMM_CHUNK_NOT_FOUND;
3935	}
3936	}
3937	/** @todo split this operation, the bail out might (theoretcially) not be
3938	* entirely safe. */
3939
3940	if ( idChunkUnmap != NIL_GMM_CHUNKID
3941	&& RT_SUCCESS(rc))
3942	{
3943	PGMMCHUNK pUnmap = gmmR0GetChunk(pGMM, idChunkUnmap);
3944	if (RT_LIKELY(pUnmap))
3945	rc = gmmR0UnmapChunk(pGMM, pGVM, pUnmap, true /fRelaxedSem/);
3946	else
3947	{
3948	Log(("GMMR0MapUnmapChunk: idChunkUnmap=%#x\n", idChunkUnmap));
3949	rc = VERR_GMM_CHUNK_NOT_FOUND;
3950	}
3951
3952	if (RT_FAILURE(rc) && pMap)
3953	gmmR0UnmapChunk(pGMM, pGVM, pMap, false /fRelaxedSem/);
3954	}
3955
3956	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3957	}
3958	else
3959	rc = VERR_INTERNAL_ERROR_5;
3960	gmmR0MutexRelease(pGMM);
3961
3962	LogFlow(("GMMR0MapUnmapChunk: returns %Rrc\n", rc));
3963	return rc;
3964	}
3965
3966
3967	/**
3968	* VMMR0 request wrapper for GMMR0MapUnmapChunk.
3969	*
3970	* @returns see GMMR0MapUnmapChunk.
3971	* @param pVM Pointer to the shared VM structure.
3972	* @param pReq The request packet.
3973	*/
3974	GMMR0DECL(int) GMMR0MapUnmapChunkReq(PVM pVM, PGMMMAPUNMAPCHUNKREQ pReq)
3975	{
3976	/*
3977	* Validate input and pass it on.
3978	*/
3979	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3980	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3981	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
3982
3983	return GMMR0MapUnmapChunk(pVM, pReq->idChunkMap, pReq->idChunkUnmap, &pReq->pvR3);
3984	}
3985
3986
3987	/**
3988	* Legacy mode API for supplying pages.
3989	*
3990	* The specified user address points to a allocation chunk sized block that
3991	* will be locked down and used by the GMM when the GM asks for pages.
3992	*
3993	* @returns VBox status code.
3994	* @param pVM The VM.
3995	* @param idCpu VCPU id
3996	* @param pvR3 Pointer to the chunk size memory block to lock down.
3997	*/
3998	GMMR0DECL(int) GMMR0SeedChunk(PVM pVM, VMCPUID idCpu, RTR3PTR pvR3)
3999	{
4000	/*
4001	* Validate input and get the basics.
4002	*/
4003	PGMM pGMM;
4004	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4005	PGVM pGVM;
4006	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
4007	if (RT_FAILURE(rc))
4008	return rc;
4009
4010	AssertPtrReturn(pvR3, VERR_INVALID_POINTER);
4011	AssertReturn(!(PAGE_OFFSET_MASK & pvR3), VERR_INVALID_POINTER);
4012
4013	if (!pGMM->fLegacyAllocationMode)
4014	{
4015	Log(("GMMR0SeedChunk: not in legacy allocation mode!\n"));
4016	return VERR_NOT_SUPPORTED;
4017	}
4018
4019	/*
4020	* Lock the memory and add it as new chunk with our hGVM.
4021	* (The GMM locking is done inside gmmR0RegisterChunk.)
4022	*/
4023	RTR0MEMOBJ MemObj;
4024	rc = RTR0MemObjLockUser(&MemObj, pvR3, GMM_CHUNK_SIZE, RTMEM_PROT_READ \| RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
4025	if (RT_SUCCESS(rc))
4026	{
4027	rc = gmmR0RegisterChunk(pGMM, &pGMM->Private, MemObj, pGVM->hSelf, 0 /fChunkFlags/, NULL);
4028	if (RT_SUCCESS(rc))
4029	gmmR0MutexRelease(pGMM);
4030	else
4031	RTR0MemObjFree(MemObj, false /* fFreeMappings */);
4032	}
4033
4034	LogFlow(("GMMR0SeedChunk: rc=%d (pvR3=%p)\n", rc, pvR3));
4035	return rc;
4036	}
4037
4038
/**
 * Search state handed to gmmR0CheckForIdenticalModule when looking for a
 * global shared module by guest OS family, name and version.
 */
typedef struct
{
    /** Output: the matching tree node, set by the callback on a hit. */
    PAVLGCPTRNODECORE   pNode;
    /** Input: the module name to match (compared with strcmp). */
    char               *pszModuleName;
    /** Input: the module version to match (compared with strcmp). */
    char               *pszVersion;
    /** Input: the guest OS family the module must belong to. */
    VBOXOSFAMILY        enmGuestOS;
} GMMFINDMODULEBYNAME, *PGMMFINDMODULEBYNAME;
4046
4047	/**
4048	* Tree enumeration callback for finding identical modules by name and version
4049	*/
4050	DECLCALLBACK(int) gmmR0CheckForIdenticalModule(PAVLGCPTRNODECORE pNode, void *pvUser)
4051	{
4052	PGMMFINDMODULEBYNAME pInfo = (PGMMFINDMODULEBYNAME)pvUser;
4053	PGMMSHAREDMODULE pModule = (PGMMSHAREDMODULE)pNode;
4054
4055	if ( pInfo
4056	&& pInfo->enmGuestOS == pModule->enmGuestOS
4057	/** @todo replace with RTStrNCmp */
4058	&& !strcmp(pModule->szName, pInfo->pszModuleName)
4059	&& !strcmp(pModule->szVersion, pInfo->pszVersion))
4060	{
4061	pInfo->pNode = pNode;
4062	return 1; /* stop search */
4063	}
4064	return 0;
4065	}
4066
4067
4068	/**
4069	* Registers a new shared module for the VM
4070	*
4071	* @returns VBox status code.
4072	* @param pVM VM handle
4073	* @param idCpu VCPU id
4074	* @param enmGuestOS Guest OS type
4075	* @param pszModuleName Module name
4076	* @param pszVersion Module version
4077	* @param GCBaseAddr Module base address
4078	* @param cbModule Module size
4079	* @param cRegions Number of shared region descriptors
4080	* @param pRegions Shared region(s)
4081	*/
4082	GMMR0DECL(int) GMMR0RegisterSharedModule(PVM pVM, VMCPUID idCpu, VBOXOSFAMILY enmGuestOS, char pszModuleName, char pszVersion, RTGCPTR GCBaseAddr, uint32_t cbModule,
4083	unsigned cRegions, VMMDEVSHAREDREGIONDESC *pRegions)
4084	{
4085	#ifdef VBOX_WITH_PAGE_SHARING
4086	/*
4087	* Validate input and get the basics.
4088	*/
4089	PGMM pGMM;
4090	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4091	PGVM pGVM;
4092	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
4093	if (RT_FAILURE(rc))
4094	return rc;
4095
4096	Log(("GMMR0RegisterSharedModule %s %s base %RGv size %x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule));
4097
4098	/*
4099	* Take the semaphore and do some more validations.
4100	*/
4101	gmmR0MutexAcquire(pGMM);
4102	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4103	{
4104	bool fNewModule = false;
4105
4106	/* Check if this module is already locally registered. */
4107	PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
4108	if (!pRecVM)
4109	{
4110	pRecVM = (PGMMSHAREDMODULEPERVM)RTMemAllocZ(RT_OFFSETOF(GMMSHAREDMODULEPERVM, aRegions[cRegions]));
4111	if (!pRecVM)
4112	{
4113	AssertFailed();
4114	rc = VERR_NO_MEMORY;
4115	goto end;
4116	}
4117	pRecVM->Core.Key = GCBaseAddr;
4118	pRecVM->cRegions = cRegions;
4119
4120	/* Save the region data as they can differ between VMs (address space scrambling or simply different loading order) */
4121	for (unsigned i = 0; i < cRegions; i++)
4122	{
4123	pRecVM->aRegions[i].GCRegionAddr = pRegions[i].GCRegionAddr;
4124	pRecVM->aRegions[i].cbRegion = RT_ALIGN_T(pRegions[i].cbRegion, PAGE_SIZE, uint32_t);
4125	pRecVM->aRegions[i].u32Alignment = 0;
4126	pRecVM->aRegions[i].paHCPhysPageID = NULL; /* unused */
4127	}
4128
4129	bool ret = RTAvlGCPtrInsert(&pGVM->gmm.s.pSharedModuleTree, &pRecVM->Core);
4130	Assert(ret);
4131
4132	Log(("GMMR0RegisterSharedModule: new local module %s\n", pszModuleName));
4133	fNewModule = true;
4134	}
4135	else
4136	rc = VINF_PGM_SHARED_MODULE_ALREADY_REGISTERED;
4137
4138	/* Check if this module is already globally registered. */
4139	PGMMSHAREDMODULE pGlobalModule = (PGMMSHAREDMODULE)RTAvlGCPtrGet(&pGMM->pGlobalSharedModuleTree, GCBaseAddr);
4140	if ( !pGlobalModule
4141	&& enmGuestOS == VBOXOSFAMILY_Windows64)
4142	{
4143	/* Two identical copies of e.g. Win7 x64 will typically not have a similar virtual address space layout for dlls or kernel modules.
4144	* Try to find identical binaries based on name and version.
4145	*/
4146	GMMFINDMODULEBYNAME Info;
4147
4148	Info.pNode = NULL;
4149	Info.pszVersion = pszVersion;
4150	Info.pszModuleName = pszModuleName;
4151	Info.enmGuestOS = enmGuestOS;
4152
4153	Log(("Try to find identical module %s\n", pszModuleName));
4154	int ret = RTAvlGCPtrDoWithAll(&pGMM->pGlobalSharedModuleTree, true /* fFromLeft */, gmmR0CheckForIdenticalModule, &Info);
4155	if (ret == 1)
4156	{
4157	Assert(Info.pNode);
4158	pGlobalModule = (PGMMSHAREDMODULE)Info.pNode;
4159	Log(("Found identical module at %RGv\n", pGlobalModule->Core.Key));
4160	}
4161	}
4162
4163	if (!pGlobalModule)
4164	{
4165	Assert(fNewModule);
4166	Assert(!pRecVM->fCollision);
4167
4168	pGlobalModule = (PGMMSHAREDMODULE)RTMemAllocZ(RT_OFFSETOF(GMMSHAREDMODULE, aRegions[cRegions]));
4169	if (!pGlobalModule)
4170	{
4171	AssertFailed();
4172	rc = VERR_NO_MEMORY;
4173	goto end;
4174	}
4175
4176	pGlobalModule->Core.Key = GCBaseAddr;
4177	pGlobalModule->cbModule = cbModule;
4178	/* Input limit already safe; no need to check again. */
4179	/** @todo replace with RTStrCopy */
4180	strcpy(pGlobalModule->szName, pszModuleName);
4181	strcpy(pGlobalModule->szVersion, pszVersion);
4182
4183	pGlobalModule->enmGuestOS = enmGuestOS;
4184	pGlobalModule->cRegions = cRegions;
4185
4186	for (unsigned i = 0; i < cRegions; i++)
4187	{
4188	Log(("New region %d base=%RGv size %x\n", i, pRegions[i].GCRegionAddr, pRegions[i].cbRegion));
4189	pGlobalModule->aRegions[i].GCRegionAddr = pRegions[i].GCRegionAddr;
4190	pGlobalModule->aRegions[i].cbRegion = RT_ALIGN_T(pRegions[i].cbRegion, PAGE_SIZE, uint32_t);
4191	pGlobalModule->aRegions[i].u32Alignment = 0;
4192	pGlobalModule->aRegions[i].paHCPhysPageID = NULL; /* uninitialized. */
4193	}
4194
4195	/* Save reference. */
4196	pRecVM->pGlobalModule = pGlobalModule;
4197	pRecVM->fCollision = false;
4198	pGlobalModule->cUsers++;
4199	rc = VINF_SUCCESS;
4200
4201	bool ret = RTAvlGCPtrInsert(&pGMM->pGlobalSharedModuleTree, &pGlobalModule->Core);
4202	Assert(ret);
4203
4204	Log(("GMMR0RegisterSharedModule: new global module %s\n", pszModuleName));
4205	}
4206	else
4207	{
4208	Assert(pGlobalModule->cUsers > 0);
4209
4210	/* Make sure the name and version are identical. */
4211	/** @todo replace with RTStrNCmp */
4212	if ( !strcmp(pGlobalModule->szName, pszModuleName)
4213	&& !strcmp(pGlobalModule->szVersion, pszVersion))
4214	{
4215	/* Save reference. */
4216	pRecVM->pGlobalModule = pGlobalModule;
4217	if ( fNewModule
4218	\|\| pRecVM->fCollision == true) /* colliding module unregistered and new one registered since the last check */
4219	{
4220	pGlobalModule->cUsers++;
4221	Log(("GMMR0RegisterSharedModule: using existing module %s cUser=%d!\n", pszModuleName, pGlobalModule->cUsers));
4222	}
4223	pRecVM->fCollision = false;
4224	rc = VINF_SUCCESS;
4225	}
4226	else
4227	{
4228	Log(("GMMR0RegisterSharedModule: module %s collision!\n", pszModuleName));
4229	pRecVM->fCollision = true;
4230	rc = VINF_PGM_SHARED_MODULE_COLLISION;
4231	goto end;
4232	}
4233	}
4234
4235	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4236	}
4237	else
4238	rc = VERR_INTERNAL_ERROR_5;
4239
4240	end:
4241	gmmR0MutexRelease(pGMM);
4242	return rc;
4243	#else
4244	return VERR_NOT_IMPLEMENTED;
4245	#endif
4246	}
4247
4248
4249	/**
4250	* VMMR0 request wrapper for GMMR0RegisterSharedModule.
4251	*
4252	* @returns see GMMR0RegisterSharedModule.
4253	* @param pVM Pointer to the shared VM structure.
4254	* @param idCpu VCPU id
4255	* @param pReq The request packet.
4256	*/
4257	GMMR0DECL(int) GMMR0RegisterSharedModuleReq(PVM pVM, VMCPUID idCpu, PGMMREGISTERSHAREDMODULEREQ pReq)
4258	{
4259	/*
4260	* Validate input and pass it on.
4261	*/
4262	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
4263	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
4264	AssertMsgReturn(pReq->Hdr.cbReq >= sizeof(pReq) && pReq->Hdr.cbReq == RT_UOFFSETOF(GMMREGISTERSHAREDMODULEREQ, aRegions[pReq->cRegions]), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
4265
4266	/* Pass back return code in the request packet to preserve informational codes. (VMMR3CallR0 chokes on them) */
4267	pReq->rc = GMMR0RegisterSharedModule(pVM, idCpu, pReq->enmGuestOS, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule, pReq->cRegions, pReq->aRegions);
4268	return VINF_SUCCESS;
4269	}
4270
4271	/**
4272	* Unregisters a shared module for the VM
4273	*
4274	* @returns VBox status code.
4275	* @param pVM VM handle
4276	* @param idCpu VCPU id
4277	* @param pszModuleName Module name
4278	* @param pszVersion Module version
4279	* @param GCBaseAddr Module base address
4280	* @param cbModule Module size
4281	*/
4282	GMMR0DECL(int) GMMR0UnregisterSharedModule(PVM pVM, VMCPUID idCpu, char pszModuleName, char pszVersion, RTGCPTR GCBaseAddr, uint32_t cbModule)
4283	{
4284	#ifdef VBOX_WITH_PAGE_SHARING
4285	/*
4286	* Validate input and get the basics.
4287	*/
4288	PGMM pGMM;
4289	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4290	PGVM pGVM;
4291	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
4292	if (RT_FAILURE(rc))
4293	return rc;
4294
4295	Log(("GMMR0UnregisterSharedModule %s %s base=%RGv size %x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule));
4296
4297	/*
4298	* Take the semaphore and do some more validations.
4299	*/
4300	gmmR0MutexAcquire(pGMM);
4301	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4302	{
4303	PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
4304	if (pRecVM)
4305	{
4306	/* Remove reference to global shared module. */
4307	if (!pRecVM->fCollision)
4308	{
4309	PGMMSHAREDMODULE pRec = pRecVM->pGlobalModule;
4310	Assert(pRec);
4311
4312	if (pRec) /* paranoia */
4313	{
4314	Assert(pRec->cUsers);
4315	pRec->cUsers--;
4316	if (pRec->cUsers == 0)
4317	{
4318	/* Free the ranges, but leave the pages intact as there might still be references; they will be cleared by the COW mechanism. */
4319	for (unsigned i = 0; i < pRec->cRegions; i++)
4320	if (pRec->aRegions[i].paHCPhysPageID)
4321	RTMemFree(pRec->aRegions[i].paHCPhysPageID);
4322
4323	Assert(pRec->Core.Key == GCBaseAddr \|\| pRec->enmGuestOS == VBOXOSFAMILY_Windows64);
4324	Assert(pRec->cRegions == pRecVM->cRegions);
4325	#ifdef VBOX_STRICT
4326	for (unsigned i = 0; i < pRecVM->cRegions; i++)
4327	{
4328	Assert(pRecVM->aRegions[i].GCRegionAddr == pRec->aRegions[i].GCRegionAddr);
4329	Assert(pRecVM->aRegions[i].cbRegion == pRec->aRegions[i].cbRegion);
4330	}
4331	#endif
4332
4333	/* Remove from the tree and free memory. */
4334	RTAvlGCPtrRemove(&pGMM->pGlobalSharedModuleTree, pRec->Core.Key);
4335	RTMemFree(pRec);
4336	}
4337	}
4338	else
4339	rc = VERR_PGM_SHARED_MODULE_REGISTRATION_INCONSISTENCY;
4340	}
4341	else
4342	Assert(!pRecVM->pGlobalModule);
4343
4344	/* Remove from the tree and free memory. */
4345	RTAvlGCPtrRemove(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
4346	RTMemFree(pRecVM);
4347	}
4348	else
4349	rc = VERR_PGM_SHARED_MODULE_NOT_FOUND;
4350
4351	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4352	}
4353	else
4354	rc = VERR_INTERNAL_ERROR_5;
4355
4356	gmmR0MutexRelease(pGMM);
4357	return rc;
4358	#else
4359	return VERR_NOT_IMPLEMENTED;
4360	#endif
4361	}
4362
4363	/**
4364	* VMMR0 request wrapper for GMMR0UnregisterSharedModule.
4365	*
4366	* @returns see GMMR0UnregisterSharedModule.
4367	* @param pVM Pointer to the shared VM structure.
4368	* @param idCpu VCPU id
4369	* @param pReq The request packet.
4370	*/
4371	GMMR0DECL(int) GMMR0UnregisterSharedModuleReq(PVM pVM, VMCPUID idCpu, PGMMUNREGISTERSHAREDMODULEREQ pReq)
4372	{
4373	/*
4374	* Validate input and pass it on.
4375	*/
4376	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
4377	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
4378	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
4379
4380	return GMMR0UnregisterSharedModule(pVM, idCpu, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule);
4381	}
4382
4383	#ifdef VBOX_WITH_PAGE_SHARING
4384
4385	/**
4386	* Checks specified shared module range for changes
4387	*
4388	* Performs the following tasks:
4389	* - If a shared page is new, then it changes the GMM page type to shared and
4390	* returns it in the pPageDesc descriptor.
4391	* - If a shared page already exists, then it checks if the VM page is
4392	* identical and if so frees the VM page and returns the shared page in
4393	* pPageDesc descriptor.
4394	*
4395	* @remarks ASSUMES the caller has acquired the GMM semaphore!!
4396	*
4397	* @returns VBox status code.
4398	* @param pGMM Pointer to the GMM instance data.
4399	* @param pGVM Pointer to the GVM instance data.
4400	* @param pModule Module description
4401	* @param idxRegion Region index
4402	* @param idxPage Page index
4403	* @param paPageDesc Page descriptor
4404	*/
4405	GMMR0DECL(int) GMMR0SharedModuleCheckPage(PGVM pGVM, PGMMSHAREDMODULE pModule, unsigned idxRegion, unsigned idxPage,
4406	PGMMSHAREDPAGEDESC pPageDesc)
4407	{
4408	int rc = VINF_SUCCESS;
4409	PGMM pGMM;
4410	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4411	unsigned cPages = pModule->aRegions[idxRegion].cbRegion >> PAGE_SHIFT;
4412
4413	AssertReturn(idxRegion < pModule->cRegions, VERR_INVALID_PARAMETER);
4414	AssertReturn(idxPage < cPages, VERR_INVALID_PARAMETER);
4415
4416	LogFlow(("GMMR0SharedModuleCheckRange %s base %RGv region %d idxPage %d\n", pModule->szName, pModule->Core.Key, idxRegion, idxPage));
4417
4418	PGMMSHAREDREGIONDESC pGlobalRegion = &pModule->aRegions[idxRegion];
4419	if (!pGlobalRegion->paHCPhysPageID)
4420	{
4421	/* First time; create a page descriptor array. */
4422	Log(("Allocate page descriptor array for %d pages\n", cPages));
4423	pGlobalRegion->paHCPhysPageID = (uint32_t )RTMemAlloc(cPages sizeof(*pGlobalRegion->paHCPhysPageID));
4424	if (!pGlobalRegion->paHCPhysPageID)
4425	{
4426	AssertFailed();
4427	rc = VERR_NO_MEMORY;
4428	goto end;
4429	}
4430	/* Invalidate all descriptors. */
4431	for (unsigned i = 0; i < cPages; i++)
4432	pGlobalRegion->paHCPhysPageID[i] = NIL_GMM_PAGEID;
4433	}
4434
4435	/* We've seen this shared page for the first time? */
4436	if (pGlobalRegion->paHCPhysPageID[idxPage] == NIL_GMM_PAGEID)
4437	{
4438	new_shared_page:
4439	Log(("New shared page guest %RGp host %RHp\n", pPageDesc->GCPhys, pPageDesc->HCPhys));
4440
4441	/* Easy case: just change the internal page type. */
4442	PGMMPAGE pPage = gmmR0GetPage(pGMM, pPageDesc->uHCPhysPageId);
4443	if (!pPage)
4444	{
4445	Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #1 (GCPhys=%RGp HCPhys=%RHp idxRegion=%#x idxPage=%#x)\n",
4446	pPageDesc->uHCPhysPageId, pPageDesc->GCPhys, pPageDesc->HCPhys, idxRegion, idxPage));
4447	AssertFailed();
4448	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4449	goto end;
4450	}
4451
4452	AssertMsg(pPageDesc->GCPhys == (pPage->Private.pfn << 12), ("desc %RGp gmm %RGp\n", pPageDesc->HCPhys, (pPage->Private.pfn << 12)));
4453
4454	gmmR0ConvertToSharedPage(pGMM, pGVM, pPageDesc->HCPhys, pPageDesc->uHCPhysPageId, pPage);
4455
4456	/* Keep track of these references. */
4457	pGlobalRegion->paHCPhysPageID[idxPage] = pPageDesc->uHCPhysPageId;
4458	}
4459	else
4460	{
4461	uint8_t pbLocalPage, pbSharedPage;
4462	uint8_t *pbChunk;
4463	PGMMCHUNK pChunk;
4464
4465	Assert(pPageDesc->uHCPhysPageId != pGlobalRegion->paHCPhysPageID[idxPage]);
4466
4467	Log(("Replace existing page guest %RGp host %RHp id %x -> id %x\n", pPageDesc->GCPhys, pPageDesc->HCPhys, pPageDesc->uHCPhysPageId, pGlobalRegion->paHCPhysPageID[idxPage]));
4468
4469	/* Get the shared page source. */
4470	PGMMPAGE pPage = gmmR0GetPage(pGMM, pGlobalRegion->paHCPhysPageID[idxPage]);
4471	if (!pPage)
4472	{
4473	Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #2 (idxRegion=%#x idxPage=%#x)\n",
4474	pPageDesc->uHCPhysPageId, idxRegion, idxPage));
4475	AssertFailed();
4476	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4477	goto end;
4478	}
4479	if (pPage->Common.u2State != GMM_PAGE_STATE_SHARED)
4480	{
4481	/* Page was freed at some point; invalidate this entry. */
4482	/** @todo this isn't really bullet proof. */
4483	Log(("Old shared page was freed -> create a new one\n"));
4484	pGlobalRegion->paHCPhysPageID[idxPage] = NIL_GMM_PAGEID;
4485	goto new_shared_page; /* ugly goto */
4486	}
4487
4488	Log(("Replace existing page guest host %RHp -> %RHp\n", pPageDesc->HCPhys, ((uint64_t)pPage->Shared.pfn) << PAGE_SHIFT));
4489
4490	/* Calculate the virtual address of the local page. */
4491	pChunk = gmmR0GetChunk(pGMM, pPageDesc->uHCPhysPageId >> GMM_CHUNKID_SHIFT);
4492	if (pChunk)
4493	{
4494	if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
4495	{
4496	Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #3\n", pPageDesc->uHCPhysPageId));
4497	AssertFailed();
4498	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4499	goto end;
4500	}
4501	pbLocalPage = pbChunk + ((pPageDesc->uHCPhysPageId & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
4502	}
4503	else
4504	{
4505	Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #4\n", pPageDesc->uHCPhysPageId));
4506	AssertFailed();
4507	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4508	goto end;
4509	}
4510
4511	/* Calculate the virtual address of the shared page. */
4512	pChunk = gmmR0GetChunk(pGMM, pGlobalRegion->paHCPhysPageID[idxPage] >> GMM_CHUNKID_SHIFT);
4513	Assert(pChunk); /* can't fail as gmmR0GetPage succeeded. */
4514
4515	/* Get the virtual address of the physical page; map the chunk into the VM process if not already done. */
4516	if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
4517	{
4518	Log(("Map chunk into process!\n"));
4519	rc = gmmR0MapChunk(pGMM, pGVM, pChunk, false /fRelaxedSem/, (PRTR3PTR)&pbChunk);
4520	if (rc != VINF_SUCCESS)
4521	{
4522	AssertRC(rc);
4523	goto end;
4524	}
4525	}
4526	pbSharedPage = pbChunk + ((pGlobalRegion->paHCPhysPageID[idxPage] & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
4527
4528	/** @todo write ASMMemComparePage. */
4529	if (memcmp(pbSharedPage, pbLocalPage, PAGE_SIZE))
4530	{
4531	Log(("Unexpected differences found between local and shared page; skip\n"));
4532	/* Signal to the caller that this one hasn't changed. */
4533	pPageDesc->uHCPhysPageId = NIL_GMM_PAGEID;
4534	goto end;
4535	}
4536
4537	/* Free the old local page. */
4538	GMMFREEPAGEDESC PageDesc;
4539
4540	PageDesc.idPage = pPageDesc->uHCPhysPageId;
4541	rc = gmmR0FreePages(pGMM, pGVM, 1, &PageDesc, GMMACCOUNT_BASE);
4542	AssertRCReturn(rc, rc);
4543
4544	gmmR0UseSharedPage(pGMM, pGVM, pPage);
4545
4546	/* Pass along the new physical address & page id. */
4547	pPageDesc->HCPhys = ((uint64_t)pPage->Shared.pfn) << PAGE_SHIFT;
4548	pPageDesc->uHCPhysPageId = pGlobalRegion->paHCPhysPageID[idxPage];
4549	}
4550	end:
4551	return rc;
4552	}
4553
4554
4555	/**
4556	* RTAvlGCPtrDestroy callback.
4557	*
4558	* @returns 0 or VERR_INTERNAL_ERROR.
4559	* @param pNode The node to destroy.
4560	* @param pvGVM The GVM handle.
4561	*/
4562	static DECLCALLBACK(int) gmmR0CleanupSharedModule(PAVLGCPTRNODECORE pNode, void *pvGVM)
4563	{
4564	PGVM pGVM = (PGVM)pvGVM;
4565	PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)pNode;
4566
4567	Assert(pRecVM->pGlobalModule \|\| pRecVM->fCollision);
4568	if (pRecVM->pGlobalModule)
4569	{
4570	PGMMSHAREDMODULE pRec = pRecVM->pGlobalModule;
4571	AssertPtr(pRec);
4572	Assert(pRec->cUsers);
4573
4574	Log(("gmmR0CleanupSharedModule: %s %s cUsers=%d\n", pRec->szName, pRec->szVersion, pRec->cUsers));
4575	pRec->cUsers--;
4576	if (pRec->cUsers == 0)
4577	{
4578	for (uint32_t i = 0; i < pRec->cRegions; i++)
4579	if (pRec->aRegions[i].paHCPhysPageID)
4580	RTMemFree(pRec->aRegions[i].paHCPhysPageID);
4581
4582	/* Remove from the tree and free memory. */
4583	PGMM pGMM;
4584	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4585	RTAvlGCPtrRemove(&pGMM->pGlobalSharedModuleTree, pRec->Core.Key);
4586	RTMemFree(pRec);
4587	}
4588	}
4589	RTMemFree(pRecVM);
4590	return 0;
4591	}
4592
4593
/**
 * Used by GMMR0CleanupVM to clean up shared modules.
 *
 * This is called without taking the GMM lock so that it can be yielded as
 * needed here.  The lock is taken by this function for the duration of the
 * tree destruction.
 *
 * @param   pGMM    The GMM handle.
 * @param   pGVM    The global VM handle.
 */
static void gmmR0SharedModuleCleanup(PGMM pGMM, PGVM pGVM)
{
    gmmR0MutexAcquire(pGMM);
    GMM_CHECK_SANITY_UPON_ENTERING(pGMM);

    /* Destroy the per-VM module tree; gmmR0CleanupSharedModule is invoked for each node. */
    RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, pGVM);

    gmmR0MutexRelease(pGMM);
}
4612
4613	#endif /* VBOX_WITH_PAGE_SHARING */
4614
4615	/**
4616	* Removes all shared modules for the specified VM
4617	*
4618	* @returns VBox status code.
4619	* @param pVM VM handle
4620	* @param idCpu VCPU id
4621	*/
4622	GMMR0DECL(int) GMMR0ResetSharedModules(PVM pVM, VMCPUID idCpu)
4623	{
4624	#ifdef VBOX_WITH_PAGE_SHARING
4625	/*
4626	* Validate input and get the basics.
4627	*/
4628	PGMM pGMM;
4629	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4630	PGVM pGVM;
4631	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
4632	if (RT_FAILURE(rc))
4633	return rc;
4634
4635	/*
4636	* Take the semaphore and do some more validations.
4637	*/
4638	gmmR0MutexAcquire(pGMM);
4639	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4640	{
4641	Log(("GMMR0ResetSharedModules\n"));
4642	RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, pGVM);
4643
4644	rc = VINF_SUCCESS;
4645	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4646	}
4647	else
4648	rc = VERR_INTERNAL_ERROR_5;
4649
4650	gmmR0MutexRelease(pGMM);
4651	return rc;
4652	#else
4653	return VERR_NOT_IMPLEMENTED;
4654	#endif
4655	}
4656
4657	#ifdef VBOX_WITH_PAGE_SHARING
4658
/**
 * Enumeration state handed to gmmR0CheckSharedModule.
 */
typedef struct
{
    /** The global VM structure whose modules are being checked. */
    PGVM        pGVM;
    /** The VCPU id passed on to PGMR0SharedModuleCheck. */
    VMCPUID     idCpu;
    /** Status of the most recent PGMR0SharedModuleCheck call. */
    int         rc;
} GMMCHECKSHAREDMODULEINFO, *PGMMCHECKSHAREDMODULEINFO;
4665
4666	/**
4667	* Tree enumeration callback for checking a shared module.
4668	*/
4669	DECLCALLBACK(int) gmmR0CheckSharedModule(PAVLGCPTRNODECORE pNode, void *pvUser)
4670	{
4671	PGMMCHECKSHAREDMODULEINFO pInfo = (PGMMCHECKSHAREDMODULEINFO)pvUser;
4672	PGMMSHAREDMODULEPERVM pLocalModule = (PGMMSHAREDMODULEPERVM)pNode;
4673	PGMMSHAREDMODULE pGlobalModule = pLocalModule->pGlobalModule;
4674
4675	if ( !pLocalModule->fCollision
4676	&& pGlobalModule)
4677	{
4678	Log(("gmmR0CheckSharedModule: check %s %s base=%RGv size=%x collision=%d\n", pGlobalModule->szName, pGlobalModule->szVersion, pGlobalModule->Core.Key, pGlobalModule->cbModule, pLocalModule->fCollision));
4679	pInfo->rc = PGMR0SharedModuleCheck(pInfo->pGVM->pVM, pInfo->pGVM, pInfo->idCpu, pGlobalModule, pLocalModule->cRegions, pLocalModule->aRegions);
4680	if (RT_FAILURE(pInfo->rc))
4681	return 1; /* stop enumeration. */
4682	}
4683	return 0;
4684	}
4685
4686	#endif /* VBOX_WITH_PAGE_SHARING */
4687	#ifdef DEBUG_sandervl
4688
4689	/**
4690	* Setup for a GMMR0CheckSharedModules call (to allow log flush jumps back to ring 3)
4691	*
4692	* @returns VBox status code.
4693	* @param pVM VM handle
4694	*/
4695	GMMR0DECL(int) GMMR0CheckSharedModulesStart(PVM pVM)
4696	{
4697	/*
4698	* Validate input and get the basics.
4699	*/
4700	PGMM pGMM;
4701	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4702
4703	/*
4704	* Take the semaphore and do some more validations.
4705	*/
4706	gmmR0MutexAcquire(pGMM);
4707	if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4708	rc = VERR_INTERNAL_ERROR_5;
4709	else
4710	rc = VINF_SUCCESS;
4711
4712	return rc;
4713	}
4714
/**
 * Clean up after a GMMR0CheckSharedModules call (to allow log flush jumps back to ring 3)
 *
 * Releases the GMM mutex; the counterpart of GMMR0CheckSharedModulesStart.
 *
 * @returns VBox status code.
 * @param   pVM     VM handle
 */
GMMR0DECL(int) GMMR0CheckSharedModulesEnd(PVM pVM)
{
    /*
     * Validate input and get the basics.
     */
    PGMM pGMM;
    GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);

    gmmR0MutexRelease(pGMM);
    return VINF_SUCCESS;
}
4732
4733	#endif /* DEBUG_sandervl */
4734
4735	/**
4736	* Check all shared modules for the specified VM
4737	*
4738	* @returns VBox status code.
4739	* @param pVM VM handle
4740	* @param pVCpu VMCPU handle
4741	*/
4742	GMMR0DECL(int) GMMR0CheckSharedModules(PVM pVM, PVMCPU pVCpu)
4743	{
4744	#ifdef VBOX_WITH_PAGE_SHARING
4745	/*
4746	* Validate input and get the basics.
4747	*/
4748	PGMM pGMM;
4749	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4750	PGVM pGVM;
4751	int rc = GVMMR0ByVMAndEMT(pVM, pVCpu->idCpu, &pGVM);
4752	if (RT_FAILURE(rc))
4753	return rc;
4754
4755	# ifndef DEBUG_sandervl
4756	/*
4757	* Take the semaphore and do some more validations.
4758	*/
4759	gmmR0MutexAcquire(pGMM);
4760	# endif
4761	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4762	{
4763	GMMCHECKSHAREDMODULEINFO Info;
4764
4765	Log(("GMMR0CheckSharedModules\n"));
4766	Info.pGVM = pGVM;
4767	Info.idCpu = pVCpu->idCpu;
4768	Info.rc = VINF_SUCCESS;
4769
4770	RTAvlGCPtrDoWithAll(&pGVM->gmm.s.pSharedModuleTree, true /* fFromLeft */, gmmR0CheckSharedModule, &Info);
4771
4772	rc = Info.rc;
4773
4774	Log(("GMMR0CheckSharedModules done!\n"));
4775
4776	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4777	}
4778	else
4779	rc = VERR_INTERNAL_ERROR_5;
4780
4781	# ifndef DEBUG_sandervl
4782	gmmR0MutexRelease(pGMM);
4783	# endif
4784	return rc;
4785	#else
4786	return VERR_NOT_IMPLEMENTED;
4787	#endif
4788	}
4789
4790	#if defined(VBOX_STRICT) && HC_ARCH_BITS == 64
4791
4792	typedef struct
4793	{
4794	PGVM pGVM;
4795	PGMM pGMM;
4796	uint8_t *pSourcePage;
4797	bool fFoundDuplicate;
4798	} GMMFINDDUPPAGEINFO, *PGMMFINDDUPPAGEINFO;
4799
4800	/**
4801	* RTAvlU32DoWithAll callback.
4802	*
4803	* @returns 0
4804	* @param pNode The node to search.
4805	* @param pvInfo Pointer to the input parameters
4806	*/
4807	static DECLCALLBACK(int) gmmR0FindDupPageInChunk(PAVLU32NODECORE pNode, void *pvInfo)
4808	{
4809	PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
4810	PGMMFINDDUPPAGEINFO pInfo = (PGMMFINDDUPPAGEINFO)pvInfo;
4811	PGVM pGVM = pInfo->pGVM;
4812	PGMM pGMM = pInfo->pGMM;
4813	uint8_t *pbChunk;
4814
4815	/* Only take chunks not mapped into this VM process; not entirely correct. */
4816	if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
4817	{
4818	int rc = gmmR0MapChunk(pGMM, pGVM, pChunk, false /fRelaxedSem/, (PRTR3PTR)&pbChunk);
4819	if (RT_SUCCESS(rc))
4820	{
4821	/*
4822	* Look for duplicate pages
4823	*/
4824	unsigned iPage = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
4825	while (iPage-- > 0)
4826	{
4827	if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
4828	{
4829	uint8_t *pbDestPage = pbChunk + (iPage << PAGE_SHIFT);
4830
4831	if (!memcmp(pInfo->pSourcePage, pbDestPage, PAGE_SIZE))
4832	{
4833	pInfo->fFoundDuplicate = true;
4834	break;
4835	}
4836	}
4837	}
4838	gmmR0UnmapChunk(pGMM, pGVM, pChunk, false /fRelaxedSem/);
4839	}
4840	}
4841	return pInfo->fFoundDuplicate; /* (stops search if true) */
4842	}
4843
4844
4845	/**
4846	* Find a duplicate of the specified page in other active VMs
4847	*
4848	* @returns VBox status code.
4849	* @param pVM VM handle
4850	* @param pReq Request packet
4851	*/
4852	GMMR0DECL(int) GMMR0FindDuplicatePageReq(PVM pVM, PGMMFINDDUPLICATEPAGEREQ pReq)
4853	{
4854	/*
4855	* Validate input and pass it on.
4856	*/
4857	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
4858	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
4859	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
4860
4861	PGMM pGMM;
4862	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4863
4864	PGVM pGVM;
4865	int rc = GVMMR0ByVM(pVM, &pGVM);
4866	if (RT_FAILURE(rc))
4867	return rc;
4868
4869	/*
4870	* Take the semaphore and do some more validations.
4871	*/
4872	rc = gmmR0MutexAcquire(pGMM);
4873	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4874	{
4875	uint8_t *pbChunk;
4876	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, pReq->idPage >> GMM_CHUNKID_SHIFT);
4877	if (pChunk)
4878	{
4879	if (gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
4880	{
4881	uint8_t *pbSourcePage = pbChunk + ((pReq->idPage & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
4882	PGMMPAGE pPage = gmmR0GetPage(pGMM, pReq->idPage);
4883	if (pPage)
4884	{
4885	GMMFINDDUPPAGEINFO Info;
4886	Info.pGVM = pGVM;
4887	Info.pGMM = pGMM;
4888	Info.pSourcePage = pbSourcePage;
4889	Info.fFoundDuplicate = false;
4890	RTAvlU32DoWithAll(&pGMM->pChunks, true /* fFromLeft */, gmmR0FindDupPageInChunk, &Info);
4891
4892	pReq->fDuplicate = Info.fFoundDuplicate;
4893	}
4894	else
4895	{
4896	AssertFailed();
4897	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4898	}
4899	}
4900	else
4901	AssertFailed();
4902	}
4903	else
4904	AssertFailed();
4905	}
4906	else
4907	rc = VERR_INTERNAL_ERROR_5;
4908
4909	gmmR0MutexRelease(pGMM);
4910	return rc;
4911	}
4912
4913	#endif /* VBOX_STRICT && HC_ARCH_BITS == 64 */
4914

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/VMMR0/GMMR0.cpp@ 37214

Download in other formats: