GMMR0.cpp@ 37203

Last change on this file since 37203 was 37203, checked in by vboxsync, 14 years ago
GMMR0: Try reduce chunk lock contention.
Property svn:eol-style set to `native` Property svn:keywords set to `Id`
File size: 166.8 KB

Line
1	/* $Id: GMMR0.cpp 37203 2011-05-24 16:12:43Z vboxsync $ */
2	/** @file
3	* GMM - Global Memory Manager.
4	*/
5
6	/*
7	* Copyright (C) 2007-2011 Oracle Corporation
8	*
9	* This file is part of VirtualBox Open Source Edition (OSE), as
10	* available from http://www.virtualbox.org. This file is free software;
11	* you can redistribute it and/or modify it under the terms of the GNU
12	* General Public License (GPL) as published by the Free Software
13	* Foundation, in version 2 as it comes in the "COPYING" file of the
14	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	*/
17
18
19	/** @page pg_gmm GMM - The Global Memory Manager
20	*
21	* As the name indicates, this component is responsible for global memory
22	* management. Currently only guest RAM is allocated from the GMM, but this
23	* may change to include shadow page tables and other bits later.
24	*
25	* Guest RAM is managed as individual pages, but allocated from the host OS
26	* in chunks for reasons of portability / efficiency. To minimize the memory
27	* footprint all tracking structure must be as small as possible without
28	* unnecessary performance penalties.
29	*
30	* The allocation chunks have a fixed size, which is defined at compile time
31	* by the #GMM_CHUNK_SIZE \#define.
32	*
33	* Each chunk is given a unique ID. Each page also has a unique ID. The
34	* relationship between the two IDs is:
35	* @code
36	* GMM_CHUNK_SHIFT = log2(GMM_CHUNK_SIZE / PAGE_SIZE);
37	* idPage = (idChunk << GMM_CHUNK_SHIFT) | iPage;
38	* @endcode
39	* Where iPage is the index of the page within the chunk. This ID scheme
40	* permits for efficient chunk and page lookup, but it relies on the chunk size
41	* to be set at compile time. The chunks are organized in an AVL tree with their
42	* IDs being the keys.
43	*
44	* The physical address of each page in an allocation chunk is maintained by
45	* the #RTR0MEMOBJ and obtained using #RTR0MemObjGetPagePhysAddr. There is no
46	* need to duplicate this information (it'll cost 8-bytes per page if we did).
47	*
48	* So what do we need to track per page? Most importantly we need to know
49	* which state the page is in:
50	* - Private - Allocated for (eventually) backing one particular VM page.
51	* - Shared - Readonly page that is used by one or more VMs and treated
52	* as COW by PGM.
53	* - Free - Not used by anyone.
54	*
55	* For the page replacement operations (sharing, defragmenting and freeing)
56	* to be somewhat efficient, private pages needs to be associated with a
57	* particular page in a particular VM.
58	*
59	* Tracking the usage of shared pages is impractical and expensive, so we'll
60	* settle for a reference counting system instead.
61	*
62	* Free pages will be chained on LIFOs
63	*
64	* On 64-bit systems we will use a 64-bit bitfield per page, while on 32-bit
65	* systems a 32-bit bitfield will have to suffice because of address space
66	* limitations. The #GMMPAGE structure shows the details.
67	*
68	*
69	* @section sec_gmm_alloc_strat Page Allocation Strategy
70	*
71	* The strategy for allocating pages has to take fragmentation and shared
72	* pages into account, or we may end up with 2000 chunks with only
73	* a few pages in each. Shared pages cannot easily be reallocated because
74	* of the inaccurate usage accounting (see above). Private pages can be
75	* reallocated by a defragmentation thread in the same manner that sharing
76	* is done.
77	*
78	* The first approach is to manage the free pages in two sets depending on
79	* whether they are mainly for the allocation of shared or private pages.
80	* In the initial implementation there will be almost no possibility for
81	* mixing shared and private pages in the same chunk (only if we're really
82	* stressed on memory), but when we implement forking of VMs and have to
83	* deal with lots of COW pages it'll start getting kind of interesting.
84	*
85	* The sets are lists of chunks with approximately the same number of
86	* free pages. Say the chunk size is 1MB, meaning 256 pages, and a set
87	* consists of 16 lists. So, the first list will contain the chunks with
88	* 1-7 free pages, the second covers 8-15, and so on. The chunks will be
89	* moved between the lists as pages are freed up or allocated.
90	*
91	*
92	* @section sec_gmm_costs Costs
93	*
94	* The per page cost in kernel space is 32-bit plus whatever RTR0MEMOBJ
95	* entails. In addition there is the chunk cost of approximately
96	* (sizeof(RTR0MEMOBJ) + sizeof(CHUNK)) / 2^CHUNK_SHIFT bytes per page.
97	*
98	* On Windows the per page #RTR0MEMOBJ cost is 32-bit on 32-bit windows
99	* and 64-bit on 64-bit windows (a PFN_NUMBER in the MDL). So, 64-bit per page.
100	* The cost on Linux is identical, but here it's because of sizeof(struct page *).
101	*
102	*
103	* @section sec_gmm_legacy Legacy Mode for Non-Tier-1 Platforms
104	*
105	* In legacy mode the page source is locked user pages and not
106	* #RTR0MemObjAllocPhysNC, this means that a page can only be allocated
107	* by the VM that locked it. We will make no attempt at implementing
108	* page sharing on these systems, just do enough to make it all work.
109	*
110	*
111	* @subsection sub_gmm_locking Serializing
112	*
113	* One simple fast mutex will be employed in the initial implementation, not
114	* two as mentioned in @ref subsec_pgmPhys_Serializing.
115	*
116	* @see @ref subsec_pgmPhys_Serializing
117	*
118	*
119	* @section sec_gmm_overcommit Memory Over-Commitment Management
120	*
121	* The GVM will have to do the system wide memory over-commitment
122	* management. My current ideas are:
123	* - Per VM oc policy that indicates how much to initially commit
124	* to it and what to do in a out-of-memory situation.
125	* - Prevent overtaxing the host.
126	*
127	* There are some challenges here, the main ones are configurability and
128	* security. Should we for instance permit anyone to request 100% memory
129	* commitment? Who should be allowed to do runtime adjustments of the
130	* config. And how to prevent these settings from being lost when the last
131	* VM process exits? The solution is probably to have an optional root
132	* daemon that will keep VMMR0.r0 in memory and enable the security measures.
133	*
134	*
135	*
136	* @section sec_gmm_numa NUMA
137	*
138	* NUMA considerations will be designed and implemented a bit later.
139	*
140	* The preliminary guess is that we will have to try to allocate memory as
141	* close as possible to the CPUs the VM is executed on (EMT and additional CPU
142	* threads). Which means it's mostly about allocation and sharing policies.
143	* Both the scheduler and allocator interface will have to supply some NUMA info
144	* and we'll need to have a way to calc access costs.
145	*
146	*/
147
148
149	/*******************************************************************************
150	* Header Files *
151	*******************************************************************************/
152	#define LOG_GROUP LOG_GROUP_GMM
153	#include <VBox/rawpci.h>
154	#include <VBox/vmm/vm.h>
155	#include <VBox/vmm/gmm.h>
156	#include "GMMR0Internal.h"
157	#include <VBox/vmm/gvm.h>
158	#include <VBox/vmm/pgm.h>
159	#include <VBox/log.h>
160	#include <VBox/param.h>
161	#include <VBox/err.h>
162	#include <iprt/asm.h>
163	#include <iprt/avl.h>
164	#include <iprt/list.h>
165	#include <iprt/mem.h>
166	#include <iprt/memobj.h>
167	#include <iprt/semaphore.h>
168	#include <iprt/string.h>
169	#include <iprt/time.h>
170
171
172	/*******************************************************************************
173	* Structures and Typedefs *
174	*******************************************************************************/
175	/** Pointer to set of free chunks. */
176	typedef struct GMMCHUNKFREESET *PGMMCHUNKFREESET;
177
178	/** Pointer to a GMM allocation chunk. */
179	typedef struct GMMCHUNK *PGMMCHUNK;
180
181	/**
182	* The per-page tracking structure employed by the GMM.
183	*
184	* On 32-bit hosts some trickery is necessary to compress all the
185	* information into 32 bits: a clear top state bit (Private.fZero) means the
186	* page is private, otherwise the 2-bit state tells shared from free.
187	*
188	* Because of the different layout on 32-bit and 64-bit hosts, macros
189	* are used to get and set some of the data.
190	*/
191	typedef union GMMPAGE
192	{
193	#if HC_ARCH_BITS == 64
194	/** Unsigned integer view. */
195	uint64_t u;
196
197	/** The common view. */
198	struct GMMPAGECOMMON
199	{
200	uint32_t uStuff1 : 32;
201	uint32_t uStuff2 : 30;
202	/** The page state (GMM_PAGE_STATE_XXX). */
203	uint32_t u2State : 2;
204	} Common;
205
206	/** The view of a private page. */
207	struct GMMPAGEPRIVATE
208	{
209	/** The guest page frame number. (Max addressable: 2 ^ 44 - 16) */
210	uint32_t pfn;
211	/** The GVM handle. (64K VMs) */
212	uint32_t hGVM : 16;
213	/** Reserved. (14 bits wide, despite the u16 prefix in the name.) */
214	uint32_t u16Reserved : 14;
215	/** The page state (GMM_PAGE_STATE_XXX). */
216	uint32_t u2State : 2;
217	} Private;
218
219	/** The view of a shared page. */
220	struct GMMPAGESHARED
221	{
222	/** The host page frame number. (Max addressable: 2 ^ 44 - 16) */
223	uint32_t pfn;
224	/** The reference count (64K VMs). */
225	uint32_t cRefs : 16;
226	/** Reserved. Checksum or something? Two hGVMs for forking? */
227	uint32_t u14Reserved : 14;
228	/** The page state (GMM_PAGE_STATE_XXX). */
229	uint32_t u2State : 2;
230	} Shared;
231
232	/** The view of a free page. */
233	struct GMMPAGEFREE
234	{
235	/** The index of the next page in the free list. UINT16_MAX is NIL. */
236	uint16_t iNext;
237	/** Reserved. Checksum or something? */
238	uint16_t u16Reserved0;
239	/** Reserved. Checksum or something? */
240	uint32_t u30Reserved1 : 30;
241	/** The page state (GMM_PAGE_STATE_XXX). */
242	uint32_t u2State : 2;
243	} Free;
244
245	#else /* 32-bit */
246	/** Unsigned integer view. */
247	uint32_t u;
248
249	/** The common view. */
250	struct GMMPAGECOMMON
251	{
252	uint32_t uStuff : 30;
253	/** The page state (GMM_PAGE_STATE_XXX). */
254	uint32_t u2State : 2;
255	} Common;
256
257	/** The view of a private page. */
258	struct GMMPAGEPRIVATE
259	{
260	/** The guest page frame number. (Max addressable: 2 ^ 36) */
261	uint32_t pfn : 24;
262	/** The GVM handle. (127 VMs) */
263	uint32_t hGVM : 7;
264	/** The top page state bit, MBZ. GMM_PAGE_IS_PRIVATE tests this for zero. */
265	uint32_t fZero : 1;
266	} Private;
267
268	/** The view of a shared page. */
269	struct GMMPAGESHARED
270	{
271	/** The reference count. */
272	uint32_t cRefs : 30;
273	/** The page state (GMM_PAGE_STATE_XXX). */
274	uint32_t u2State : 2;
275	} Shared;
276
277	/** The view of a free page. */
278	struct GMMPAGEFREE
279	{
280	/** The index of the next page in the free list. UINT16_MAX is NIL. */
281	uint32_t iNext : 16;
282	/** Reserved. Checksum or something? */
283	uint32_t u14Reserved : 14;
284	/** The page state (GMM_PAGE_STATE_XXX). */
285	uint32_t u2State : 2;
286	} Free;
287	#endif
288	} GMMPAGE;
289	AssertCompileSize(GMMPAGE, sizeof(RTHCUINTPTR));
290	/** Pointer to a GMMPAGE. */
291	typedef GMMPAGE *PGMMPAGE;
292
293
294	/** @name The Page States.
295	* @{ */
296	/** A private page. */
297	#define GMM_PAGE_STATE_PRIVATE 0
298	/** A private page - alternative value used on the 32-bit implementation.
299	* This will never be used on 64-bit hosts. */
300	#define GMM_PAGE_STATE_PRIVATE_32 1
301	/** A shared page. */
302	#define GMM_PAGE_STATE_SHARED 2
303	/** A free page. */
304	#define GMM_PAGE_STATE_FREE 3
305	/** @} */
306
307
308	/** @def GMM_PAGE_IS_PRIVATE
309	*
310	* @returns true if private, false if not.
311	* @param pPage The GMM page.
312	*/
313	#if HC_ARCH_BITS == 64
314	# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_PRIVATE )
315	#else
316	# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Private.fZero == 0 )
317	#endif
318
319	/** @def GMM_PAGE_IS_SHARED
320	*
321	* @returns true if shared, false if not.
322	* @param pPage The GMM page.
323	*/
324	#define GMM_PAGE_IS_SHARED(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_SHARED )
325
326	/** @def GMM_PAGE_IS_FREE
327	*
328	* @returns true if free, false if not.
329	* @param pPage The GMM page.
330	*/
331	#define GMM_PAGE_IS_FREE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_FREE )
332
333	/** @def GMM_PAGE_PFN_LAST
334	* The last valid guest pfn range.
335	* @remark Some of the values outside the range has special meaning,
336	* see GMM_PAGE_PFN_UNSHAREABLE.
337	*/
338	#if HC_ARCH_BITS == 64
339	# define GMM_PAGE_PFN_LAST UINT32_C(0xfffffff0)
340	#else
341	# define GMM_PAGE_PFN_LAST UINT32_C(0x00fffff0)
342	#endif
343	AssertCompile(GMM_PAGE_PFN_LAST == (GMM_GCPHYS_LAST >> PAGE_SHIFT));
344
345	/** @def GMM_PAGE_PFN_UNSHAREABLE
346	* Indicates that this page isn't used for normal guest memory and thus isn't shareable.
347	*/
348	#if HC_ARCH_BITS == 64
349	# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0xfffffff1)
350	#else
351	# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0x00fffff1)
352	#endif
353	AssertCompile(GMM_PAGE_PFN_UNSHAREABLE == (GMM_GCPHYS_UNSHAREABLE >> PAGE_SHIFT));
354
355
356	/**
357	* A GMM allocation chunk ring-3 mapping record.
358	*
359	* This should really be associated with a session and not a VM, but
360	* it's simpler to associate it with a VM and clean up when the VM object
361	* is destroyed.
362	*/
363	typedef struct GMMCHUNKMAP
364	{
365	/** The mapping object. */
366	RTR0MEMOBJ hMapObj;
367	/** The VM owning the mapping. */
368	PGVM pGVM;
369	} GMMCHUNKMAP;
370	/** Pointer to a GMM allocation chunk mapping. */
371	typedef struct GMMCHUNKMAP *PGMMCHUNKMAP;
372
373
374	/**
375	* A GMM allocation chunk.
376	*
	* Each member comment notes in parentheses which lock protects the member:
	* the GMM giant mutex ("Giant mtx.") or the per-chunk mutex ("Chunk mtx.").
	*/
377	typedef struct GMMCHUNK
378	{
379	/** The AVL node core.
380	* The Key is the chunk ID. (Giant mtx.) */
381	AVLU32NODECORE Core;
382	/** The memory object.
383	* Either from RTR0MemObjAllocPhysNC or RTR0MemObjLockUser depending on
384	* what the host can dish up with. (Chunk mtx protects mapping accesses
385	* and related frees.) */
386	RTR0MEMOBJ hMemObj;
387	/** Pointer to the next chunk in the free list. (Giant mtx.) */
388	PGMMCHUNK pFreeNext;
389	/** Pointer to the previous chunk in the free list. (Giant mtx.) */
390	PGMMCHUNK pFreePrev;
391	/** Pointer to the free set this chunk belongs to. NULL for
392	* chunks with no free pages. (Giant mtx.) */
393	PGMMCHUNKFREESET pSet;
394	/** List node in the chunk list (GMM::ChunkList). (Giant mtx.) */
395	RTLISTNODE ListNode;
396	/** Pointer to an array of mappings. (Chunk mtx.) */
397	PGMMCHUNKMAP paMappingsX;
398	/** The number of mappings. (Chunk mtx.) */
399	uint16_t cMappingsX;
400	/** The mapping lock (chunk mutex) this chunk is using. NIL when nobody is
401	* mapping or freeing anything; the NIL value was documented as UINT16_MAX, but the field is 8 bits wide, so presumably UINT8_MAX - TODO confirm. (Giant mtx.) */
402	uint8_t volatile iChunkMtx;
403	/** Flags field reserved for future use (like eliminating enmType).
404	* (Giant mtx.) */
405	uint8_t fFlags;
406	/** The head of the list of free pages. UINT16_MAX is the NIL value.
407	* (Giant mtx.) */
408	uint16_t iFreeHead;
409	/** The number of free pages. (Giant mtx.) */
410	uint16_t cFree;
411	/** The GVM handle of the VM that first allocated pages from this chunk, this
412	* is used as a preference when there are several chunks to choose from.
413	* When in bound memory mode this isn't a preference any longer. (Giant
414	* mtx.) */
415	uint16_t hGVM;
416	/** The ID of the NUMA node the memory mostly resides on. (Reserved for
417	* future use.) (Giant mtx.) */
418	uint16_t idNumaNode;
419	/** The number of private pages. (Giant mtx.) */
420	uint16_t cPrivate;
421	/** The number of shared pages. (Giant mtx.) */
422	uint16_t cShared;
423	/** The pages, one tracking entry per page in the chunk. (Giant mtx.) */
424	GMMPAGE aPages[GMM_CHUNK_SIZE >> PAGE_SHIFT];
425	} GMMCHUNK;
426
427	/** Indicates that the NUMA properties of the memory are unknown. */
428	#define GMM_CHUNK_NUMA_ID_UNKNOWN UINT16_C(0xfffe)
429
430	/** @name GMM_CHUNK_FLAGS_XXX - chunk flags.
431	* @{ */
432	/** Indicates that the chunk is a large page (2MB). */
433	#define GMM_CHUNK_FLAGS_LARGE_PAGE UINT16_C(0x0001)
434	/** @} */
435
436
437	/**
438	* An allocation chunk TLB entry.
439	*/
440	typedef struct GMMCHUNKTLBE
441	{
442	/** The chunk id. */
443	uint32_t idChunk;
444	/** Pointer to the chunk. */
445	PGMMCHUNK pChunk;
446	} GMMCHUNKTLBE;
447	/** Pointer to an allocation chunk TLB entry. */
448	typedef GMMCHUNKTLBE *PGMMCHUNKTLBE;
449
450
451	/** The number of entries in the allocation chunk TLB. */
452	#define GMM_CHUNKTLB_ENTRIES 32
453	/** Gets the TLB entry index for the given Chunk ID. */
454	#define GMM_CHUNKTLB_IDX(idChunk) ( (idChunk) & (GMM_CHUNKTLB_ENTRIES - 1) )
455
456	/**
457	* An allocation chunk TLB.
458	*/
459	typedef struct GMMCHUNKTLB
460	{
461	/** The TLB entries. */
462	GMMCHUNKTLBE aEntries[GMM_CHUNKTLB_ENTRIES];
463	} GMMCHUNKTLB;
464	/** Pointer to an allocation chunk TLB. */
465	typedef GMMCHUNKTLB *PGMMCHUNKTLB;
466
467
468	/** The GMMCHUNK::cFree shift count. */
469	#define GMM_CHUNK_FREE_SET_SHIFT 4
470
471
472	/**
473	* A set of free chunks.
474	*/
475	typedef struct GMMCHUNKFREESET
476	{
477	/** The number of free pages in the set. */
478	uint64_t cFreePages;
479	/** The generation ID for the set. This is incremented whenever
480	* something is linked or unlinked from this set. */
481	uint64_t idGeneration;
482	/** Chunks ordered by increasing number of free pages. */
483	PGMMCHUNK apLists[GMM_CHUNK_NUM_PAGES >> GMM_CHUNK_FREE_SET_SHIFT];
484	} GMMCHUNKFREESET;
485
486
487	/**
488	* The GMM instance data.
489	*/
490	typedef struct GMM
491	{
492	/** Magic / eye catcher. GMM_MAGIC */
493	uint32_t u32Magic;
494	/** The number of threads waiting on the mutex. Updated atomically by gmmR0MutexAcquire and gmmR0MutexYield. */
495	uint32_t cMtxContenders;
496	/** The fast mutex protecting the GMM (the "giant" mutex).
497	* More fine grained locking can be implemented later if necessary. */
498	RTSEMFASTMUTEX hMtx;
499	#ifdef VBOX_STRICT
500	/** The current mutex owner. Only maintained in strict builds. */
501	RTNATIVETHREAD hMtxOwner;
502	#endif
503	/** The chunk tree. */
504	PAVLU32NODECORE pChunks;
505	/** The chunk TLB. */
506	GMMCHUNKTLB ChunkTLB;
507	/** The private free set. */
508	GMMCHUNKFREESET Private;
509	/** The shared free set. */
510	GMMCHUNKFREESET Shared;
511
512	/** Shared module tree (global). */
513	/** @todo separate trees for distinctly different guest OSes. */
514	PAVLGCPTRNODECORE pGlobalSharedModuleTree;
515
516	/** The fast mutex protecting the GMM cleanup.
517	* This serializes VMs cleaning up their memory, so that we can
518	* safely leave the primary mutex (hMtx). */
519	RTSEMFASTMUTEX hMtxCleanup;
520	/** The chunk list. For simplifying the cleanup process. */
521	RTLISTNODE ChunkList;
522
523	/** The maximum number of pages we're allowed to allocate.
524	* @gcfgm 64-bit GMM/MaxPages Direct.
525	* @gcfgm 32-bit GMM/PctPages Relative to the number of host pages. */
526	uint64_t cMaxPages;
527	/** The number of pages that have been reserved.
528	* The deal is that cReservedPages - cOverCommittedPages <= cMaxPages. */
529	uint64_t cReservedPages;
530	/** The number of pages that we have over-committed in reservations. */
531	uint64_t cOverCommittedPages;
532	/** The number of actually allocated (committed if you like) pages. */
533	uint64_t cAllocatedPages;
534	/** The number of pages that are shared. A subset of cAllocatedPages. */
535	uint64_t cSharedPages;
536	/** The number of pages that are actually shared between VMs. */
537	uint64_t cDuplicatePages;
538	/** The number of shared pages that have been left behind by
539	* VMs not doing proper cleanups. */
540	uint64_t cLeftBehindSharedPages;
541	/** The number of allocation chunks.
542	* (The number of pages we've allocated from the host can be derived from this.) */
543	uint32_t cChunks;
544	/** The number of current ballooned pages. */
545	uint64_t cBalloonedPages;
546
547	/** The legacy allocation mode indicator.
548	* This is determined at initialization time. */
549	bool fLegacyAllocationMode;
550	/** The bound memory mode indicator.
551	* When set, the memory will be bound to a specific VM and never
552	* shared. This is always set if fLegacyAllocationMode is set.
553	* (Also determined at initialization time.) */
554	bool fBoundMemoryMode;
555	/** The number of registered VMs. */
556	uint16_t cRegisteredVMs;
557
558	/** The number of freed chunks ever. This is used as a list generation
559	* number to avoid restarting the cleanup scanning when the list wasn't modified. */
560	uint32_t cFreedChunks;
561	/** The previous allocated Chunk ID.
562	* Used as a hint to avoid scanning the whole bitmap. */
563	uint32_t idChunkPrev;
564	/** Chunk ID allocation bitmap.
565	* Bits of allocated IDs are set, free ones are clear.
566	* The NIL id (0) is marked allocated. */
567	uint32_t bmChunkId[(GMM_CHUNKID_LAST + 1 + 31) / 32];
568
569	/** The index of the next mutex to use. */
570	uint32_t iNextChunkMtx;
571	/** Chunk locks for reducing lock contention without having to allocate
572	* one lock per chunk. */
573	struct
574	{
575	/** The mutex */
576	RTSEMFASTMUTEX hMtx;
577	/** The number of threads currently using this mutex. */
578	uint32_t volatile cUsers;
579	} aChunkMtx[64];
580	} GMM;
581	/** Pointer to the GMM instance. */
582	typedef GMM *PGMM;
583
584	/** The value of GMM::u32Magic (Katsuhiro Otomo). */
585	#define GMM_MAGIC UINT32_C(0x19540414)
586
587
588	/**
589	* GMM chunk mutex state.
590	*
591	* This is returned by gmmR0ChunkMutexAcquire and is used by the other
592	* gmmR0ChunkMutex* methods.
593	*/
594	typedef struct GMMR0CHUNKMTXSTATE
595	{
	/** Pointer to the GMM instance. */
596	PGMM pGMM;
597	/** The index of the chunk mutex. */
598	uint8_t iChunkMtx;
599	/** The relevant flags (GMMR0CHUNK_MTX_XXX). */
600	uint8_t fFlags;
601	} GMMR0CHUNKMTXSTATE;
602	/** Pointer to a chunk mutex state. */
603	typedef GMMR0CHUNKMTXSTATE *PGMMR0CHUNKMTXSTATE;
604
605	/** @name GMMR0CHUNK_MTX_XXX
606	* @{ */
607	#define GMMR0CHUNK_MTX_INVALID UINT32_C(0)
608	#define GMMR0CHUNK_MTX_KEEP_GIANT UINT32_C(1)
609	#define GMMR0CHUNK_MTX_RETAKE_GIANT UINT32_C(2)
610	#define GMMR0CHUNK_MTX_DROP_GIANT UINT32_C(3)
611	#define GMMR0CHUNK_MTX_END UINT32_C(4)
612	/** @} */
613
614
615	/*******************************************************************************
616	* Global Variables *
617	*******************************************************************************/
618	/** Pointer to the GMM instance data. */
619	static PGMM g_pGMM = NULL;
620
621	/** Macro for obtaining and validating the g_pGMM pointer.
622	* On failure it will return from the invoking function with the specified return value.
623	*
624	* @param pGMM The name of the pGMM variable.
625	* @param rc The return value on failure. Use VERR_INTERNAL_ERROR for
626	* VBox status codes.
627	*/
628	#define GMM_GET_VALID_INSTANCE(pGMM, rc) \
629	do { \
630	(pGMM) = g_pGMM; \
631	AssertPtrReturn((pGMM), (rc)); \
632	AssertMsgReturn((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic), (rc)); \
633	} while (0)
634
635	/** Macro for obtaining and validating the g_pGMM pointer, void function variant.
636	* On failure it will return from the invoking function.
637	*
638	* @param pGMM The name of the pGMM variable.
639	*/
640	#define GMM_GET_VALID_INSTANCE_VOID(pGMM) \
641	do { \
642	(pGMM) = g_pGMM; \
643	AssertPtrReturnVoid((pGMM)); \
644	AssertMsgReturnVoid((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic)); \
645	} while (0)
646
647
648	/** @def GMM_CHECK_SANITY_UPON_ENTERING
649	* Checks the sanity of the GMM instance data before making changes.
650	*
651	* This macro is a stub by default and must be enabled manually in the code.
652	*
653	* @returns true if sane, false if not.
654	* @param pGMM The name of the pGMM variable.
655	*/
656	#if defined(VBOX_STRICT) && 0
657	# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
658	#else
659	# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM) (true)
660	#endif
661
662	/** @def GMM_CHECK_SANITY_UPON_LEAVING
663	* Checks the sanity of the GMM instance data after making changes.
664	*
665	* This macro is a stub by default and must be enabled manually in the code.
666	*
667	* @returns true if sane, false if not.
668	* @param pGMM The name of the pGMM variable.
669	*/
670	#if defined(VBOX_STRICT) && 0
671	# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
672	#else
673	# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM) (true)
674	#endif
675
676	/** @def GMM_CHECK_SANITY_IN_LOOPS
677	* Checks the sanity of the GMM instance in the allocation loops.
678	*
679	* This macro is a stub by default and must be enabled manually in the code.
680	*
681	* @returns true if sane, false if not.
682	* @param pGMM The name of the pGMM variable.
683	*/
684	#if defined(VBOX_STRICT) && 0
685	# define GMM_CHECK_SANITY_IN_LOOPS(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
686	#else
687	# define GMM_CHECK_SANITY_IN_LOOPS(pGMM) (true)
688	#endif
689
690
691	/*******************************************************************************
692	* Internal Functions *
693	*******************************************************************************/
694	static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM);
695	static bool gmmR0CleanupVMScanChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
696	DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet);
697	DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk);
698	static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo);
699	static void gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
700	static void gmmR0FreeSharedPage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage);
701	static int gmmR0UnmapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
702	static void gmmR0SharedModuleCleanup(PGMM pGMM, PGVM pGVM);
703
704
705
706	/**
707	* Initializes the GMM component.
708	*
709	* This is called when the VMMR0.r0 module is loaded and protected by the
710	* loader semaphore.
711	*
712	* @returns VBox status code.
713	*/
714	GMMR0DECL(int) GMMR0Init(void)
715	{
716	LogFlow(("GMMInit:\n"));
717
718	/*
719	* Allocate the instance data and the locks.
720	*/
721	PGMM pGMM = (PGMM)RTMemAllocZ(sizeof(*pGMM));
722	if (!pGMM)
723	return VERR_NO_MEMORY;
724
725	pGMM->u32Magic = GMM_MAGIC;
726	for (unsigned i = 0; i < RT_ELEMENTS(pGMM->ChunkTLB.aEntries); i++)
727	pGMM->ChunkTLB.aEntries[i].idChunk = NIL_GMM_CHUNKID;
728	RTListInit(&pGMM->ChunkList);
729	ASMBitSet(&pGMM->bmChunkId[0], NIL_GMM_CHUNKID);
730
731	int rc = RTSemFastMutexCreate(&pGMM->hMtx);
732	if (RT_SUCCESS(rc))
733	{
734	rc = RTSemFastMutexCreate(&pGMM->hMtxCleanup);
735	if (RT_SUCCESS(rc))
736	{
737	unsigned iMtx;
738	for (iMtx = 0; iMtx < RT_ELEMENTS(pGMM->aChunkMtx); iMtx++)
739	{
740	rc = RTSemFastMutexCreate(&pGMM->aChunkMtx[iMtx].hMtx);
741	if (RT_FAILURE(rc))
742	break;
743	}
744	if (RT_SUCCESS(rc))
745	{
746	/*
747	* Check and see if RTR0MemObjAllocPhysNC works.
748	*/
749	#if 0 /* later, see #3170. */
750	RTR0MEMOBJ MemObj;
751	rc = RTR0MemObjAllocPhysNC(&MemObj, _64K, NIL_RTHCPHYS);
752	if (RT_SUCCESS(rc))
753	{
754	rc = RTR0MemObjFree(MemObj, true);
755	AssertRC(rc);
756	}
757	else if (rc == VERR_NOT_SUPPORTED)
758	pGMM->fLegacyAllocationMode = pGMM->fBoundMemoryMode = true;
759	else
760	SUPR0Printf("GMMR0Init: RTR0MemObjAllocPhysNC(,64K,Any) -> %d!\n", rc);
761	#else
762	# if defined(RT_OS_WINDOWS) \|\| (defined(RT_OS_SOLARIS) && ARCH_BITS == 64) \|\| defined(RT_OS_LINUX) \|\| defined(RT_OS_FREEBSD)
763	pGMM->fLegacyAllocationMode = false;
764	# if ARCH_BITS == 32
765	/* Don't reuse possibly partial chunks because of the virtual
766	address space limitation. */
767	pGMM->fBoundMemoryMode = true;
768	# else
769	pGMM->fBoundMemoryMode = false;
770	# endif
771	# else
772	pGMM->fLegacyAllocationMode = true;
773	pGMM->fBoundMemoryMode = true;
774	# endif
775	#endif
776
777	/*
778	* Query system page count and guess a reasonable cMaxPages value.
779	*/
780	pGMM->cMaxPages = UINT32_MAX; /** @todo IPRT function for query ram size and such. */
781
782	g_pGMM = pGMM;
783	LogFlow(("GMMInit: pGMM=%p fLegacyAllocationMode=%RTbool fBoundMemoryMode=%RTbool\n", pGMM, pGMM->fLegacyAllocationMode, pGMM->fBoundMemoryMode));
784	return VINF_SUCCESS;
785	}
786
787	/*
788	* Bail out.
789	*/
790	while (iMtx-- > 0)
791	RTSemFastMutexDestroy(pGMM->aChunkMtx[iMtx].hMtx);
792	}
793	RTSemFastMutexDestroy(pGMM->hMtx);
794	}
795
796	pGMM->u32Magic = 0;
797	RTMemFree(pGMM);
798	SUPR0Printf("GMMR0Init: failed! rc=%d\n", rc);
799	return rc;
800	}
801
802
803	/**
804	* Terminates the GMM component.
805	*/
806	GMMR0DECL(void) GMMR0Term(void)
807	{
808	LogFlow(("GMMTerm:\n"));
809
810	/*
811	* Take care / be paranoid...
812	*/
813	PGMM pGMM = g_pGMM;
814	if (!VALID_PTR(pGMM))
815	return;
816	if (pGMM->u32Magic != GMM_MAGIC)
817	{
818	SUPR0Printf("GMMR0Term: u32Magic=%#x\n", pGMM->u32Magic);
819	return;
820	}
821
822	/*
823	* Undo what init did and free all the resources we've acquired.
824	*/
825	/* Destroy the fundamentals. */
826	g_pGMM = NULL;
827	pGMM->u32Magic = ~GMM_MAGIC;
828	RTSemFastMutexDestroy(pGMM->hMtx);
829	pGMM->hMtx = NIL_RTSEMFASTMUTEX;
830	RTSemFastMutexDestroy(pGMM->hMtxCleanup);
831	pGMM->hMtxCleanup = NIL_RTSEMFASTMUTEX;
832
833	/* Free any chunks still hanging around. */
834	RTAvlU32Destroy(&pGMM->pChunks, gmmR0TermDestroyChunk, pGMM);
835
836	/* Destroy the chunk locks. */
837	for (unsigned iMtx = 0; iMtx++ < RT_ELEMENTS(pGMM->aChunkMtx); iMtx++)
838	{
839	Assert(pGMM->aChunkMtx[iMtx].cUsers == 0);
840	RTSemFastMutexDestroy(pGMM->aChunkMtx[iMtx].hMtx);
841	pGMM->aChunkMtx[iMtx].hMtx = NIL_RTSEMFASTMUTEX;
842	}
843
844	/* Finally the instance data itself. */
845	RTMemFree(pGMM);
846	LogFlow(("GMMTerm: done\n"));
847	}
848
849
850	/**
851	* RTAvlU32Destroy callback.
852	*
853	* @returns 0
854	* @param pNode The node to destroy.
855	* @param pvGMM The GMM handle.
856	*/
857	static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM)
858	{
859	PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
860
861	if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
862	SUPR0Printf("GMMR0Term: %p/%#x: cFree=%d cPrivate=%d cShared=%d cMappings=%d\n", pChunk,
863	pChunk->Core.Key, pChunk->cFree, pChunk->cPrivate, pChunk->cShared, pChunk->cMappingsX);
864
865	int rc = RTR0MemObjFree(pChunk->hMemObj, true /* fFreeMappings */);
866	if (RT_FAILURE(rc))
867	{
868	SUPR0Printf("GMMR0Term: %p/%#x: RTRMemObjFree(%p,true) -> %d (cMappings=%d)\n", pChunk,
869	pChunk->Core.Key, pChunk->hMemObj, rc, pChunk->cMappingsX);
870	AssertRC(rc);
871	}
872	pChunk->hMemObj = NIL_RTR0MEMOBJ;
873
874	RTMemFree(pChunk->paMappingsX);
875	pChunk->paMappingsX = NULL;
876
877	RTMemFree(pChunk);
878	NOREF(pvGMM);
879	return 0;
880	}
881
882
883	/**
884	* Initializes the per-VM data for the GMM.
885	*
886	* This is called from within the GVMM lock (from GVMMR0CreateVM)
887	* and should only initialize the data members so GMMR0CleanupVM
888	* can deal with them. We reserve no memory or anything here,
889	* that's done later in GMMR0InitVM.
890	*
891	* @param pGVM Pointer to the Global VM structure.
892	*/
893	GMMR0DECL(void) GMMR0InitPerVMData(PGVM pGVM)
894	{
895	AssertCompile(RT_SIZEOFMEMB(GVM,gmm.s) <= RT_SIZEOFMEMB(GVM,gmm.padding));
896
897	pGVM->gmm.s.enmPolicy = GMMOCPOLICY_INVALID;
898	pGVM->gmm.s.enmPriority = GMMPRIORITY_INVALID;
899	pGVM->gmm.s.fMayAllocate = false;
900	}
901
902
903	/**
904	* Acquires the GMM giant lock.
905	*
906	* @returns Assert status code from RTSemFastMutexRequest.
907	* @param pGMM Pointer to the GMM instance.
908	*/
909	static int gmmR0MutexAcquire(PGMM pGMM)
910	{
911	ASMAtomicIncU32(&pGMM->cMtxContenders);
912	int rc = RTSemFastMutexRequest(pGMM->hMtx);
913	ASMAtomicDecU32(&pGMM->cMtxContenders);
914	AssertRC(rc);
915	#ifdef VBOX_STRICT
916	pGMM->hMtxOwner = RTThreadNativeSelf();
917	#endif
918	return rc;
919	}
920
921
922	/**
923	* Releases the GMM giant lock.
924	*
925	* @returns Assert status code from RTSemFastMutexRequest.
926	* @param pGMM Pointer to the GMM instance.
927	*/
928	static int gmmR0MutexRelease(PGMM pGMM)
929	{
930	#ifdef VBOX_STRICT
931	pGMM->hMtxOwner = NIL_RTNATIVETHREAD;
932	#endif
933	int rc = RTSemFastMutexRelease(pGMM->hMtx);
934	AssertRC(rc);
935	return rc;
936	}
937
938
939	/**
940	* Yields the GMM giant lock if there is contention and a certain minimum time
941	* has elapsed since we took it.
942	*
943	* @returns @c true if the mutex was yielded, @c false if not.
944	* @param pGMM Pointer to the GMM instance.
945	* @param puLockNanoTS Where the lock acquisition time stamp is kept
946	* (in/out).
947	*/
948	static bool gmmR0MutexYield(PGMM pGMM, uint64_t *puLockNanoTS)
949	{
950	/*
951	* If nobody is contending the mutex, don't bother checking the time.
952	*/
953	if (ASMAtomicReadU32(&pGMM->cMtxContenders) == 0)
954	return false;
955
956	/*
957	* Don't yield if we haven't executed for at least 2 milliseconds.
958	*/
959	uint64_t uNanoNow = RTTimeSystemNanoTS();
960	if (uNanoNow - *puLockNanoTS < UINT32_C(2000000))
961	return false;
962
963	/*
964	* Yield the mutex.
965	*/
966	#ifdef VBOX_STRICT
967	pGMM->hMtxOwner = NIL_RTNATIVETHREAD;
968	#endif
969	ASMAtomicIncU32(&pGMM->cMtxContenders);
970	int rc1 = RTSemFastMutexRelease(pGMM->hMtx); AssertRC(rc1);
971
972	RTThreadYield();
973
974	int rc2 = RTSemFastMutexRequest(pGMM->hMtx); AssertRC(rc2);
975	*puLockNanoTS = RTTimeSystemNanoTS();
976	ASMAtomicDecU32(&pGMM->cMtxContenders);
977	#ifdef VBOX_STRICT
978	pGMM->hMtxOwner = RTThreadNativeSelf();
979	#endif
980
981	return true;
982	}
983
984
985	/**
986	* Acquires a chunk lock.
987	*
988	* The caller must own the giant lock.
989	*
990	* @returns Assert status code from RTSemFastMutexRequest.
991	* @param pMtxState The chunk mutex state info. (Avoids
992	* passing the same flags and stuff around
993	* for subsequent release and drop-giant
994	* calls.)
995	* @param pGMM Pointer to the GMM instance.
996	* @param pChunk Pointer to the chunk.
997	* @param fFlags Flags regarding the giant lock, GMMR0CHUNK_MTX_XXX.
998	*/
999	static int gmmR0ChunkMutexAcquire(PGMMR0CHUNKMTXSTATE pMtxState, PGMM pGMM, PGMMCHUNK pChunk, uint32_t fFlags)
1000	{
1001	Assert(fFlags > GMMR0CHUNK_MTX_INVALID && fFlags < GMMR0CHUNK_MTX_END);
1002	Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
1003
1004	pMtxState->pGMM = pGMM;
1005	pMtxState->fFlags = (uint8_t)fFlags;
1006
1007	/*
1008	* Get the lock index and reference the lock.
1009	*/
1010	Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
1011	uint32_t iChunkMtx = pChunk->iChunkMtx;
1012	if (iChunkMtx == UINT8_MAX)
1013	{
1014	iChunkMtx = pGMM->iNextChunkMtx++;
1015	iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
1016
1017	/* Try get an unused one... */
1018	if (pGMM->aChunkMtx[iChunkMtx].cUsers)
1019	{
1020	iChunkMtx = pGMM->iNextChunkMtx++;
1021	iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
1022	if (pGMM->aChunkMtx[iChunkMtx].cUsers)
1023	{
1024	iChunkMtx = pGMM->iNextChunkMtx++;
1025	iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
1026	if (pGMM->aChunkMtx[iChunkMtx].cUsers)
1027	{
1028	iChunkMtx = pGMM->iNextChunkMtx++;
1029	iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
1030	}
1031	}
1032	}
1033
1034	pChunk->iChunkMtx = iChunkMtx;
1035	}
1036	AssertCompile(RT_ELEMENTS(pGMM->aChunkMtx) < UINT8_MAX);
1037	pMtxState->iChunkMtx = (uint8_t)iChunkMtx;
1038	ASMAtomicIncU32(&pGMM->aChunkMtx[iChunkMtx].cUsers);
1039
1040	/*
1041	* Drop the giant?
1042	*/
1043	if (fFlags != GMMR0CHUNK_MTX_KEEP_GIANT)
1044	{
1045	/** @todo GMM life cycle cleanup (we may race someone
1046	* destroying and cleaning up GMM)? */
1047	gmmR0MutexRelease(pGMM);
1048	}
1049
1050	/*
1051	* Take the chunk mutex.
1052	*/
1053	int rc = RTSemFastMutexRequest(pGMM->aChunkMtx[iChunkMtx].hMtx);
1054	AssertRC(rc);
1055	return rc;
1056	}
1057
1058
1059	/**
1060	* Releases the GMM giant lock.
1061	*
1062	* @returns Assert status code from RTSemFastMutexRequest.
1063	* @param pGMM Pointer to the GMM instance.
1064	* @param pChunk Pointer to the chunk if it's still
1065	* alive, NULL if it isn't. This is used to deassociate
1066	* the chunk from the mutex on the way out so a new one
1067	* can be selected next time, thus avoiding contented
1068	* mutexes.
1069	*/
1070	static int gmmR0ChunkMutexRelease(PGMMR0CHUNKMTXSTATE pMtxState, PGMMCHUNK pChunk)
1071	{
1072	PGMM pGMM = pMtxState->pGMM;
1073
1074	/*
1075	* Release the chunk mutex and reacquire the giant if requested.
1076	*/
1077	int rc = RTSemFastMutexRelease(pGMM->aChunkMtx[pMtxState->iChunkMtx].hMtx);
1078	AssertRC(rc);
1079	if (pMtxState->fFlags == GMMR0CHUNK_MTX_RETAKE_GIANT)
1080	rc = gmmR0MutexAcquire(pGMM);
1081	else
1082	Assert((pMtxState->fFlags != GMMR0CHUNK_MTX_DROP_GIANT) == (pGMM->hMtxOwner == RTThreadNativeSelf()));
1083
1084	/*
1085	* Drop the chunk mutex user reference and deassociate it from the chunk
1086	* when possible.
1087	*/
1088	if ( ASMAtomicDecU32(&pGMM->aChunkMtx[pMtxState->iChunkMtx].cUsers) == 0
1089	&& pChunk
1090	&& RT_SUCCESS(rc) )
1091	{
1092	if (pMtxState->fFlags != GMMR0CHUNK_MTX_DROP_GIANT)
1093	pChunk->iChunkMtx = UINT8_MAX;
1094	else
1095	{
1096	rc = gmmR0MutexAcquire(pGMM);
1097	if (RT_SUCCESS(rc))
1098	{
1099	if (pGMM->aChunkMtx[pMtxState->iChunkMtx].cUsers == 0)
1100	pChunk->iChunkMtx = UINT8_MAX;
1101	rc = gmmR0MutexRelease(pGMM);
1102	}
1103	}
1104	}
1105
1106	pMtxState->pGMM = NULL;
1107	return rc;
1108	}
1109
1110
1111	/**
1112	* Drops the giant GMM lock we kept in gmmR0ChunkMutexAcquire while keeping the
1113	* chunk locked.
1114	*
1115	* This only works if gmmR0ChunkMutexAcquire was called with
1116	* GMMR0CHUNK_MTX_KEEP_GIANT. Release will NOT retake the giant
1117	* when dropped this way, the behavior will be like if
1118	* GMMR0CHUNK_MTX_DROP_GIANT was used.
1119	*
1120	* @returns VBox status code (assuming success is ok).
1121	* @param pMtxState Pointer to the chunk mutex state.
1122	*/
1123	static int gmmR0ChunkMutexDropGiant(PGMMR0CHUNKMTXSTATE pMtxState)
1124	{
1125	AssertReturn(pMtxState->fFlags == GMMR0CHUNK_MTX_KEEP_GIANT, VERR_INTERNAL_ERROR_2);
1126	Assert(pMtxState->pGMM->hMtxOwner == RTThreadNativeSelf());
1127	pMtxState->fFlags = GMMR0CHUNK_MTX_DROP_GIANT;
1128	/** @todo GMM life cycle cleanup (we may race someone
1129	* destroying and cleaning up GMM)? */
1130	return gmmR0MutexRelease(pMtxState->pGMM);
1131	}
1132
1133
1134	/**
1135	* Cleans up when a VM is terminating.
1136	*
1137	* @param pGVM Pointer to the Global VM structure.
1138	*/
1139	GMMR0DECL(void) GMMR0CleanupVM(PGVM pGVM)
1140	{
1141	LogFlow(("GMMR0CleanupVM: pGVM=%p:{.pVM=%p, .hSelf=%#x}\n", pGVM, pGVM->pVM, pGVM->hSelf));
1142
1143	PGMM pGMM;
1144	GMM_GET_VALID_INSTANCE_VOID(pGMM);
1145
1146	#ifdef VBOX_WITH_PAGE_SHARING
1147	/*
1148	* Clean up all registered shared modules first.
1149	*/
1150	gmmR0SharedModuleCleanup(pGMM, pGVM);
1151	#endif
1152
1153	int rc = RTSemFastMutexRequest(pGMM->hMtxCleanup); AssertRC(rc);
1154	gmmR0MutexAcquire(pGMM);
1155	uint64_t uLockNanoTS = RTTimeSystemNanoTS();
1156	GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
1157
1158	/*
1159	* The policy is 'INVALID' until the initial reservation
1160	* request has been serviced.
1161	*/
1162	if ( pGVM->gmm.s.enmPolicy > GMMOCPOLICY_INVALID
1163	&& pGVM->gmm.s.enmPolicy < GMMOCPOLICY_END)
1164	{
1165	/*
1166	* If it's the last VM around, we can skip walking all the chunk looking
1167	* for the pages owned by this VM and instead flush the whole shebang.
1168	*
1169	* This takes care of the eventuality that a VM has left shared page
1170	* references behind (shouldn't happen of course, but you never know).
1171	*/
1172	Assert(pGMM->cRegisteredVMs);
1173	pGMM->cRegisteredVMs--;
1174
1175	/*
1176	* Walk the entire pool looking for pages that belong to this VM
1177	* and left over mappings. (This'll only catch private pages,
1178	* shared pages will be 'left behind'.)
1179	*/
1180	uint64_t cPrivatePages = pGVM->gmm.s.cPrivatePages; /* save */
1181
1182	unsigned iCountDown = 64;
1183	bool fRedoFromStart;
1184	PGMMCHUNK pChunk;
1185	do
1186	{
1187	fRedoFromStart = false;
1188	RTListForEachReverse(&pGMM->ChunkList, pChunk, GMMCHUNK, ListNode)
1189	{
1190	uint32_t const cFreeChunksOld = pGMM->cFreedChunks;
1191	if (gmmR0CleanupVMScanChunk(pGMM, pGVM, pChunk))
1192	{
1193	gmmR0MutexAcquire(pGMM);
1194	uLockNanoTS = RTTimeSystemNanoTS();
1195	}
1196	else
1197	{
1198	if (!iCountDown)
1199	gmmR0MutexYield(pGMM, &uLockNanoTS);
1200	else
1201	iCountDown--;
1202	}
1203	if (pGMM->cFreedChunks != cFreeChunksOld)
1204	break;
1205	}
1206	} while (fRedoFromStart);
1207
1208	if (pGVM->gmm.s.cPrivatePages)
1209	SUPR0Printf("GMMR0CleanupVM: hGVM=%#x has %#x private pages that cannot be found!\n", pGVM->hSelf, pGVM->gmm.s.cPrivatePages);
1210
1211	pGMM->cAllocatedPages -= cPrivatePages;
1212
1213	/*
1214	* Free empty chunks.
1215	*/
1216	do
1217	{
1218	iCountDown = 10240;
1219	pChunk = pGMM->Private.apLists[RT_ELEMENTS(pGMM->Private.apLists) - 1];
1220	while (pChunk)
1221	{
1222	PGMMCHUNK pNext = pChunk->pFreeNext;
1223	if ( pChunk->cFree == GMM_CHUNK_NUM_PAGES
1224	&& ( !pGMM->fBoundMemoryMode
1225	\|\| pChunk->hGVM == pGVM->hSelf))
1226	{
1227	gmmR0FreeChunk(pGMM, pGVM, pChunk);
1228	iCountDown = 1;
1229	}
1230	pChunk = pNext;
1231
1232	if (--iCountDown == 0)
1233	{
1234	uint64_t const idGenerationOld = pGMM->Private.idGeneration;
1235	fRedoFromStart = gmmR0MutexYield(pGMM, &uLockNanoTS)
1236	&& pGMM->Private.idGeneration != idGenerationOld;
1237	if (fRedoFromStart)
1238	break;
1239	iCountDown = 10240;
1240	}
1241	}
1242	} while (fRedoFromStart);
1243
1244	/*
1245	* Account for shared pages that weren't freed.
1246	*/
1247	if (pGVM->gmm.s.cSharedPages)
1248	{
1249	Assert(pGMM->cSharedPages >= pGVM->gmm.s.cSharedPages);
1250	SUPR0Printf("GMMR0CleanupVM: hGVM=%#x left %#x shared pages behind!\n", pGVM->hSelf, pGVM->gmm.s.cSharedPages);
1251	pGMM->cLeftBehindSharedPages += pGVM->gmm.s.cSharedPages;
1252	}
1253
1254	/*
1255	* Clean up balloon statistics in case the VM process crashed.
1256	*/
1257	Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.cBalloonedPages);
1258	pGMM->cBalloonedPages -= pGVM->gmm.s.cBalloonedPages;
1259
1260	/*
1261	* Update the over-commitment management statistics.
1262	*/
1263	pGMM->cReservedPages -= pGVM->gmm.s.Reserved.cBasePages
1264	+ pGVM->gmm.s.Reserved.cFixedPages
1265	+ pGVM->gmm.s.Reserved.cShadowPages;
1266	switch (pGVM->gmm.s.enmPolicy)
1267	{
1268	case GMMOCPOLICY_NO_OC:
1269	break;
1270	default:
1271	/** @todo Update GMM->cOverCommittedPages */
1272	break;
1273	}
1274	}
1275
1276	/* zap the GVM data. */
1277	pGVM->gmm.s.enmPolicy = GMMOCPOLICY_INVALID;
1278	pGVM->gmm.s.enmPriority = GMMPRIORITY_INVALID;
1279	pGVM->gmm.s.fMayAllocate = false;
1280
1281	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1282	gmmR0MutexRelease(pGMM);
1283	RTSemFastMutexRelease(pGMM->hMtxCleanup);
1284
1285	LogFlow(("GMMR0CleanupVM: returns\n"));
1286	}
1287
1288
1289	/**
1290	* Scan one chunk for private pages belonging to the specified VM.
1291	*
1292	* @note This function is ugly since may drop the ownership of the giant GMM
1293	* mutex!
1294	*
1295	* @returns @c true if we've dropped the giant mutex, @c false if we didn't.
1296	* @param pGMM Pointer to the GMM instance.
1297	* @param pGVM The global VM handle.
1298	* @param pChunk The chunk to scan.
1299	*/
1300	static bool gmmR0CleanupVMScanChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
1301	{
1302	/*
1303	* Look for pages belonging to the VM.
1304	* (Perform some internal checks while we're scanning.)
1305	*/
1306	#ifndef VBOX_STRICT
1307	if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
1308	#endif
1309	{
1310	unsigned cPrivate = 0;
1311	unsigned cShared = 0;
1312	unsigned cFree = 0;
1313
1314	gmmR0UnlinkChunk(pChunk); /* avoiding cFreePages updates. */
1315
1316	uint16_t hGVM = pGVM->hSelf;
1317	unsigned iPage = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
1318	while (iPage-- > 0)
1319	if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
1320	{
1321	if (pChunk->aPages[iPage].Private.hGVM == hGVM)
1322	{
1323	/*
1324	* Free the page.
1325	*
1326	* The reason for not using gmmR0FreePrivatePage here is that we
1327	* must not cause the chunk to be freed from under us - we're in
1328	* an AVL tree walk here.
1329	*/
1330	pChunk->aPages[iPage].u = 0;
1331	pChunk->aPages[iPage].Free.iNext = pChunk->iFreeHead;
1332	pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
1333	pChunk->iFreeHead = iPage;
1334	pChunk->cPrivate--;
1335	pChunk->cFree++;
1336	pGVM->gmm.s.cPrivatePages--;
1337	cFree++;
1338	}
1339	else
1340	cPrivate++;
1341	}
1342	else if (GMM_PAGE_IS_FREE(&pChunk->aPages[iPage]))
1343	cFree++;
1344	else
1345	cShared++;
1346
1347	gmmR0LinkChunk(pChunk, pChunk->cShared ? &g_pGMM->Shared : &g_pGMM->Private);
1348
1349	/*
1350	* Did it add up?
1351	*/
1352	if (RT_UNLIKELY( pChunk->cFree != cFree
1353	\|\| pChunk->cPrivate != cPrivate
1354	\|\| pChunk->cShared != cShared))
1355	{
1356	SUPR0Printf("gmmR0CleanupVMScanChunk: Chunk %p/%#x has bogus stats - free=%d/%d private=%d/%d shared=%d/%d\n",
1357	pChunk->cFree, cFree, pChunk->cPrivate, cPrivate, pChunk->cShared, cShared);
1358	pChunk->cFree = cFree;
1359	pChunk->cPrivate = cPrivate;
1360	pChunk->cShared = cShared;
1361	}
1362	}
1363
1364	/*
1365	* If not in bound memory mode, we should reset the hGVM field
1366	* if it has our handle in it.
1367	*/
1368	if (pChunk->hGVM == pGVM->hSelf)
1369	{
1370	if (!g_pGMM->fBoundMemoryMode)
1371	pChunk->hGVM = NIL_GVM_HANDLE;
1372	else if (pChunk->cFree != GMM_CHUNK_NUM_PAGES)
1373	{
1374	SUPR0Printf("gmmR0CleanupVMScanChunk: %p/%#x: cFree=%#x - it should be 0 in bound mode!\n",
1375	pChunk, pChunk->Core.Key, pChunk->cFree);
1376	AssertMsgFailed(("%p/%#x: cFree=%#x - it should be 0 in bound mode!\n", pChunk, pChunk->Core.Key, pChunk->cFree));
1377
1378	gmmR0UnlinkChunk(pChunk);
1379	pChunk->cFree = GMM_CHUNK_NUM_PAGES;
1380	gmmR0LinkChunk(pChunk, pChunk->cShared ? &g_pGMM->Shared : &g_pGMM->Private);
1381	}
1382	}
1383
1384	/*
1385	* Look for a mapping belonging to the terminating VM.
1386	*/
1387	GMMR0CHUNKMTXSTATE MtxState;
1388	gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
1389	unsigned cMappings = pChunk->cMappingsX;
1390	for (unsigned i = 0; i < cMappings; i++)
1391	if (pChunk->paMappingsX[i].pGVM == pGVM)
1392	{
1393	gmmR0ChunkMutexDropGiant(&MtxState);
1394
1395	RTR0MEMOBJ hMemObj = pChunk->paMappingsX[i].hMapObj;
1396
1397	cMappings--;
1398	if (i < cMappings)
1399	pChunk->paMappingsX[i] = pChunk->paMappingsX[cMappings];
1400	pChunk->paMappingsX[cMappings].pGVM = NULL;
1401	pChunk->paMappingsX[cMappings].hMapObj = NIL_RTR0MEMOBJ;
1402	Assert(pChunk->cMappingsX - 1U == cMappings);
1403	pChunk->cMappingsX = cMappings;
1404
1405	int rc = RTR0MemObjFree(hMemObj, false /* fFreeMappings (NA) */);
1406	if (RT_FAILURE(rc))
1407	{
1408	SUPR0Printf("gmmR0CleanupVMScanChunk: %p/%#x: mapping #%x: RTRMemObjFree(%p,false) -> %d \n",
1409	pChunk, pChunk->Core.Key, i, hMemObj, rc);
1410	AssertRC(rc);
1411	}
1412	gmmR0ChunkMutexRelease(&MtxState, pChunk);
1413	return true;
1414	}
1415
1416	gmmR0ChunkMutexRelease(&MtxState, pChunk);
1417	return false;
1418	}
1419
1420
1421	/**
1422	* The initial resource reservations.
1423	*
1424	* This will make memory reservations according to policy and priority. If there aren't
1425	* sufficient resources available to sustain the VM this function will fail and all
1426	* future allocations requests will fail as well.
1427	*
1428	* These are just the initial reservations made very very early during the VM creation
1429	* process and will be adjusted later in the GMMR0UpdateReservation call after the
1430	* ring-3 init has completed.
1431	*
1432	* @returns VBox status code.
1433	* @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1434	* @retval VERR_GMM_
1435	*
1436	* @param pVM Pointer to the shared VM structure.
1437	* @param idCpu VCPU id
1438	* @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1439	* This does not include MMIO2 and similar.
1440	* @param cShadowPages The number of pages that may be allocated for shadow paging structures.
1441	* @param cFixedPages The number of pages that may be allocated for fixed objects like the
1442	* hyper heap, MMIO2 and similar.
1443	* @param enmPolicy The OC policy to use on this VM.
1444	* @param enmPriority The priority in an out-of-memory situation.
1445	*
1446	* @thread The creator thread / EMT.
1447	*/
1448	GMMR0DECL(int) GMMR0InitialReservation(PVM pVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages,
1449	GMMOCPOLICY enmPolicy, GMMPRIORITY enmPriority)
1450	{
1451	LogFlow(("GMMR0InitialReservation: pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x enmPolicy=%d enmPriority=%d\n",
1452	pVM, cBasePages, cShadowPages, cFixedPages, enmPolicy, enmPriority));
1453
1454	/*
1455	* Validate, get basics and take the semaphore.
1456	*/
1457	PGMM pGMM;
1458	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1459	PGVM pGVM;
1460	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
1461	if (RT_FAILURE(rc))
1462	return rc;
1463
1464	AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1465	AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1466	AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1467	AssertReturn(enmPolicy > GMMOCPOLICY_INVALID && enmPolicy < GMMOCPOLICY_END, VERR_INVALID_PARAMETER);
1468	AssertReturn(enmPriority > GMMPRIORITY_INVALID && enmPriority < GMMPRIORITY_END, VERR_INVALID_PARAMETER);
1469
1470	gmmR0MutexAcquire(pGMM);
1471	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1472	{
1473	if ( !pGVM->gmm.s.Reserved.cBasePages
1474	&& !pGVM->gmm.s.Reserved.cFixedPages
1475	&& !pGVM->gmm.s.Reserved.cShadowPages)
1476	{
1477	/*
1478	* Check if we can accommodate this.
1479	*/
1480	/* ... later ... */
1481	if (RT_SUCCESS(rc))
1482	{
1483	/*
1484	* Update the records.
1485	*/
1486	pGVM->gmm.s.Reserved.cBasePages = cBasePages;
1487	pGVM->gmm.s.Reserved.cFixedPages = cFixedPages;
1488	pGVM->gmm.s.Reserved.cShadowPages = cShadowPages;
1489	pGVM->gmm.s.enmPolicy = enmPolicy;
1490	pGVM->gmm.s.enmPriority = enmPriority;
1491	pGVM->gmm.s.fMayAllocate = true;
1492
1493	pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1494	pGMM->cRegisteredVMs++;
1495	}
1496	}
1497	else
1498	rc = VERR_WRONG_ORDER;
1499	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1500	}
1501	else
1502	rc = VERR_INTERNAL_ERROR_5;
1503	gmmR0MutexRelease(pGMM);
1504	LogFlow(("GMMR0InitialReservation: returns %Rrc\n", rc));
1505	return rc;
1506	}
1507
1508
1509	/**
1510	* VMMR0 request wrapper for GMMR0InitialReservation.
1511	*
1512	* @returns see GMMR0InitialReservation.
1513	* @param pVM Pointer to the shared VM structure.
1514	* @param idCpu VCPU id
1515	* @param pReq The request packet.
1516	*/
1517	GMMR0DECL(int) GMMR0InitialReservationReq(PVM pVM, VMCPUID idCpu, PGMMINITIALRESERVATIONREQ pReq)
1518	{
1519	/*
1520	* Validate input and pass it on.
1521	*/
1522	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1523	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1524	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
1525
1526	return GMMR0InitialReservation(pVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages, pReq->enmPolicy, pReq->enmPriority);
1527	}
1528
1529
1530	/**
1531	* This updates the memory reservation with the additional MMIO2 and ROM pages.
1532	*
1533	* @returns VBox status code.
1534	* @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1535	*
1536	* @param pVM Pointer to the shared VM structure.
1537	* @param idCpu VCPU id
1538	* @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1539	* This does not include MMIO2 and similar.
1540	* @param cShadowPages The number of pages that may be allocated for shadow paging structures.
1541	* @param cFixedPages The number of pages that may be allocated for fixed objects like the
1542	* hyper heap, MMIO2 and similar.
1543	*
1544	* @thread EMT.
1545	*/
1546	GMMR0DECL(int) GMMR0UpdateReservation(PVM pVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages)
1547	{
1548	LogFlow(("GMMR0UpdateReservation: pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x\n",
1549	pVM, cBasePages, cShadowPages, cFixedPages));
1550
1551	/*
1552	* Validate, get basics and take the semaphore.
1553	*/
1554	PGMM pGMM;
1555	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1556	PGVM pGVM;
1557	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
1558	if (RT_FAILURE(rc))
1559	return rc;
1560
1561	AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1562	AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1563	AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1564
1565	gmmR0MutexAcquire(pGMM);
1566	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1567	{
1568	if ( pGVM->gmm.s.Reserved.cBasePages
1569	&& pGVM->gmm.s.Reserved.cFixedPages
1570	&& pGVM->gmm.s.Reserved.cShadowPages)
1571	{
1572	/*
1573	* Check if we can accommodate this.
1574	*/
1575	/* ... later ... */
1576	if (RT_SUCCESS(rc))
1577	{
1578	/*
1579	* Update the records.
1580	*/
1581	pGMM->cReservedPages -= pGVM->gmm.s.Reserved.cBasePages
1582	+ pGVM->gmm.s.Reserved.cFixedPages
1583	+ pGVM->gmm.s.Reserved.cShadowPages;
1584	pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1585
1586	pGVM->gmm.s.Reserved.cBasePages = cBasePages;
1587	pGVM->gmm.s.Reserved.cFixedPages = cFixedPages;
1588	pGVM->gmm.s.Reserved.cShadowPages = cShadowPages;
1589	}
1590	}
1591	else
1592	rc = VERR_WRONG_ORDER;
1593	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1594	}
1595	else
1596	rc = VERR_INTERNAL_ERROR_5;
1597	gmmR0MutexRelease(pGMM);
1598	LogFlow(("GMMR0UpdateReservation: returns %Rrc\n", rc));
1599	return rc;
1600	}
1601
1602
1603	/**
1604	* VMMR0 request wrapper for GMMR0UpdateReservation.
1605	*
1606	* @returns see GMMR0UpdateReservation.
1607	* @param pVM Pointer to the shared VM structure.
1608	* @param idCpu VCPU id
1609	* @param pReq The request packet.
1610	*/
1611	GMMR0DECL(int) GMMR0UpdateReservationReq(PVM pVM, VMCPUID idCpu, PGMMUPDATERESERVATIONREQ pReq)
1612	{
1613	/*
1614	* Validate input and pass it on.
1615	*/
1616	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1617	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1618	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
1619
1620	return GMMR0UpdateReservation(pVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages);
1621	}
1622
1623
1624	/**
1625	* Performs sanity checks on a free set.
1626	*
1627	* @returns Error count.
1628	*
1629	* @param pGMM Pointer to the GMM instance.
1630	* @param pSet Pointer to the set.
1631	* @param pszSetName The set name.
1632	* @param pszFunction The function from which it was called.
1633	* @param uLine The line number.
1634	*/
1635	static uint32_t gmmR0SanityCheckSet(PGMM pGMM, PGMMCHUNKFREESET pSet, const char *pszSetName,
1636	const char *pszFunction, unsigned uLineNo)
1637	{
1638	uint32_t cErrors = 0;
1639
1640	/*
1641	* Count the free pages in all the chunks and match it against pSet->cFreePages.
1642	*/
1643	uint32_t cPages = 0;
1644	for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1645	{
1646	for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1647	{
1648	/** @todo check that the chunk is hash into the right set. */
1649	cPages += pCur->cFree;
1650	}
1651	}
1652	if (RT_UNLIKELY(cPages != pSet->cFreePages))
1653	{
1654	SUPR0Printf("GMM insanity: found %#x pages in the %s set, expected %#x. (%s, line %u)\n",
1655	cPages, pszSetName, pSet->cFreePages, pszFunction, uLineNo);
1656	cErrors++;
1657	}
1658
1659	return cErrors;
1660	}
1661
1662
1663	/**
1664	* Performs some sanity checks on the GMM while owning lock.
1665	*
1666	* @returns Error count.
1667	*
1668	* @param pGMM Pointer to the GMM instance.
1669	* @param pszFunction The function from which it is called.
1670	* @param uLineNo The line number.
1671	*/
1672	static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo)
1673	{
1674	uint32_t cErrors = 0;
1675
1676	cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->Private, "private", pszFunction, uLineNo);
1677	cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->Shared, "shared", pszFunction, uLineNo);
1678	/** @todo add more sanity checks. */
1679
1680	return cErrors;
1681	}
1682
1683
1684	/**
1685	* Looks up a chunk in the tree and fill in the TLB entry for it.
1686	*
1687	* This is not expected to fail and will bitch if it does.
1688	*
1689	* @returns Pointer to the allocation chunk, NULL if not found.
1690	* @param pGMM Pointer to the GMM instance.
1691	* @param idChunk The ID of the chunk to find.
1692	* @param pTlbe Pointer to the TLB entry.
1693	*/
1694	static PGMMCHUNK gmmR0GetChunkSlow(PGMM pGMM, uint32_t idChunk, PGMMCHUNKTLBE pTlbe)
1695	{
1696	PGMMCHUNK pChunk = (PGMMCHUNK)RTAvlU32Get(&pGMM->pChunks, idChunk);
1697	AssertMsgReturn(pChunk, ("Chunk %#x not found!\n", idChunk), NULL);
1698	pTlbe->idChunk = idChunk;
1699	pTlbe->pChunk = pChunk;
1700	return pChunk;
1701	}
1702
1703
1704	/**
1705	* Finds a allocation chunk.
1706	*
1707	* This is not expected to fail and will bitch if it does.
1708	*
1709	* @returns Pointer to the allocation chunk, NULL if not found.
1710	* @param pGMM Pointer to the GMM instance.
1711	* @param idChunk The ID of the chunk to find.
1712	*/
1713	DECLINLINE(PGMMCHUNK) gmmR0GetChunk(PGMM pGMM, uint32_t idChunk)
1714	{
1715	/*
1716	* Do a TLB lookup, branch if not in the TLB.
1717	*/
1718	PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(idChunk)];
1719	if ( pTlbe->idChunk != idChunk
1720	\|\| !pTlbe->pChunk)
1721	return gmmR0GetChunkSlow(pGMM, idChunk, pTlbe);
1722	return pTlbe->pChunk;
1723	}
1724
1725
1726	/**
1727	* Finds a page.
1728	*
1729	* This is not expected to fail and will bitch if it does.
1730	*
1731	* @returns Pointer to the page, NULL if not found.
1732	* @param pGMM Pointer to the GMM instance.
1733	* @param idPage The ID of the page to find.
1734	*/
1735	DECLINLINE(PGMMPAGE) gmmR0GetPage(PGMM pGMM, uint32_t idPage)
1736	{
1737	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
1738	if (RT_LIKELY(pChunk))
1739	return &pChunk->aPages[idPage & GMM_PAGEID_IDX_MASK];
1740	return NULL;
1741	}
1742
1743
1744	/**
1745	* Gets the host physical address for a page given by it's ID.
1746	*
1747	* @returns The host physical address or NIL_RTHCPHYS.
1748	* @param pGMM Pointer to the GMM instance.
1749	* @param idPage The ID of the page to find.
1750	*/
1751	DECLINLINE(RTHCPHYS) gmmR0GetPageHCPhys(PGMM pGMM, uint32_t idPage)
1752	{
1753	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
1754	if (RT_LIKELY(pChunk))
1755	return RTR0MemObjGetPagePhysAddr(pChunk->hMemObj, idPage & GMM_PAGEID_IDX_MASK);
1756	return NIL_RTHCPHYS;
1757	}
1758
1759
1760	/**
1761	* Selects the appropriate free list given the number of free pages.
1762	*
1763	* @returns Free list index.
1764	* @param
1765	*/
1766	DECLINLINE(unsigned) gmmR0SelectFreeSetList(unsigned cFree)
1767	{
1768	return (cFree - 1) >> GMM_CHUNK_FREE_SET_SHIFT;
1769	}
1770
1771
1772	/**
1773	* Unlinks the chunk from the free list it's currently on (if any).
1774	*
1775	* @param pChunk The allocation chunk.
1776	*/
1777	DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk)
1778	{
1779	PGMMCHUNKFREESET pSet = pChunk->pSet;
1780	if (RT_LIKELY(pSet))
1781	{
1782	pSet->cFreePages -= pChunk->cFree;
1783	pSet->idGeneration++;
1784
1785	PGMMCHUNK pPrev = pChunk->pFreePrev;
1786	PGMMCHUNK pNext = pChunk->pFreeNext;
1787	if (pPrev)
1788	pPrev->pFreeNext = pNext;
1789	else
1790	pSet->apLists[gmmR0SelectFreeSetList(pChunk->cFree)] = pNext;
1791	if (pNext)
1792	pNext->pFreePrev = pPrev;
1793
1794	pChunk->pSet = NULL;
1795	pChunk->pFreeNext = NULL;
1796	pChunk->pFreePrev = NULL;
1797	}
1798	else
1799	{
1800	Assert(!pChunk->pFreeNext);
1801	Assert(!pChunk->pFreePrev);
1802	Assert(!pChunk->cFree);
1803	}
1804	}
1805
1806
1807	/**
1808	* Links the chunk onto the appropriate free list in the specified free set.
1809	*
1810	* If no free entries, it's not linked into any list.
1811	*
1812	* @param pChunk The allocation chunk.
1813	* @param pSet The free set.
1814	*/
1815	DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet)
1816	{
1817	Assert(!pChunk->pSet);
1818	Assert(!pChunk->pFreeNext);
1819	Assert(!pChunk->pFreePrev);
1820
1821	if (pChunk->cFree > 0)
1822	{
1823	pChunk->pSet = pSet;
1824	pChunk->pFreePrev = NULL;
1825	unsigned const iList = gmmR0SelectFreeSetList(pChunk->cFree);
1826	pChunk->pFreeNext = pSet->apLists[iList];
1827	if (pChunk->pFreeNext)
1828	pChunk->pFreeNext->pFreePrev = pChunk;
1829	pSet->apLists[iList] = pChunk;
1830
1831	pSet->cFreePages += pChunk->cFree;
1832	pSet->idGeneration++;
1833	}
1834	}
1835
1836
1837	/**
1838	* Frees a Chunk ID.
1839	*
1840	* @param pGMM Pointer to the GMM instance.
1841	* @param idChunk The Chunk ID to free.
1842	*/
1843	static void gmmR0FreeChunkId(PGMM pGMM, uint32_t idChunk)
1844	{
1845	AssertReturnVoid(idChunk != NIL_GMM_CHUNKID);
1846	AssertMsg(ASMBitTest(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk));
1847	ASMAtomicBitClear(&pGMM->bmChunkId[0], idChunk);
1848	}
1849
1850
1851	/**
1852	* Allocates a new Chunk ID.
1853	*
1854	* @returns The Chunk ID.
1855	* @param pGMM Pointer to the GMM instance.
1856	*/
1857	static uint32_t gmmR0AllocateChunkId(PGMM pGMM)
1858	{
1859	AssertCompile(!((GMM_CHUNKID_LAST + 1) & 31)); /* must be a multiple of 32 */
1860	AssertCompile(NIL_GMM_CHUNKID == 0);
1861
1862	/*
1863	* Try the next sequential one.
1864	*/
1865	int32_t idChunk = ++pGMM->idChunkPrev;
1866	#if 0 /** @todo enable this code */
1867	if ( idChunk <= GMM_CHUNKID_LAST
1868	&& idChunk > NIL_GMM_CHUNKID
1869	&& !ASMAtomicBitTestAndSet(&pVMM->bmChunkId[0], idChunk))
1870	return idChunk;
1871	#endif
1872
1873	/*
1874	* Scan sequentially from the last one.
1875	*/
1876	if ( (uint32_t)idChunk < GMM_CHUNKID_LAST
1877	&& idChunk > NIL_GMM_CHUNKID)
1878	{
1879	idChunk = ASMBitNextClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1, idChunk);
1880	if (idChunk > NIL_GMM_CHUNKID)
1881	{
1882	AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
1883	return pGMM->idChunkPrev = idChunk;
1884	}
1885	}
1886
1887	/*
1888	* Ok, scan from the start.
1889	* We're not racing anyone, so there is no need to expect failures or have restart loops.
1890	*/
1891	idChunk = ASMBitFirstClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1);
1892	AssertMsgReturn(idChunk > NIL_GMM_CHUNKID, ("%#x\n", idChunk), NIL_GVM_HANDLE);
1893	AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
1894
1895	return pGMM->idChunkPrev = idChunk;
1896	}
1897
1898
1899	/**
1900	* Registers a new chunk of memory.
1901	*
1902	* This is called by both gmmR0AllocateOneChunk and GMMR0SeedChunk.
1903	*
1904	* @returns VBox status code. On success, the giant GMM lock will be held, the
1905	* caller must release it (ugly).
1906	* @param pGMM Pointer to the GMM instance.
1907	* @param pSet Pointer to the set.
1908	* @param MemObj The memory object for the chunk.
1909	* @param hGVM The affinity of the chunk. NIL_GVM_HANDLE for no
1910	* affinity.
1911	* @param fChunkFlags The chunk flags, GMM_CHUNK_FLAGS_XXX.
1912	* @param ppChunk Chunk address (out). Optional.
1913	*
1914	* @remarks The caller must not own the giant GMM mutex.
1915	* The giant GMM mutex will be acquired and returned acquired in
1916	* the success path. On failure, no locks will be held.
1917	*/
1918	static int gmmR0RegisterChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, RTR0MEMOBJ MemObj, uint16_t hGVM, uint16_t fChunkFlags,
1919	PGMMCHUNK *ppChunk)
1920	{
1921	Assert(pGMM->hMtxOwner != RTThreadNativeSelf());
1922	Assert(hGVM != NIL_GVM_HANDLE \|\| pGMM->fBoundMemoryMode);
1923	Assert(fChunkFlags == 0 \|\| fChunkFlags == GMM_CHUNK_FLAGS_LARGE_PAGE);
1924
1925	int rc;
1926	PGMMCHUNK pChunk = (PGMMCHUNK)RTMemAllocZ(sizeof(*pChunk));
1927	if (pChunk)
1928	{
1929	/*
1930	* Initialize it.
1931	*/
1932	pChunk->hMemObj = MemObj;
1933	pChunk->cFree = GMM_CHUNK_NUM_PAGES;
1934	pChunk->hGVM = hGVM;
1935	/pChunk->iFreeHead = 0;/
1936	pChunk->idNumaNode = GMM_CHUNK_NUMA_ID_UNKNOWN;
1937	pChunk->iChunkMtx = UINT8_MAX;
1938	pChunk->fFlags = fChunkFlags;
1939	for (unsigned iPage = 0; iPage < RT_ELEMENTS(pChunk->aPages) - 1; iPage++)
1940	{
1941	pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
1942	pChunk->aPages[iPage].Free.iNext = iPage + 1;
1943	}
1944	pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.u2State = GMM_PAGE_STATE_FREE;
1945	pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.iNext = UINT16_MAX;
1946
1947	/*
1948	* Allocate a Chunk ID and insert it into the tree.
1949	* This has to be done behind the mutex of course.
1950	*/
1951	rc = gmmR0MutexAcquire(pGMM);
1952	if (RT_SUCCESS(rc))
1953	{
1954	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1955	{
1956	pChunk->Core.Key = gmmR0AllocateChunkId(pGMM);
1957	if ( pChunk->Core.Key != NIL_GMM_CHUNKID
1958	&& pChunk->Core.Key <= GMM_CHUNKID_LAST
1959	&& RTAvlU32Insert(&pGMM->pChunks, &pChunk->Core))
1960	{
1961	pGMM->cChunks++;
1962	RTListAppend(&pGMM->ChunkList, &pChunk->ListNode);
1963	gmmR0LinkChunk(pChunk, pSet);
1964	LogFlow(("gmmR0RegisterChunk: pChunk=%p id=%#x cChunks=%d\n", pChunk, pChunk->Core.Key, pGMM->cChunks));
1965
1966	if (ppChunk)
1967	*ppChunk = pChunk;
1968	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1969	return VINF_SUCCESS;
1970	}
1971
1972	/* bail out */
1973	rc = VERR_INTERNAL_ERROR;
1974	}
1975	else
1976	rc = VERR_INTERNAL_ERROR_5;
1977	gmmR0MutexRelease(pGMM);
1978	}
1979
1980	RTMemFree(pChunk);
1981	}
1982	else
1983	rc = VERR_NO_MEMORY;
1984	return rc;
1985	}
1986
1987
1988	/**
1989	* Allocate one new chunk and add it to the specified free set.
1990	*
1991	* @returns VBox status code.
1992	* @param pGMM Pointer to the GMM instance.
1993	* @param pSet Pointer to the set.
1994	* @param hGVM The affinity of the new chunk.
1995	*
1996	* @remarks The giant mutex will be temporarily abandond during the allocation.
1997	*/
1998	static int gmmR0AllocateOneChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, uint16_t hGVM)
1999	{
2000	/*
2001	* Allocate the memory.
2002	*
2003	* Note! We leave the giant GMM lock temporarily as the allocation might
2004	* take a long time. gmmR0RegisterChunk reacquires it (ugly).
2005	*/
2006	gmmR0MutexRelease(pGMM);
2007
2008	RTR0MEMOBJ hMemObj;
2009	int rc = RTR0MemObjAllocPhysNC(&hMemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS);
2010	/** @todo Check that RTR0MemObjAllocPhysNC always returns VERR_NO_MEMORY on
2011	* allocation failure. */
2012	if (RT_SUCCESS(rc))
2013	{
2014	rc = gmmR0RegisterChunk(pGMM, pSet, hMemObj, hGVM, 0 /fChunkFlags/, NULL);
2015	if (RT_SUCCESS(rc))
2016	return rc;
2017
2018	RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
2019	}
2020
2021	int rc2 = gmmR0MutexAcquire(pGMM);
2022	AssertRCReturn(rc2, RT_FAILURE(rc) ? rc : rc2);
2023	return rc;
2024	}
2025
2026
2027	/**
2028	* Attempts to allocate more pages until the requested amount is met.
2029	*
2030	* @returns VBox status code.
2031	* @param pGMM Pointer to the GMM instance data.
2032	* @param pGVM The calling VM.
2033	* @param pSet Pointer to the free set to grow.
2034	* @param cPages The number of pages needed.
2035	*
2036	* @remarks Called owning the mutex, but will leave it temporarily while
2037	* allocating the memory!
2038	*/
2039	static int gmmR0AllocateMoreChunks(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet, uint32_t cPages)
2040	{
2041	Assert(!pGMM->fLegacyAllocationMode);
2042
2043	if (!GMM_CHECK_SANITY_IN_LOOPS(pGMM))
2044	return VERR_INTERNAL_ERROR_4;
2045
2046	if (!pGMM->fBoundMemoryMode)
2047	{
2048	/*
2049	* Try steal free chunks from the other set first. (Only take 100% free chunks.)
2050	*/
2051	PGMMCHUNKFREESET pOtherSet = pSet == &pGMM->Private ? &pGMM->Shared : &pGMM->Private;
2052	while ( pSet->cFreePages < cPages
2053	&& pOtherSet->cFreePages >= GMM_CHUNK_NUM_PAGES)
2054	{
2055	PGMMCHUNK pChunk = pOtherSet->apLists[RT_ELEMENTS(pOtherSet->apLists) - 1];
2056	while (pChunk && pChunk->cFree != GMM_CHUNK_NUM_PAGES)
2057	pChunk = pChunk->pFreeNext;
2058	if (!pChunk)
2059	break;
2060
2061	gmmR0UnlinkChunk(pChunk);
2062	gmmR0LinkChunk(pChunk, pSet);
2063	}
2064
2065	/*
2066	* If we need still more pages, allocate new chunks.
2067	* Note! We will leave the mutex while doing the allocation,
2068	*/
2069	while (pSet->cFreePages < cPages)
2070	{
2071	int rc = gmmR0AllocateOneChunk(pGMM, pSet, pGVM->hSelf);
2072	if (RT_FAILURE(rc))
2073	return rc;
2074	if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2075	return VERR_INTERNAL_ERROR_5;
2076	}
2077	}
2078	else
2079	{
2080	/*
2081	* The memory is bound to the VM allocating it, so we have to count
2082	* the free pages carefully as well as making sure we brand them with
2083	* our VM handle.
2084	*
2085	* Note! We will leave the mutex while doing the allocation,
2086	*/
2087	uint16_t const hGVM = pGVM->hSelf;
2088	for (;;)
2089	{
2090	/* Count and see if we've reached the goal. */
2091	uint32_t cPagesFound = 0;
2092	for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
2093	for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
2094	if (pCur->hGVM == hGVM)
2095	{
2096	cPagesFound += pCur->cFree;
2097	if (cPagesFound >= cPages)
2098	break;
2099	}
2100	if (cPagesFound >= cPages)
2101	break;
2102
2103	/* Allocate more. */
2104	int rc = gmmR0AllocateOneChunk(pGMM, pSet, hGVM);
2105	if (RT_FAILURE(rc))
2106	return rc;
2107	if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2108	return VERR_INTERNAL_ERROR_5;
2109	}
2110	}
2111
2112	return VINF_SUCCESS;
2113	}
2114
2115
2116	/**
2117	* Allocates one private page.
2118	*
2119	* Worker for gmmR0AllocatePages.
2120	*
2121	* @param pGMM Pointer to the GMM instance data.
2122	* @param hGVM The GVM handle of the VM requesting memory.
2123	* @param pChunk The chunk to allocate it from.
2124	* @param pPageDesc The page descriptor.
2125	*/
2126	static void gmmR0AllocatePage(PGMM pGMM, uint32_t hGVM, PGMMCHUNK pChunk, PGMMPAGEDESC pPageDesc)
2127	{
2128	/* update the chunk stats. */
2129	if (pChunk->hGVM == NIL_GVM_HANDLE)
2130	pChunk->hGVM = hGVM;
2131	Assert(pChunk->cFree);
2132	pChunk->cFree--;
2133	pChunk->cPrivate++;
2134
2135	/* unlink the first free page. */
2136	const uint32_t iPage = pChunk->iFreeHead;
2137	AssertReleaseMsg(iPage < RT_ELEMENTS(pChunk->aPages), ("%d\n", iPage));
2138	PGMMPAGE pPage = &pChunk->aPages[iPage];
2139	Assert(GMM_PAGE_IS_FREE(pPage));
2140	pChunk->iFreeHead = pPage->Free.iNext;
2141	Log3(("A pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x iNext=%#x\n",
2142	pPage, iPage, (pChunk->Core.Key << GMM_CHUNKID_SHIFT) \| iPage,
2143	pPage->Common.u2State, pChunk->iFreeHead, pPage->Free.iNext));
2144
2145	/* make the page private. */
2146	pPage->u = 0;
2147	AssertCompile(GMM_PAGE_STATE_PRIVATE == 0);
2148	pPage->Private.hGVM = hGVM;
2149	AssertCompile(NIL_RTHCPHYS >= GMM_GCPHYS_LAST);
2150	AssertCompile(GMM_GCPHYS_UNSHAREABLE >= GMM_GCPHYS_LAST);
2151	if (pPageDesc->HCPhysGCPhys <= GMM_GCPHYS_LAST)
2152	pPage->Private.pfn = pPageDesc->HCPhysGCPhys >> PAGE_SHIFT;
2153	else
2154	pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE; /* unshareable / unassigned - same thing. */
2155
2156	/* update the page descriptor. */
2157	pPageDesc->HCPhysGCPhys = RTR0MemObjGetPagePhysAddr(pChunk->hMemObj, iPage);
2158	Assert(pPageDesc->HCPhysGCPhys != NIL_RTHCPHYS);
2159	pPageDesc->idPage = (pChunk->Core.Key << GMM_CHUNKID_SHIFT) \| iPage;
2160	pPageDesc->idSharedPage = NIL_GMM_PAGEID;
2161	}
2162
2163
2164	/**
2165	* Common worker for GMMR0AllocateHandyPages and GMMR0AllocatePages.
2166	*
2167	* @returns VBox status code:
2168	* @retval VINF_SUCCESS on success.
2169	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk or
2170	* gmmR0AllocateMoreChunks is necessary.
2171	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2172	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2173	* that is we're trying to allocate more than we've reserved.
2174	*
2175	* @param pGMM Pointer to the GMM instance data.
2176	* @param pGVM Pointer to the shared VM structure.
2177	* @param cPages The number of pages to allocate.
2178	* @param paPages Pointer to the page descriptors.
2179	* See GMMPAGEDESC for details on what is expected on input.
2180	* @param enmAccount The account to charge.
2181	*/
2182	static int gmmR0AllocatePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
2183	{
2184	/*
2185	* Check allocation limits.
2186	*/
2187	if (RT_UNLIKELY(pGMM->cAllocatedPages + cPages > pGMM->cMaxPages))
2188	return VERR_GMM_HIT_GLOBAL_LIMIT;
2189
2190	switch (enmAccount)
2191	{
2192	case GMMACCOUNT_BASE:
2193	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cPages > pGVM->gmm.s.Reserved.cBasePages))
2194	{
2195	Log(("gmmR0AllocatePages:Base: Reserved=%#llx Allocated+Ballooned+Requested=%#llx+%#llx+%#x!\n",
2196	pGVM->gmm.s.Reserved.cBasePages, pGVM->gmm.s.Allocated.cBasePages, pGVM->gmm.s.cBalloonedPages, cPages));
2197	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2198	}
2199	break;
2200	case GMMACCOUNT_SHADOW:
2201	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cShadowPages + cPages > pGVM->gmm.s.Reserved.cShadowPages))
2202	{
2203	Log(("gmmR0AllocatePages:Shadow: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
2204	pGVM->gmm.s.Reserved.cShadowPages, pGVM->gmm.s.Allocated.cShadowPages, cPages));
2205	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2206	}
2207	break;
2208	case GMMACCOUNT_FIXED:
2209	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cFixedPages + cPages > pGVM->gmm.s.Reserved.cFixedPages))
2210	{
2211	Log(("gmmR0AllocatePages:Fixed: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
2212	pGVM->gmm.s.Reserved.cFixedPages, pGVM->gmm.s.Allocated.cFixedPages, cPages));
2213	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2214	}
2215	break;
2216	default:
2217	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2218	}
2219
2220	/*
2221	* Check if we need to allocate more memory or not. In bound memory mode this
2222	* is a bit extra work but it's easier to do it upfront than bailing out later.
2223	*/
2224	PGMMCHUNKFREESET pSet = &pGMM->Private;
2225	if (pSet->cFreePages < cPages)
2226	return VERR_GMM_SEED_ME;
2227	if (pGMM->fBoundMemoryMode)
2228	{
2229	uint16_t hGVM = pGVM->hSelf;
2230	uint32_t cPagesFound = 0;
2231	for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
2232	for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
2233	if (pCur->hGVM == hGVM)
2234	{
2235	cPagesFound += pCur->cFree;
2236	if (cPagesFound >= cPages)
2237	break;
2238	}
2239	if (cPagesFound < cPages)
2240	return VERR_GMM_SEED_ME;
2241	}
2242
2243	/*
2244	* Pick the pages.
2245	* Try make some effort keeping VMs sharing private chunks.
2246	*/
2247	uint16_t hGVM = pGVM->hSelf;
2248	uint32_t iPage = 0;
2249
2250	/* first round, pick from chunks with an affinity to the VM. */
2251	for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists) && iPage < cPages; i++)
2252	{
2253	PGMMCHUNK pCurFree = NULL;
2254	PGMMCHUNK pCur = pSet->apLists[i];
2255	while (pCur && iPage < cPages)
2256	{
2257	PGMMCHUNK pNext = pCur->pFreeNext;
2258
2259	if ( pCur->hGVM == hGVM
2260	&& pCur->cFree < GMM_CHUNK_NUM_PAGES)
2261	{
2262	gmmR0UnlinkChunk(pCur);
2263	for (; pCur->cFree && iPage < cPages; iPage++)
2264	gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
2265	gmmR0LinkChunk(pCur, pSet);
2266	}
2267
2268	pCur = pNext;
2269	}
2270	}
2271
2272	if (iPage < cPages)
2273	{
2274	/* second round, pick pages from the 100% empty chunks we just skipped above. */
2275	PGMMCHUNK pCurFree = NULL;
2276	PGMMCHUNK pCur = pSet->apLists[RT_ELEMENTS(pSet->apLists) - 1];
2277	while (pCur && iPage < cPages)
2278	{
2279	PGMMCHUNK pNext = pCur->pFreeNext;
2280
2281	if ( pCur->cFree == GMM_CHUNK_NUM_PAGES
2282	&& ( pCur->hGVM == hGVM
2283	\|\| !pGMM->fBoundMemoryMode))
2284	{
2285	gmmR0UnlinkChunk(pCur);
2286	for (; pCur->cFree && iPage < cPages; iPage++)
2287	gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
2288	gmmR0LinkChunk(pCur, pSet);
2289	}
2290
2291	pCur = pNext;
2292	}
2293	}
2294
2295	if ( iPage < cPages
2296	&& !pGMM->fBoundMemoryMode)
2297	{
2298	/* third round, disregard affinity. */
2299	unsigned i = RT_ELEMENTS(pSet->apLists);
2300	while (i-- > 0 && iPage < cPages)
2301	{
2302	PGMMCHUNK pCurFree = NULL;
2303	PGMMCHUNK pCur = pSet->apLists[i];
2304	while (pCur && iPage < cPages)
2305	{
2306	PGMMCHUNK pNext = pCur->pFreeNext;
2307
2308	if ( pCur->cFree > GMM_CHUNK_NUM_PAGES / 2
2309	&& cPages >= GMM_CHUNK_NUM_PAGES / 2)
2310	pCur->hGVM = hGVM; /* change chunk affinity */
2311
2312	gmmR0UnlinkChunk(pCur);
2313	for (; pCur->cFree && iPage < cPages; iPage++)
2314	gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
2315	gmmR0LinkChunk(pCur, pSet);
2316
2317	pCur = pNext;
2318	}
2319	}
2320	}
2321
2322	/*
2323	* Update the account.
2324	*/
2325	switch (enmAccount)
2326	{
2327	case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages += iPage; break;
2328	case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages += iPage; break;
2329	case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages += iPage; break;
2330	default:
2331	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2332	}
2333	pGVM->gmm.s.cPrivatePages += iPage;
2334	pGMM->cAllocatedPages += iPage;
2335
2336	AssertMsgReturn(iPage == cPages, ("%u != %u\n", iPage, cPages), VERR_INTERNAL_ERROR);
2337
2338	/*
2339	* Check if we've reached some threshold and should kick one or two VMs and tell
2340	* them to inflate their balloons a bit more... later.
2341	*/
2342
2343	return VINF_SUCCESS;
2344	}
2345
2346
2347	/**
2348	* Updates the previous allocations and allocates more pages.
2349	*
2350	* The handy pages are always taken from the 'base' memory account.
2351	* The allocated pages are not cleared and will contains random garbage.
2352	*
2353	* @returns VBox status code:
2354	* @retval VINF_SUCCESS on success.
2355	* @retval VERR_NOT_OWNER if the caller is not an EMT.
2356	* @retval VERR_GMM_PAGE_NOT_FOUND if one of the pages to update wasn't found.
2357	* @retval VERR_GMM_PAGE_NOT_PRIVATE if one of the pages to update wasn't a
2358	* private page.
2359	* @retval VERR_GMM_PAGE_NOT_SHARED if one of the pages to update wasn't a
2360	* shared page.
2361	* @retval VERR_GMM_NOT_PAGE_OWNER if one of the pages to be updated wasn't
2362	* owned by the VM.
2363	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2364	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2365	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2366	* that is we're trying to allocate more than we've reserved.
2367	*
2368	* @param pVM Pointer to the shared VM structure.
2369	* @param idCpu VCPU id
2370	* @param cPagesToUpdate The number of pages to update (starting from the head).
2371	* @param cPagesToAlloc The number of pages to allocate (starting from the head).
2372	* @param paPages The array of page descriptors.
2373	* See GMMPAGEDESC for details on what is expected on input.
2374	* @thread EMT.
2375	*/
2376	GMMR0DECL(int) GMMR0AllocateHandyPages(PVM pVM, VMCPUID idCpu, uint32_t cPagesToUpdate, uint32_t cPagesToAlloc, PGMMPAGEDESC paPages)
2377	{
2378	LogFlow(("GMMR0AllocateHandyPages: pVM=%p cPagesToUpdate=%#x cPagesToAlloc=%#x paPages=%p\n",
2379	pVM, cPagesToUpdate, cPagesToAlloc, paPages));
2380
2381	/*
2382	* Validate, get basics and take the semaphore.
2383	* (This is a relatively busy path, so make predictions where possible.)
2384	*/
2385	PGMM pGMM;
2386	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2387	PGVM pGVM;
2388	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2389	if (RT_FAILURE(rc))
2390	return rc;
2391
2392	AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2393	AssertMsgReturn( (cPagesToUpdate && cPagesToUpdate < 1024)
2394	\|\| (cPagesToAlloc && cPagesToAlloc < 1024),
2395	("cPagesToUpdate=%#x cPagesToAlloc=%#x\n", cPagesToUpdate, cPagesToAlloc),
2396	VERR_INVALID_PARAMETER);
2397
2398	unsigned iPage = 0;
2399	for (; iPage < cPagesToUpdate; iPage++)
2400	{
2401	AssertMsgReturn( ( paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
2402	&& !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK))
2403	\|\| paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
2404	\|\| paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE,
2405	("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys),
2406	VERR_INVALID_PARAMETER);
2407	AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2408	/\|\| paPages[iPage].idPage == NIL_GMM_PAGEID/,
2409	("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2410	AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2411	/\|\| paPages[iPage].idSharedPage == NIL_GMM_PAGEID/,
2412	("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2413	}
2414
2415	for (; iPage < cPagesToAlloc; iPage++)
2416	{
2417	AssertMsgReturn(paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS, ("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys), VERR_INVALID_PARAMETER);
2418	AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2419	AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2420	}
2421
2422	gmmR0MutexAcquire(pGMM);
2423	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2424	{
2425	/* No allocations before the initial reservation has been made! */
2426	if (RT_LIKELY( pGVM->gmm.s.Reserved.cBasePages
2427	&& pGVM->gmm.s.Reserved.cFixedPages
2428	&& pGVM->gmm.s.Reserved.cShadowPages))
2429	{
2430	/*
2431	* Perform the updates.
2432	* Stop on the first error.
2433	*/
2434	for (iPage = 0; iPage < cPagesToUpdate; iPage++)
2435	{
2436	if (paPages[iPage].idPage != NIL_GMM_PAGEID)
2437	{
2438	PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idPage);
2439	if (RT_LIKELY(pPage))
2440	{
2441	if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
2442	{
2443	if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
2444	{
2445	AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
2446	if (RT_LIKELY(paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST))
2447	pPage->Private.pfn = paPages[iPage].HCPhysGCPhys >> PAGE_SHIFT;
2448	else if (paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE)
2449	pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE;
2450	/* else: NIL_RTHCPHYS nothing */
2451
2452	paPages[iPage].idPage = NIL_GMM_PAGEID;
2453	paPages[iPage].HCPhysGCPhys = NIL_RTHCPHYS;
2454	}
2455	else
2456	{
2457	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not owner! hGVM=%#x hSelf=%#x\n",
2458	iPage, paPages[iPage].idPage, pPage->Private.hGVM, pGVM->hSelf));
2459	rc = VERR_GMM_NOT_PAGE_OWNER;
2460	break;
2461	}
2462	}
2463	else
2464	{
2465	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not private! %.Rhxs (type %d)\n", iPage, paPages[iPage].idPage, sizeof(pPage), pPage, pPage->Common.u2State));
2466	rc = VERR_GMM_PAGE_NOT_PRIVATE;
2467	break;
2468	}
2469	}
2470	else
2471	{
2472	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (private)\n", iPage, paPages[iPage].idPage));
2473	rc = VERR_GMM_PAGE_NOT_FOUND;
2474	break;
2475	}
2476	}
2477
2478	if (paPages[iPage].idSharedPage != NIL_GMM_PAGEID)
2479	{
2480	PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idSharedPage);
2481	if (RT_LIKELY(pPage))
2482	{
2483	if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
2484	{
2485	AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
2486	Assert(pPage->Shared.cRefs);
2487	Assert(pGVM->gmm.s.cSharedPages);
2488	Assert(pGVM->gmm.s.Allocated.cBasePages);
2489
2490	Log(("GMMR0AllocateHandyPages: free shared page %x cRefs=%d\n", paPages[iPage].idSharedPage, pPage->Shared.cRefs));
2491	pGVM->gmm.s.cSharedPages--;
2492	pGVM->gmm.s.Allocated.cBasePages--;
2493	if (!--pPage->Shared.cRefs)
2494	{
2495	gmmR0FreeSharedPage(pGMM, paPages[iPage].idSharedPage, pPage);
2496	}
2497	else
2498	{
2499	Assert(pGMM->cDuplicatePages);
2500	pGMM->cDuplicatePages--;
2501	}
2502
2503	paPages[iPage].idSharedPage = NIL_GMM_PAGEID;
2504	}
2505	else
2506	{
2507	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not shared!\n", iPage, paPages[iPage].idSharedPage));
2508	rc = VERR_GMM_PAGE_NOT_SHARED;
2509	break;
2510	}
2511	}
2512	else
2513	{
2514	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (shared)\n", iPage, paPages[iPage].idSharedPage));
2515	rc = VERR_GMM_PAGE_NOT_FOUND;
2516	break;
2517	}
2518	}
2519	}
2520
2521	/*
2522	* Join paths with GMMR0AllocatePages for the allocation.
2523	* Note! gmmR0AllocateMoreChunks may leave the protection of the mutex!
2524	*/
2525	while (RT_SUCCESS(rc))
2526	{
2527	rc = gmmR0AllocatePages(pGMM, pGVM, cPagesToAlloc, paPages, GMMACCOUNT_BASE);
2528	if ( rc != VERR_GMM_SEED_ME
2529	\|\| pGMM->fLegacyAllocationMode)
2530	break;
2531	rc = gmmR0AllocateMoreChunks(pGMM, pGVM, &pGMM->Private, cPagesToAlloc);
2532	}
2533	}
2534	else
2535	rc = VERR_WRONG_ORDER;
2536	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2537	}
2538	else
2539	rc = VERR_INTERNAL_ERROR_5;
2540	gmmR0MutexRelease(pGMM);
2541	LogFlow(("GMMR0AllocateHandyPages: returns %Rrc\n", rc));
2542	return rc;
2543	}
2544
2545
2546	/**
2547	* Allocate one or more pages.
2548	*
2549	* This is typically used for ROMs and MMIO2 (VRAM) during VM creation.
2550	* The allocated pages are not cleared and will contains random garbage.
2551	*
2552	* @returns VBox status code:
2553	* @retval VINF_SUCCESS on success.
2554	* @retval VERR_NOT_OWNER if the caller is not an EMT.
2555	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2556	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2557	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2558	* that is we're trying to allocate more than we've reserved.
2559	*
2560	* @param pVM Pointer to the shared VM structure.
2561	* @param idCpu VCPU id
2562	* @param cPages The number of pages to allocate.
2563	* @param paPages Pointer to the page descriptors.
2564	* See GMMPAGEDESC for details on what is expected on input.
2565	* @param enmAccount The account to charge.
2566	*
2567	* @thread EMT.
2568	*/
2569	GMMR0DECL(int) GMMR0AllocatePages(PVM pVM, VMCPUID idCpu, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
2570	{
2571	LogFlow(("GMMR0AllocatePages: pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pVM, cPages, paPages, enmAccount));
2572
2573	/*
2574	* Validate, get basics and take the semaphore.
2575	*/
2576	PGMM pGMM;
2577	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2578	PGVM pGVM;
2579	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2580	if (RT_FAILURE(rc))
2581	return rc;
2582
2583	AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2584	AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
2585	AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
2586
2587	for (unsigned iPage = 0; iPage < cPages; iPage++)
2588	{
2589	AssertMsgReturn( paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
2590	\|\| paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE
2591	\|\| ( enmAccount == GMMACCOUNT_BASE
2592	&& paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
2593	&& !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK)),
2594	("#%#x: %RHp enmAccount=%d\n", iPage, paPages[iPage].HCPhysGCPhys, enmAccount),
2595	VERR_INVALID_PARAMETER);
2596	AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2597	AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2598	}
2599
2600	gmmR0MutexAcquire(pGMM);
2601	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2602	{
2603
2604	/* No allocations before the initial reservation has been made! */
2605	if (RT_LIKELY( pGVM->gmm.s.Reserved.cBasePages
2606	&& pGVM->gmm.s.Reserved.cFixedPages
2607	&& pGVM->gmm.s.Reserved.cShadowPages))
2608	{
2609	/*
2610	* gmmR0AllocatePages seed loop.
2611	* Note! gmmR0AllocateMoreChunks may leave the protection of the mutex!
2612	*/
2613	while (RT_SUCCESS(rc))
2614	{
2615	rc = gmmR0AllocatePages(pGMM, pGVM, cPages, paPages, enmAccount);
2616	if ( rc != VERR_GMM_SEED_ME
2617	\|\| pGMM->fLegacyAllocationMode)
2618	break;
2619	rc = gmmR0AllocateMoreChunks(pGMM, pGVM, &pGMM->Private, cPages);
2620	}
2621	}
2622	else
2623	rc = VERR_WRONG_ORDER;
2624	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2625	}
2626	else
2627	rc = VERR_INTERNAL_ERROR_5;
2628	gmmR0MutexRelease(pGMM);
2629	LogFlow(("GMMR0AllocatePages: returns %Rrc\n", rc));
2630	return rc;
2631	}
2632
2633
2634	/**
2635	* VMMR0 request wrapper for GMMR0AllocatePages.
2636	*
2637	* @returns see GMMR0AllocatePages.
2638	* @param pVM Pointer to the shared VM structure.
2639	* @param idCpu VCPU id
2640	* @param pReq The request packet.
2641	*/
2642	GMMR0DECL(int) GMMR0AllocatePagesReq(PVM pVM, VMCPUID idCpu, PGMMALLOCATEPAGESREQ pReq)
2643	{
2644	/*
2645	* Validate input and pass it on.
2646	*/
2647	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2648	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2649	AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0]),
2650	("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0])),
2651	VERR_INVALID_PARAMETER);
2652	AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[pReq->cPages]),
2653	("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[pReq->cPages])),
2654	VERR_INVALID_PARAMETER);
2655
2656	return GMMR0AllocatePages(pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
2657	}
2658
2659
2660	/**
2661	* Allocate a large page to represent guest RAM
2662	*
2663	* The allocated pages are not cleared and will contains random garbage.
2664	*
2665	* @returns VBox status code:
2666	* @retval VINF_SUCCESS on success.
2667	* @retval VERR_NOT_OWNER if the caller is not an EMT.
2668	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2669	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2670	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2671	* that is we're trying to allocate more than we've reserved.
2672	* @returns see GMMR0AllocatePages.
2673	* @param pVM Pointer to the shared VM structure.
2674	* @param idCpu VCPU id
2675	* @param cbPage Large page size
2676	*/
2677	GMMR0DECL(int) GMMR0AllocateLargePage(PVM pVM, VMCPUID idCpu, uint32_t cbPage, uint32_t pIdPage, RTHCPHYS pHCPhys)
2678	{
2679	LogFlow(("GMMR0AllocateLargePage: pVM=%p cbPage=%x\n", pVM, cbPage));
2680
2681	AssertReturn(cbPage == GMM_CHUNK_SIZE, VERR_INVALID_PARAMETER);
2682	AssertPtrReturn(pIdPage, VERR_INVALID_PARAMETER);
2683	AssertPtrReturn(pHCPhys, VERR_INVALID_PARAMETER);
2684
2685	/*
2686	* Validate, get basics and take the semaphore.
2687	*/
2688	PGMM pGMM;
2689	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2690	PGVM pGVM;
2691	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2692	if (RT_FAILURE(rc))
2693	return rc;
2694
2695	/* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
2696	if (pGMM->fLegacyAllocationMode)
2697	return VERR_NOT_SUPPORTED;
2698
2699	*pHCPhys = NIL_RTHCPHYS;
2700	*pIdPage = NIL_GMM_PAGEID;
2701
2702	gmmR0MutexAcquire(pGMM);
2703	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2704	{
2705	const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
2706	if (RT_UNLIKELY( pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cPages
2707	> pGVM->gmm.s.Reserved.cBasePages))
2708	{
2709	Log(("GMMR0AllocateLargePage: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
2710	pGVM->gmm.s.Reserved.cBasePages, pGVM->gmm.s.Allocated.cBasePages, cPages));
2711	gmmR0MutexRelease(pGMM);
2712	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2713	}
2714
2715	/*
2716	* Allocate a new large page chunk.
2717	*
2718	* Note! We leave the giant GMM lock temporarily as the allocation might
2719	* take a long time. gmmR0RegisterChunk will retake it (ugly).
2720	*/
2721	AssertCompile(GMM_CHUNK_SIZE == _2M);
2722	gmmR0MutexRelease(pGMM);
2723
2724	RTR0MEMOBJ hMemObj;
2725	rc = RTR0MemObjAllocPhysEx(&hMemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS, GMM_CHUNK_SIZE);
2726	if (RT_SUCCESS(rc))
2727	{
2728	PGMMCHUNK pChunk;
2729	rc = gmmR0RegisterChunk(pGMM, &pGMM->Private, hMemObj, pGVM->hSelf, GMM_CHUNK_FLAGS_LARGE_PAGE, &pChunk);
2730	if (RT_SUCCESS(rc))
2731	{
2732	/*
2733	* Allocate all the pages in the chunk.
2734	*/
2735	/* Unlink the new chunk from the free list. */
2736	gmmR0UnlinkChunk(pChunk);
2737
2738	/** @todo rewrite this to skip the looping. */
2739	/* Allocate all pages. */
2740	GMMPAGEDESC PageDesc;
2741	gmmR0AllocatePage(pGMM, pGVM->hSelf, pChunk, &PageDesc);
2742
2743	/* Return the first page as we'll use the whole chunk as one big page. */
2744	*pIdPage = PageDesc.idPage;
2745	*pHCPhys = PageDesc.HCPhysGCPhys;
2746
2747	for (unsigned i = 1; i < cPages; i++)
2748	gmmR0AllocatePage(pGMM, pGVM->hSelf, pChunk, &PageDesc);
2749
2750	/* Update accounting. */
2751	pGVM->gmm.s.Allocated.cBasePages += cPages;
2752	pGVM->gmm.s.cPrivatePages += cPages;
2753	pGMM->cAllocatedPages += cPages;
2754
2755	gmmR0LinkChunk(pChunk, &pGMM->Private);
2756	gmmR0MutexRelease(pGMM);
2757	}
2758	else
2759	RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
2760	}
2761	}
2762	else
2763	{
2764	gmmR0MutexRelease(pGMM);
2765	rc = VERR_INTERNAL_ERROR_5;
2766	}
2767
2768	LogFlow(("GMMR0AllocateLargePage: returns %Rrc\n", rc));
2769	return rc;
2770	}
2771
2772
2773	/**
2774	* Free a large page
2775	*
2776	* @returns VBox status code:
2777	* @param pVM Pointer to the shared VM structure.
2778	* @param idCpu VCPU id
2779	* @param idPage Large page id
2780	*/
2781	GMMR0DECL(int) GMMR0FreeLargePage(PVM pVM, VMCPUID idCpu, uint32_t idPage)
2782	{
2783	LogFlow(("GMMR0FreeLargePage: pVM=%p idPage=%x\n", pVM, idPage));
2784
2785	/*
2786	* Validate, get basics and take the semaphore.
2787	*/
2788	PGMM pGMM;
2789	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2790	PGVM pGVM;
2791	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2792	if (RT_FAILURE(rc))
2793	return rc;
2794
2795	/* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
2796	if (pGMM->fLegacyAllocationMode)
2797	return VERR_NOT_SUPPORTED;
2798
2799	gmmR0MutexAcquire(pGMM);
2800	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2801	{
2802	const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
2803
2804	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages < cPages))
2805	{
2806	Log(("GMMR0FreeLargePage: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cBasePages, cPages));
2807	gmmR0MutexRelease(pGMM);
2808	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2809	}
2810
2811	PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
2812	if (RT_LIKELY( pPage
2813	&& GMM_PAGE_IS_PRIVATE(pPage)))
2814	{
2815	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2816	Assert(pChunk);
2817	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2818	Assert(pChunk->cPrivate > 0);
2819
2820	/* Release the memory immediately. */
2821	gmmR0FreeChunk(pGMM, NULL, pChunk);
2822
2823	/* Update accounting. */
2824	pGVM->gmm.s.Allocated.cBasePages -= cPages;
2825	pGVM->gmm.s.cPrivatePages -= cPages;
2826	pGMM->cAllocatedPages -= cPages;
2827	}
2828	else
2829	rc = VERR_GMM_PAGE_NOT_FOUND;
2830	}
2831	else
2832	rc = VERR_INTERNAL_ERROR_5;
2833
2834	gmmR0MutexRelease(pGMM);
2835	LogFlow(("GMMR0FreeLargePage: returns %Rrc\n", rc));
2836	return rc;
2837	}
2838
2839
2840	/**
2841	* VMMR0 request wrapper for GMMR0FreeLargePage.
2842	*
2843	* @returns see GMMR0FreeLargePage.
2844	* @param pVM Pointer to the shared VM structure.
2845	* @param idCpu VCPU id
2846	* @param pReq The request packet.
2847	*/
2848	GMMR0DECL(int) GMMR0FreeLargePageReq(PVM pVM, VMCPUID idCpu, PGMMFREELARGEPAGEREQ pReq)
2849	{
2850	/*
2851	* Validate input and pass it on.
2852	*/
2853	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2854	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2855	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMFREEPAGESREQ),
2856	("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(GMMFREEPAGESREQ)),
2857	VERR_INVALID_PARAMETER);
2858
2859	return GMMR0FreeLargePage(pVM, idCpu, pReq->idPage);
2860	}
2861
2862
2863	/**
2864	* Frees a chunk, giving it back to the host OS.
2865	*
2866	* @param pGMM Pointer to the GMM instance.
2867	* @param pGVM This is set when called from GMMR0CleanupVM so we can
2868	* unmap and free the chunk in one go.
2869	* @param pChunk The chunk to free.
2870	*/
2871	static void gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
2872	{
2873	Assert(pChunk->Core.Key != NIL_GMM_CHUNKID);
2874
2875	GMMR0CHUNKMTXSTATE MtxState;
2876	gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
2877
2878	/*
2879	* Cleanup hack! Unmap the chunk from the callers address space.
2880	*/
2881	if ( pChunk->cMappingsX
2882	&& !pGMM->fLegacyAllocationMode
2883	&& pGVM)
2884	gmmR0UnmapChunkLocked(pGMM, pGVM, pChunk);
2885
2886	/*
2887	* If there are current mappings of the chunk, then request the
2888	* VMs to unmap them. Reposition the chunk in the free list so
2889	* it won't be a likely candidate for allocations.
2890	*/
2891	if (pChunk->cMappingsX)
2892	{
2893	/** @todo R0 -> VM request */
2894	/* The chunk can be mapped by more than one VM if fBoundMemoryMode is false! */
2895	Log(("gmmR0FreeChunk: chunk still has %d/%d mappings; don't free!\n", pChunk->cMappingsX));
2896	}
2897	else
2898	{
2899	/*
2900	* Try free the memory object.
2901	*/
2902	/** @todo drop the giant lock here! */
2903	int rc = RTR0MemObjFree(pChunk->hMemObj, false /* fFreeMappings */);
2904	if (RT_SUCCESS(rc))
2905	{
2906	pChunk->hMemObj = NIL_RTR0MEMOBJ;
2907
2908	/*
2909	* Unlink it from everywhere.
2910	*/
2911	gmmR0UnlinkChunk(pChunk);
2912
2913	RTListNodeRemove(&pChunk->ListNode);
2914
2915	PAVLU32NODECORE pCore = RTAvlU32Remove(&pGMM->pChunks, pChunk->Core.Key);
2916	Assert(pCore == &pChunk->Core); NOREF(pCore);
2917
2918	PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(pChunk->Core.Key)];
2919	if (pTlbe->pChunk == pChunk)
2920	{
2921	pTlbe->idChunk = NIL_GMM_CHUNKID;
2922	pTlbe->pChunk = NULL;
2923	}
2924
2925	Assert(pGMM->cChunks > 0);
2926	pGMM->cChunks--;
2927
2928	/*
2929	* Free the Chunk ID and struct.
2930	*/
2931	gmmR0FreeChunkId(pGMM, pChunk->Core.Key);
2932	pChunk->Core.Key = NIL_GMM_CHUNKID;
2933
2934	RTMemFree(pChunk->paMappingsX);
2935	pChunk->paMappingsX = NULL;
2936
2937	RTMemFree(pChunk);
2938	pChunk = NULL; /* (for gmmR0ChunkMutexRelease) */
2939
2940	pGMM->cFreedChunks++;
2941	}
2942	else
2943	AssertRC(rc);
2944	}
2945
2946	gmmR0ChunkMutexRelease(&MtxState, pChunk);
2947	}
2948
2949
2950	/**
2951	* Free page worker.
2952	*
2953	* The caller does all the statistic decrementing, we do all the incrementing.
2954	*
2955	* @param pGMM Pointer to the GMM instance data.
2956	* @param pChunk Pointer to the chunk this page belongs to.
2957	* @param idPage The Page ID.
2958	* @param pPage Pointer to the page.
2959	*/
2960	static void gmmR0FreePageWorker(PGMM pGMM, PGMMCHUNK pChunk, uint32_t idPage, PGMMPAGE pPage)
2961	{
2962	Log3(("F pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x\n",
2963	pPage, pPage - &pChunk->aPages[0], idPage, pPage->Common.u2State, pChunk->iFreeHead)); NOREF(idPage);
2964
2965	/*
2966	* Put the page on the free list.
2967	*/
2968	pPage->u = 0;
2969	pPage->Free.u2State = GMM_PAGE_STATE_FREE;
2970	Assert(pChunk->iFreeHead < RT_ELEMENTS(pChunk->aPages) \|\| pChunk->iFreeHead == UINT16_MAX);
2971	pPage->Free.iNext = pChunk->iFreeHead;
2972	pChunk->iFreeHead = pPage - &pChunk->aPages[0];
2973
2974	/*
2975	* Update statistics (the cShared/cPrivate stats are up to date already),
2976	* and relink the chunk if necessary.
2977	*/
2978	if (gmmR0SelectFreeSetList(pChunk->cFree) != gmmR0SelectFreeSetList(pChunk->cFree + 1))
2979	{
2980	gmmR0UnlinkChunk(pChunk);
2981	pChunk->cFree++;
2982	gmmR0LinkChunk(pChunk, pChunk->cShared ? &pGMM->Shared : &pGMM->Private);
2983	}
2984	else
2985	{
2986	pChunk->cFree++;
2987	pChunk->pSet->cFreePages++;
2988	}
2989
2990	/*
2991	* If the chunk becomes empty, consider giving memory back to the host OS.
2992	*
2993	* The current strategy is to try give it back if there are other chunks
2994	* in this free list, meaning if there are at least 240 free pages in this
2995	* category. Note that since there are probably mappings of the chunk,
2996	* it won't be freed up instantly, which probably screws up this logic
2997	* a bit...
2998	*/
2999	if (RT_UNLIKELY( pChunk->cFree == GMM_CHUNK_NUM_PAGES
3000	&& pChunk->pFreeNext
3001	&& pChunk->pFreePrev /** @todo this is probably misfiring, see reset... */
3002	&& !pGMM->fLegacyAllocationMode))
3003	gmmR0FreeChunk(pGMM, NULL, pChunk);
3004
3005	}
3006
3007
3008	/**
3009	* Frees a shared page, the page is known to exist and be valid and such.
3010	*
3011	* @param pGMM Pointer to the GMM instance.
3012	* @param idPage The Page ID
3013	* @param pPage The page structure.
3014	*/
3015	DECLINLINE(void) gmmR0FreeSharedPage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage)
3016	{
3017	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
3018	Assert(pChunk);
3019	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
3020	Assert(pChunk->cShared > 0);
3021	Assert(pGMM->cSharedPages > 0);
3022	Assert(pGMM->cAllocatedPages > 0);
3023	Assert(!pPage->Shared.cRefs);
3024
3025	pChunk->cShared--;
3026	pGMM->cAllocatedPages--;
3027	pGMM->cSharedPages--;
3028	gmmR0FreePageWorker(pGMM, pChunk, idPage, pPage);
3029	}
3030
3031	#ifdef VBOX_WITH_PAGE_SHARING
3032
3033	/**
3034	* Converts a private page to a shared page, the page is known to exist and be valid and such.
3035	*
3036	* @param pGMM Pointer to the GMM instance.
3037	* @param pGVM Pointer to the GVM instance.
3038	* @param HCPhys Host physical address
3039	* @param idPage The Page ID
3040	* @param pPage The page structure.
3041	*/
3042	DECLINLINE(void) gmmR0ConvertToSharedPage(PGMM pGMM, PGVM pGVM, RTHCPHYS HCPhys, uint32_t idPage, PGMMPAGE pPage)
3043	{
3044	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
3045	Assert(pChunk);
3046	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
3047	Assert(GMM_PAGE_IS_PRIVATE(pPage));
3048
3049	pChunk->cPrivate--;
3050	pChunk->cShared++;
3051
3052	pGMM->cSharedPages++;
3053
3054	pGVM->gmm.s.cSharedPages++;
3055	pGVM->gmm.s.cPrivatePages--;
3056
3057	/* Modify the page structure. */
3058	pPage->Shared.pfn = (uint32_t)(uint64_t)(HCPhys >> PAGE_SHIFT);
3059	pPage->Shared.cRefs = 1;
3060	pPage->Common.u2State = GMM_PAGE_STATE_SHARED;
3061	}
3062
3063
3064	/**
3065	* Increase the use count of a shared page, the page is known to exist and be valid and such.
3066	*
3067	* @param pGMM Pointer to the GMM instance.
3068	* @param pGVM Pointer to the GVM instance.
3069	* @param pPage The page structure.
3070	*/
3071	DECLINLINE(void) gmmR0UseSharedPage(PGMM pGMM, PGVM pGVM, PGMMPAGE pPage)
3072	{
3073	Assert(pGMM->cSharedPages > 0);
3074	Assert(pGMM->cAllocatedPages > 0);
3075
3076	pGMM->cDuplicatePages++;
3077
3078	pPage->Shared.cRefs++;
3079	pGVM->gmm.s.cSharedPages++;
3080	pGVM->gmm.s.Allocated.cBasePages++;
3081	}
3082
3083	#endif /* VBOX_WITH_PAGE_SHARING */
3084
3085	/**
3086	* Frees a private page, the page is known to exist and be valid and such.
3087	*
3088	* @param pGMM Pointer to the GMM instance.
3089	* @param idPage The Page ID
3090	* @param pPage The page structure.
3091	*/
3092	DECLINLINE(void) gmmR0FreePrivatePage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage)
3093	{
3094	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
3095	Assert(pChunk);
3096	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
3097	Assert(pChunk->cPrivate > 0);
3098	Assert(pGMM->cAllocatedPages > 0);
3099
3100	pChunk->cPrivate--;
3101	pGMM->cAllocatedPages--;
3102	gmmR0FreePageWorker(pGMM, pChunk, idPage, pPage);
3103	}
3104
3105
3106	/**
3107	* Common worker for GMMR0FreePages and GMMR0BalloonedPages.
3108	*
3109	* @returns VBox status code:
3110	* @retval xxx
3111	*
3112	* @param pGMM Pointer to the GMM instance data.
3113	* @param pGVM Pointer to the shared VM structure.
3114	* @param cPages The number of pages to free.
3115	* @param paPages Pointer to the page descriptors.
3116	* @param enmAccount The account this relates to.
3117	*/
3118	static int gmmR0FreePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
3119	{
3120	/*
3121	* Check that the request isn't impossible wrt to the account status.
3122	*/
3123	switch (enmAccount)
3124	{
3125	case GMMACCOUNT_BASE:
3126	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages < cPages))
3127	{
3128	Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cBasePages, cPages));
3129	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3130	}
3131	break;
3132	case GMMACCOUNT_SHADOW:
3133	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cShadowPages < cPages))
3134	{
3135	Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cShadowPages, cPages));
3136	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3137	}
3138	break;
3139	case GMMACCOUNT_FIXED:
3140	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cFixedPages < cPages))
3141	{
3142	Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cFixedPages, cPages));
3143	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3144	}
3145	break;
3146	default:
3147	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
3148	}
3149
3150	/*
3151	* Walk the descriptors and free the pages.
3152	*
3153	* Statistics (except the account) are being updated as we go along,
3154	* unlike the alloc code. Also, stop on the first error.
3155	*/
3156	int rc = VINF_SUCCESS;
3157	uint32_t iPage;
3158	for (iPage = 0; iPage < cPages; iPage++)
3159	{
3160	uint32_t idPage = paPages[iPage].idPage;
3161	PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
3162	if (RT_LIKELY(pPage))
3163	{
3164	if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
3165	{
3166	if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
3167	{
3168	Assert(pGVM->gmm.s.cPrivatePages);
3169	pGVM->gmm.s.cPrivatePages--;
3170	gmmR0FreePrivatePage(pGMM, idPage, pPage);
3171	}
3172	else
3173	{
3174	Log(("gmmR0AllocatePages: #%#x/%#x: not owner! hGVM=%#x hSelf=%#x\n", iPage, idPage,
3175	pPage->Private.hGVM, pGVM->hSelf));
3176	rc = VERR_GMM_NOT_PAGE_OWNER;
3177	break;
3178	}
3179	}
3180	else if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
3181	{
3182	Assert(pGVM->gmm.s.cSharedPages);
3183	pGVM->gmm.s.cSharedPages--;
3184	Assert(pPage->Shared.cRefs);
3185	if (!--pPage->Shared.cRefs)
3186	gmmR0FreeSharedPage(pGMM, idPage, pPage);
3187	else
3188	{
3189	Assert(pGMM->cDuplicatePages);
3190	pGMM->cDuplicatePages--;
3191	}
3192	}
3193	else
3194	{
3195	Log(("gmmR0AllocatePages: #%#x/%#x: already free!\n", iPage, idPage));
3196	rc = VERR_GMM_PAGE_ALREADY_FREE;
3197	break;
3198	}
3199	}
3200	else
3201	{
3202	Log(("gmmR0AllocatePages: #%#x/%#x: not found!\n", iPage, idPage));
3203	rc = VERR_GMM_PAGE_NOT_FOUND;
3204	break;
3205	}
3206	paPages[iPage].idPage = NIL_GMM_PAGEID;
3207	}
3208
3209	/*
3210	* Update the account.
3211	*/
3212	switch (enmAccount)
3213	{
3214	case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages -= iPage; break;
3215	case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages -= iPage; break;
3216	case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages -= iPage; break;
3217	default:
3218	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
3219	}
3220
3221	/*
3222	* Any threshold stuff to be done here?
3223	*/
3224
3225	return rc;
3226	}
3227
3228
3229	/**
3230	* Free one or more pages.
3231	*
3232	* This is typically used at reset time or power off.
3233	*
3234	* @returns VBox status code:
3235	* @retval xxx
3236	*
3237	* @param pVM Pointer to the shared VM structure.
3238	* @param idCpu VCPU id
3239	* @param cPages The number of pages to allocate.
3240	* @param paPages Pointer to the page descriptors containing the Page IDs for each page.
3241	* @param enmAccount The account this relates to.
3242	* @thread EMT.
3243	*/
3244	GMMR0DECL(int) GMMR0FreePages(PVM pVM, VMCPUID idCpu, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
3245	{
3246	LogFlow(("GMMR0FreePages: pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pVM, cPages, paPages, enmAccount));
3247
3248	/*
3249	* Validate input and get the basics.
3250	*/
3251	PGMM pGMM;
3252	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3253	PGVM pGVM;
3254	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3255	if (RT_FAILURE(rc))
3256	return rc;
3257
3258	AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
3259	AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
3260	AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
3261
3262	for (unsigned iPage = 0; iPage < cPages; iPage++)
3263	AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
3264	/\|\| paPages[iPage].idPage == NIL_GMM_PAGEID/,
3265	("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
3266
3267	/*
3268	* Take the semaphore and call the worker function.
3269	*/
3270	gmmR0MutexAcquire(pGMM);
3271	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3272	{
3273	rc = gmmR0FreePages(pGMM, pGVM, cPages, paPages, enmAccount);
3274	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3275	}
3276	else
3277	rc = VERR_INTERNAL_ERROR_5;
3278	gmmR0MutexRelease(pGMM);
3279	LogFlow(("GMMR0FreePages: returns %Rrc\n", rc));
3280	return rc;
3281	}
3282
3283
3284	/**
3285	* VMMR0 request wrapper for GMMR0FreePages.
3286	*
3287	* @returns see GMMR0FreePages.
3288	* @param pVM Pointer to the shared VM structure.
3289	* @param idCpu VCPU id
3290	* @param pReq The request packet.
3291	*/
3292	GMMR0DECL(int) GMMR0FreePagesReq(PVM pVM, VMCPUID idCpu, PGMMFREEPAGESREQ pReq)
3293	{
3294	/*
3295	* Validate input and pass it on.
3296	*/
3297	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3298	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3299	AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0]),
3300	("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0])),
3301	VERR_INVALID_PARAMETER);
3302	AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[pReq->cPages]),
3303	("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[pReq->cPages])),
3304	VERR_INVALID_PARAMETER);
3305
3306	return GMMR0FreePages(pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
3307	}
3308
3309
3310	/**
3311	* Report back on a memory ballooning request.
3312	*
3313	* The request may or may not have been initiated by the GMM. If it was initiated
3314	* by the GMM it is important that this function is called even if no pages were
3315	* ballooned.
3316	*
3317	* @returns VBox status code:
3318	* @retval VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH
3319	* @retval VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH
3320	* @retval VERR_GMM_OVERCOMMITTED_TRY_AGAIN_IN_A_BIT - reset condition
3321	* indicating that we won't necessarily have sufficient RAM to boot
3322	* the VM again and that it should pause until this changes (we'll try
3323	* balloon some other VM). (For standard deflate we have little choice
3324	* but to hope the VM won't use the memory that was returned to it.)
3325	*
3326	* @param pVM Pointer to the shared VM structure.
3327	* @param idCpu VCPU id
3328	* @param enmAction Inflate/deflate/reset
3329	* @param cBalloonedPages The number of pages that was ballooned.
3330	*
3331	* @thread EMT.
3332	*/
3333	GMMR0DECL(int) GMMR0BalloonedPages(PVM pVM, VMCPUID idCpu, GMMBALLOONACTION enmAction, uint32_t cBalloonedPages)
3334	{
3335	LogFlow(("GMMR0BalloonedPages: pVM=%p enmAction=%d cBalloonedPages=%#x\n",
3336	pVM, enmAction, cBalloonedPages));
3337
3338	AssertMsgReturn(cBalloonedPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cBalloonedPages), VERR_INVALID_PARAMETER);
3339
3340	/*
3341	* Validate input and get the basics.
3342	*/
3343	PGMM pGMM;
3344	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3345	PGVM pGVM;
3346	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3347	if (RT_FAILURE(rc))
3348	return rc;
3349
3350	/*
3351	* Take the semaphore and do some more validations.
3352	*/
3353	gmmR0MutexAcquire(pGMM);
3354	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3355	{
3356	switch (enmAction)
3357	{
3358	case GMMBALLOONACTION_INFLATE:
3359	{
3360	if (RT_LIKELY(pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cBalloonedPages <= pGVM->gmm.s.Reserved.cBasePages))
3361	{
3362	/*
3363	* Record the ballooned memory.
3364	*/
3365	pGMM->cBalloonedPages += cBalloonedPages;
3366	if (pGVM->gmm.s.cReqBalloonedPages)
3367	{
3368	/* Codepath never taken. Might be interesting in the future to request ballooned memory from guests in low memory conditions.. */
3369	AssertFailed();
3370
3371	pGVM->gmm.s.cBalloonedPages += cBalloonedPages;
3372	pGVM->gmm.s.cReqActuallyBalloonedPages += cBalloonedPages;
3373	Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx Req=%#llx Actual=%#llx (pending)\n", cBalloonedPages,
3374	pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqBalloonedPages, pGVM->gmm.s.cReqActuallyBalloonedPages));
3375	}
3376	else
3377	{
3378	pGVM->gmm.s.cBalloonedPages += cBalloonedPages;
3379	Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx (user)\n",
3380	cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages));
3381	}
3382	}
3383	else
3384	{
3385	Log(("GMMR0BalloonedPages: cBasePages=%#llx Total=%#llx cBalloonedPages=%#llx Reserved=%#llx\n",
3386	pGVM->gmm.s.Allocated.cBasePages, pGVM->gmm.s.cBalloonedPages, cBalloonedPages, pGVM->gmm.s.Reserved.cBasePages));
3387	rc = VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3388	}
3389	break;
3390	}
3391
3392	case GMMBALLOONACTION_DEFLATE:
3393	{
3394	/* Deflate. */
3395	if (pGVM->gmm.s.cBalloonedPages >= cBalloonedPages)
3396	{
3397	/*
3398	* Record the ballooned memory.
3399	*/
3400	Assert(pGMM->cBalloonedPages >= cBalloonedPages);
3401	pGMM->cBalloonedPages -= cBalloonedPages;
3402	pGVM->gmm.s.cBalloonedPages -= cBalloonedPages;
3403	if (pGVM->gmm.s.cReqDeflatePages)
3404	{
3405	AssertFailed(); /* This is path is for later. */
3406	Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx Req=%#llx\n",
3407	cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqDeflatePages));
3408
3409	/*
3410	* Anything we need to do here now when the request has been completed?
3411	*/
3412	pGVM->gmm.s.cReqDeflatePages = 0;
3413	}
3414	else
3415	Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx (user)\n",
3416	cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages));
3417	}
3418	else
3419	{
3420	Log(("GMMR0BalloonedPages: Total=%#llx cBalloonedPages=%#llx\n", pGVM->gmm.s.cBalloonedPages, cBalloonedPages));
3421	rc = VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH;
3422	}
3423	break;
3424	}
3425
3426	case GMMBALLOONACTION_RESET:
3427	{
3428	/* Reset to an empty balloon. */
3429	Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.cBalloonedPages);
3430
3431	pGMM->cBalloonedPages -= pGVM->gmm.s.cBalloonedPages;
3432	pGVM->gmm.s.cBalloonedPages = 0;
3433	break;
3434	}
3435
3436	default:
3437	rc = VERR_INVALID_PARAMETER;
3438	break;
3439	}
3440	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3441	}
3442	else
3443	rc = VERR_INTERNAL_ERROR_5;
3444
3445	gmmR0MutexRelease(pGMM);
3446	LogFlow(("GMMR0BalloonedPages: returns %Rrc\n", rc));
3447	return rc;
3448	}
3449
3450
3451	/**
3452	* VMMR0 request wrapper for GMMR0BalloonedPages.
3453	*
3454	* @returns see GMMR0BalloonedPages.
3455	* @param pVM Pointer to the shared VM structure.
3456	* @param idCpu VCPU id
3457	* @param pReq The request packet.
3458	*/
3459	GMMR0DECL(int) GMMR0BalloonedPagesReq(PVM pVM, VMCPUID idCpu, PGMMBALLOONEDPAGESREQ pReq)
3460	{
3461	/*
3462	* Validate input and pass it on.
3463	*/
3464	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3465	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3466	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMBALLOONEDPAGESREQ),
3467	("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMBALLOONEDPAGESREQ)),
3468	VERR_INVALID_PARAMETER);
3469
3470	return GMMR0BalloonedPages(pVM, idCpu, pReq->enmAction, pReq->cBalloonedPages);
3471	}
3472
3473	/**
3474	* Return memory statistics for the hypervisor
3475	*
3476	* @returns VBox status code:
3477	* @param pVM Pointer to the shared VM structure.
3478	* @param pReq The request packet.
3479	*/
3480	GMMR0DECL(int) GMMR0QueryHypervisorMemoryStatsReq(PVM pVM, PGMMMEMSTATSREQ pReq)
3481	{
3482	/*
3483	* Validate input and pass it on.
3484	*/
3485	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3486	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3487	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
3488	("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
3489	VERR_INVALID_PARAMETER);
3490
3491	/*
3492	* Validate input and get the basics.
3493	*/
3494	PGMM pGMM;
3495	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3496	pReq->cAllocPages = pGMM->cAllocatedPages;
3497	pReq->cFreePages = (pGMM->cChunks << (GMM_CHUNK_SHIFT- PAGE_SHIFT)) - pGMM->cAllocatedPages;
3498	pReq->cBalloonedPages = pGMM->cBalloonedPages;
3499	pReq->cMaxPages = pGMM->cMaxPages;
3500	pReq->cSharedPages = pGMM->cDuplicatePages;
3501	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3502
3503	return VINF_SUCCESS;
3504	}
3505
3506	/**
3507	* Return memory statistics for the VM
3508	*
3509	* @returns VBox status code:
3510	* @param pVM Pointer to the shared VM structure.
3511	* @parma idCpu Cpu id.
3512	* @param pReq The request packet.
3513	*/
3514	GMMR0DECL(int) GMMR0QueryMemoryStatsReq(PVM pVM, VMCPUID idCpu, PGMMMEMSTATSREQ pReq)
3515	{
3516	/*
3517	* Validate input and pass it on.
3518	*/
3519	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3520	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3521	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
3522	("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
3523	VERR_INVALID_PARAMETER);
3524
3525	/*
3526	* Validate input and get the basics.
3527	*/
3528	PGMM pGMM;
3529	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3530	PGVM pGVM;
3531	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3532	if (RT_FAILURE(rc))
3533	return rc;
3534
3535	/*
3536	* Take the semaphore and do some more validations.
3537	*/
3538	gmmR0MutexAcquire(pGMM);
3539	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3540	{
3541	pReq->cAllocPages = pGVM->gmm.s.Allocated.cBasePages;
3542	pReq->cBalloonedPages = pGVM->gmm.s.cBalloonedPages;
3543	pReq->cMaxPages = pGVM->gmm.s.Reserved.cBasePages;
3544	pReq->cFreePages = pReq->cMaxPages - pReq->cAllocPages;
3545	}
3546	else
3547	rc = VERR_INTERNAL_ERROR_5;
3548
3549	gmmR0MutexRelease(pGMM);
3550	LogFlow(("GMMR3QueryVMMemoryStats: returns %Rrc\n", rc));
3551	return rc;
3552	}
3553
3554
3555	/**
3556	* Worker for gmmR0UnmapChunk and gmmr0FreeChunk.
3557	*
3558	* Don't call this in legacy allocation mode!
3559	*
3560	* @returns VBox status code.
3561	* @param pGMM Pointer to the GMM instance data.
3562	* @param pGVM Pointer to the Global VM structure.
3563	* @param pChunk Pointer to the chunk to be unmapped.
3564	*/
3565	static int gmmR0UnmapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
3566	{
3567	Assert(!pGMM->fLegacyAllocationMode);
3568
3569	/*
3570	* Find the mapping and try unmapping it.
3571	*/
3572	uint32_t cMappings = pChunk->cMappingsX;
3573	for (uint32_t i = 0; i < cMappings; i++)
3574	{
3575	Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
3576	if (pChunk->paMappingsX[i].pGVM == pGVM)
3577	{
3578	/* unmap */
3579	int rc = RTR0MemObjFree(pChunk->paMappingsX[i].hMapObj, false /* fFreeMappings (NA) */);
3580	if (RT_SUCCESS(rc))
3581	{
3582	/* update the record. */
3583	cMappings--;
3584	if (i < cMappings)
3585	pChunk->paMappingsX[i] = pChunk->paMappingsX[cMappings];
3586	pChunk->paMappingsX[cMappings].hMapObj = NIL_RTR0MEMOBJ;
3587	pChunk->paMappingsX[cMappings].pGVM = NULL;
3588	Assert(pChunk->cMappingsX - 1U == cMappings);
3589	pChunk->cMappingsX = cMappings;
3590	}
3591
3592	return rc;
3593	}
3594	}
3595
3596	Log(("gmmR0UnmapChunk: Chunk %#x is not mapped into pGVM=%p/%#x\n", pChunk->Core.Key, pGVM, pGVM->hSelf));
3597	return VERR_GMM_CHUNK_NOT_MAPPED;
3598	}
3599
3600
3601	/**
3602	* Unmaps a chunk previously mapped into the address space of the current process.
3603	*
3604	* @returns VBox status code.
3605	* @param pGMM Pointer to the GMM instance data.
3606	* @param pGVM Pointer to the Global VM structure.
3607	* @param pChunk Pointer to the chunk to be unmapped.
3608	*/
3609	static int gmmR0UnmapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem)
3610	{
3611	if (!pGMM->fLegacyAllocationMode)
3612	{
3613	/*
3614	* Lock the chunk and if possible leave the giant GMM lock.
3615	*/
3616	GMMR0CHUNKMTXSTATE MtxState;
3617	int rc = gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk,
3618	fRelaxedSem ? GMMR0CHUNK_MTX_RETAKE_GIANT : GMMR0CHUNK_MTX_KEEP_GIANT);
3619	if (RT_SUCCESS(rc))
3620	{
3621	rc = gmmR0UnmapChunkLocked(pGMM, pGVM, pChunk);
3622	gmmR0ChunkMutexRelease(&MtxState, pChunk);
3623	}
3624	return rc;
3625	}
3626
3627	if (pChunk->hGVM == pGVM->hSelf)
3628	return VINF_SUCCESS;
3629
3630	Log(("gmmR0UnmapChunk: Chunk %#x is not mapped into pGVM=%p/%#x (legacy)\n", pChunk->Core.Key, pGVM, pGVM->hSelf));
3631	return VERR_GMM_CHUNK_NOT_MAPPED;
3632	}
3633
3634
3635	/**
3636	* Worker for gmmR0MapChunk.
3637	*
3638	* @returns VBox status code.
3639	* @param pGMM Pointer to the GMM instance data.
3640	* @param pGVM Pointer to the Global VM structure.
3641	* @param pChunk Pointer to the chunk to be mapped.
3642	* @param ppvR3 Where to store the ring-3 address of the mapping.
3643	* In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will be
3644	* contain the address of the existing mapping.
3645	*/
3646	static int gmmR0MapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
3647	{
3648	/*
3649	* If we're in legacy mode this is simple.
3650	*/
3651	if (pGMM->fLegacyAllocationMode)
3652	{
3653	if (pChunk->hGVM != pGVM->hSelf)
3654	{
3655	Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
3656	return VERR_GMM_CHUNK_NOT_FOUND;
3657	}
3658
3659	*ppvR3 = RTR0MemObjAddressR3(pChunk->hMemObj);
3660	return VINF_SUCCESS;
3661	}
3662
3663	/*
3664	* Check to see if the chunk is already mapped.
3665	*/
3666	for (uint32_t i = 0; i < pChunk->cMappingsX; i++)
3667	{
3668	Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
3669	if (pChunk->paMappingsX[i].pGVM == pGVM)
3670	{
3671	*ppvR3 = RTR0MemObjAddressR3(pChunk->paMappingsX[i].hMapObj);
3672	Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
3673	#ifdef VBOX_WITH_PAGE_SHARING
3674	/* The ring-3 chunk cache can be out of sync; don't fail. */
3675	return VINF_SUCCESS;
3676	#else
3677	return VERR_GMM_CHUNK_ALREADY_MAPPED;
3678	#endif
3679	}
3680	}
3681
3682	/*
3683	* Do the mapping.
3684	*/
3685	RTR0MEMOBJ hMapObj;
3686	int rc = RTR0MemObjMapUser(&hMapObj, pChunk->hMemObj, (RTR3PTR)-1, 0, RTMEM_PROT_READ \| RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
3687	if (RT_SUCCESS(rc))
3688	{
3689	/* reallocate the array? assumes few users per chunk (usually one). */
3690	unsigned iMapping = pChunk->cMappingsX;
3691	if ( iMapping <= 3
3692	\|\| (iMapping & 3) == 0)
3693	{
3694	unsigned cNewSize = iMapping <= 3
3695	? iMapping + 1
3696	: iMapping + 4;
3697	Assert(cNewSize < 4 \|\| RT_ALIGN_32(cNewSize, 4) == cNewSize);
3698	if (RT_UNLIKELY(cNewSize > UINT16_MAX))
3699	{
3700	rc = RTR0MemObjFree(hMapObj, false /* fFreeMappings (NA) */); AssertRC(rc);
3701	return VERR_GMM_TOO_MANY_CHUNK_MAPPINGS;
3702	}
3703
3704	void pvMappings = RTMemRealloc(pChunk->paMappingsX, cNewSize sizeof(pChunk->paMappingsX[0]));
3705	if (RT_UNLIKELY(!pvMappings))
3706	{
3707	rc = RTR0MemObjFree(hMapObj, false /* fFreeMappings (NA) */); AssertRC(rc);
3708	return VERR_NO_MEMORY;
3709	}
3710	pChunk->paMappingsX = (PGMMCHUNKMAP)pvMappings;
3711	}
3712
3713	/* insert new entry */
3714	pChunk->paMappingsX[iMapping].hMapObj = hMapObj;
3715	pChunk->paMappingsX[iMapping].pGVM = pGVM;
3716	Assert(pChunk->cMappingsX == iMapping);
3717	pChunk->cMappingsX = iMapping + 1;
3718
3719	*ppvR3 = RTR0MemObjAddressR3(hMapObj);
3720	}
3721
3722	return rc;
3723	}
3724
3725
3726	/**
3727	* Maps a chunk into the user address space of the current process.
3728	*
3729	* @returns VBox status code.
3730	* @param pGMM Pointer to the GMM instance data.
3731	* @param pGVM Pointer to the Global VM structure.
3732	* @param pChunk Pointer to the chunk to be mapped.
3733	* @param fRelaxedSem Whether we can release the semaphore while doing the
3734	* locking (@c true) or not.
3735	* @param ppvR3 Where to store the ring-3 address of the mapping.
3736	* In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will be
3737	* contain the address of the existing mapping.
3738	*/
3739	static int gmmR0MapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem, PRTR3PTR ppvR3)
3740	{
3741	/*
3742	* Take the chunk lock and leave the giant GMM lock when possible, then
3743	* call the worker function.
3744	*/
3745	GMMR0CHUNKMTXSTATE MtxState;
3746	int rc = gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk,
3747	fRelaxedSem ? GMMR0CHUNK_MTX_RETAKE_GIANT : GMMR0CHUNK_MTX_KEEP_GIANT);
3748	if (RT_SUCCESS(rc))
3749	{
3750	rc = gmmR0MapChunkLocked(pGMM, pGVM, pChunk, ppvR3);
3751	gmmR0ChunkMutexRelease(&MtxState, pChunk);
3752	}
3753
3754	return rc;
3755	}
3756
3757
3758
3759	/**
3760	* Check if a chunk is mapped into the specified VM
3761	*
3762	* @returns mapped yes/no
3763	* @param pGMM Pointer to the GMM instance.
3764	* @param pGVM Pointer to the Global VM structure.
3765	* @param pChunk Pointer to the chunk to be mapped.
3766	* @param ppvR3 Where to store the ring-3 address of the mapping.
3767	*/
3768	static int gmmR0IsChunkMapped(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
3769	{
3770	GMMR0CHUNKMTXSTATE MtxState;
3771	gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
3772	for (uint32_t i = 0; i < pChunk->cMappingsX; i++)
3773	{
3774	Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
3775	if (pChunk->paMappingsX[i].pGVM == pGVM)
3776	{
3777	*ppvR3 = RTR0MemObjAddressR3(pChunk->paMappingsX[i].hMapObj);
3778	gmmR0ChunkMutexRelease(&MtxState, pChunk);
3779	return true;
3780	}
3781	}
3782	*ppvR3 = NULL;
3783	gmmR0ChunkMutexRelease(&MtxState, pChunk);
3784	return false;
3785	}
3786
3787
3788	/**
3789	* Map a chunk and/or unmap another chunk.
3790	*
3791	* The mapping and unmapping applies to the current process.
3792	*
3793	* This API does two things because it saves a kernel call per mapping when
3794	* when the ring-3 mapping cache is full.
3795	*
3796	* @returns VBox status code.
3797	* @param pVM The VM.
3798	* @param idChunkMap The chunk to map. NIL_GMM_CHUNKID if nothing to map.
3799	* @param idChunkUnmap The chunk to unmap. NIL_GMM_CHUNKID if nothing to unmap.
3800	* @param ppvR3 Where to store the address of the mapped chunk. NULL is ok if nothing to map.
3801	* @thread EMT
3802	*/
3803	GMMR0DECL(int) GMMR0MapUnmapChunk(PVM pVM, uint32_t idChunkMap, uint32_t idChunkUnmap, PRTR3PTR ppvR3)
3804	{
3805	LogFlow(("GMMR0MapUnmapChunk: pVM=%p idChunkMap=%#x idChunkUnmap=%#x ppvR3=%p\n",
3806	pVM, idChunkMap, idChunkUnmap, ppvR3));
3807
3808	/*
3809	* Validate input and get the basics.
3810	*/
3811	PGMM pGMM;
3812	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3813	PGVM pGVM;
3814	int rc = GVMMR0ByVM(pVM, &pGVM);
3815	if (RT_FAILURE(rc))
3816	return rc;
3817
3818	AssertCompile(NIL_GMM_CHUNKID == 0);
3819	AssertMsgReturn(idChunkMap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkMap), VERR_INVALID_PARAMETER);
3820	AssertMsgReturn(idChunkUnmap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkUnmap), VERR_INVALID_PARAMETER);
3821
3822	if ( idChunkMap == NIL_GMM_CHUNKID
3823	&& idChunkUnmap == NIL_GMM_CHUNKID)
3824	return VERR_INVALID_PARAMETER;
3825
3826	if (idChunkMap != NIL_GMM_CHUNKID)
3827	{
3828	AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3829	*ppvR3 = NIL_RTR3PTR;
3830	}
3831
3832	/*
3833	* Take the semaphore and do the work.
3834	*
3835	* The unmapping is done last since it's easier to undo a mapping than
3836	* undoing an unmapping. The ring-3 mapping cache cannot not be so big
3837	* that it pushes the user virtual address space to within a chunk of
3838	* it it's limits, so, no problem here.
3839	*/
3840	gmmR0MutexAcquire(pGMM);
3841	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3842	{
3843	PGMMCHUNK pMap = NULL;
3844	if (idChunkMap != NIL_GVM_HANDLE)
3845	{
3846	pMap = gmmR0GetChunk(pGMM, idChunkMap);
3847	if (RT_LIKELY(pMap))
3848	rc = gmmR0MapChunk(pGMM, pGVM, pMap, true /fRelaxedSem/, ppvR3);
3849	else
3850	{
3851	Log(("GMMR0MapUnmapChunk: idChunkMap=%#x\n", idChunkMap));
3852	rc = VERR_GMM_CHUNK_NOT_FOUND;
3853	}
3854	}
3855	/** @todo split this operation, the bail out might (theoretcially) not be
3856	* entirely safe. */
3857
3858	if ( idChunkUnmap != NIL_GMM_CHUNKID
3859	&& RT_SUCCESS(rc))
3860	{
3861	PGMMCHUNK pUnmap = gmmR0GetChunk(pGMM, idChunkUnmap);
3862	if (RT_LIKELY(pUnmap))
3863	rc = gmmR0UnmapChunk(pGMM, pGVM, pUnmap, true /fRelaxedSem/);
3864	else
3865	{
3866	Log(("GMMR0MapUnmapChunk: idChunkUnmap=%#x\n", idChunkUnmap));
3867	rc = VERR_GMM_CHUNK_NOT_FOUND;
3868	}
3869
3870	if (RT_FAILURE(rc) && pMap)
3871	gmmR0UnmapChunk(pGMM, pGVM, pMap, false /fRelaxedSem/);
3872	}
3873
3874	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3875	}
3876	else
3877	rc = VERR_INTERNAL_ERROR_5;
3878	gmmR0MutexRelease(pGMM);
3879
3880	LogFlow(("GMMR0MapUnmapChunk: returns %Rrc\n", rc));
3881	return rc;
3882	}
3883
3884
3885	/**
3886	* VMMR0 request wrapper for GMMR0MapUnmapChunk.
3887	*
3888	* @returns see GMMR0MapUnmapChunk.
3889	* @param pVM Pointer to the shared VM structure.
3890	* @param pReq The request packet.
3891	*/
3892	GMMR0DECL(int) GMMR0MapUnmapChunkReq(PVM pVM, PGMMMAPUNMAPCHUNKREQ pReq)
3893	{
3894	/*
3895	* Validate input and pass it on.
3896	*/
3897	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3898	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3899	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
3900
3901	return GMMR0MapUnmapChunk(pVM, pReq->idChunkMap, pReq->idChunkUnmap, &pReq->pvR3);
3902	}
3903
3904
3905	/**
3906	* Legacy mode API for supplying pages.
3907	*
3908	* The specified user address points to a allocation chunk sized block that
3909	* will be locked down and used by the GMM when the GM asks for pages.
3910	*
3911	* @returns VBox status code.
3912	* @param pVM The VM.
3913	* @param idCpu VCPU id
3914	* @param pvR3 Pointer to the chunk size memory block to lock down.
3915	*/
3916	GMMR0DECL(int) GMMR0SeedChunk(PVM pVM, VMCPUID idCpu, RTR3PTR pvR3)
3917	{
3918	/*
3919	* Validate input and get the basics.
3920	*/
3921	PGMM pGMM;
3922	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3923	PGVM pGVM;
3924	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3925	if (RT_FAILURE(rc))
3926	return rc;
3927
3928	AssertPtrReturn(pvR3, VERR_INVALID_POINTER);
3929	AssertReturn(!(PAGE_OFFSET_MASK & pvR3), VERR_INVALID_POINTER);
3930
3931	if (!pGMM->fLegacyAllocationMode)
3932	{
3933	Log(("GMMR0SeedChunk: not in legacy allocation mode!\n"));
3934	return VERR_NOT_SUPPORTED;
3935	}
3936
3937	/*
3938	* Lock the memory and add it as new chunk with our hGVM.
3939	* (The GMM locking is done inside gmmR0RegisterChunk.)
3940	*/
3941	RTR0MEMOBJ MemObj;
3942	rc = RTR0MemObjLockUser(&MemObj, pvR3, GMM_CHUNK_SIZE, RTMEM_PROT_READ \| RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
3943	if (RT_SUCCESS(rc))
3944	{
3945	rc = gmmR0RegisterChunk(pGMM, &pGMM->Private, MemObj, pGVM->hSelf, 0 /fChunkFlags/, NULL);
3946	if (RT_SUCCESS(rc))
3947	gmmR0MutexRelease(pGMM);
3948	else
3949	RTR0MemObjFree(MemObj, false /* fFreeMappings */);
3950	}
3951
3952	LogFlow(("GMMR0SeedChunk: rc=%d (pvR3=%p)\n", rc, pvR3));
3953	return rc;
3954	}
3955
3956
3957	typedef struct
3958	{
3959	PAVLGCPTRNODECORE pNode;
3960	char *pszModuleName;
3961	char *pszVersion;
3962	VBOXOSFAMILY enmGuestOS;
3963	} GMMFINDMODULEBYNAME, *PGMMFINDMODULEBYNAME;
3964
3965	/**
3966	* Tree enumeration callback for finding identical modules by name and version
3967	*/
3968	DECLCALLBACK(int) gmmR0CheckForIdenticalModule(PAVLGCPTRNODECORE pNode, void *pvUser)
3969	{
3970	PGMMFINDMODULEBYNAME pInfo = (PGMMFINDMODULEBYNAME)pvUser;
3971	PGMMSHAREDMODULE pModule = (PGMMSHAREDMODULE)pNode;
3972
3973	if ( pInfo
3974	&& pInfo->enmGuestOS == pModule->enmGuestOS
3975	/** @todo replace with RTStrNCmp */
3976	&& !strcmp(pModule->szName, pInfo->pszModuleName)
3977	&& !strcmp(pModule->szVersion, pInfo->pszVersion))
3978	{
3979	pInfo->pNode = pNode;
3980	return 1; /* stop search */
3981	}
3982	return 0;
3983	}
3984
3985
3986	/**
3987	* Registers a new shared module for the VM
3988	*
3989	* @returns VBox status code.
3990	* @param pVM VM handle
3991	* @param idCpu VCPU id
3992	* @param enmGuestOS Guest OS type
3993	* @param pszModuleName Module name
3994	* @param pszVersion Module version
3995	* @param GCBaseAddr Module base address
3996	* @param cbModule Module size
3997	* @param cRegions Number of shared region descriptors
3998	* @param pRegions Shared region(s)
3999	*/
4000	GMMR0DECL(int) GMMR0RegisterSharedModule(PVM pVM, VMCPUID idCpu, VBOXOSFAMILY enmGuestOS, char pszModuleName, char pszVersion, RTGCPTR GCBaseAddr, uint32_t cbModule,
4001	unsigned cRegions, VMMDEVSHAREDREGIONDESC *pRegions)
4002	{
4003	#ifdef VBOX_WITH_PAGE_SHARING
4004	/*
4005	* Validate input and get the basics.
4006	*/
4007	PGMM pGMM;
4008	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4009	PGVM pGVM;
4010	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
4011	if (RT_FAILURE(rc))
4012	return rc;
4013
4014	Log(("GMMR0RegisterSharedModule %s %s base %RGv size %x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule));
4015
4016	/*
4017	* Take the semaphore and do some more validations.
4018	*/
4019	gmmR0MutexAcquire(pGMM);
4020	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4021	{
4022	bool fNewModule = false;
4023
4024	/* Check if this module is already locally registered. */
4025	PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
4026	if (!pRecVM)
4027	{
4028	pRecVM = (PGMMSHAREDMODULEPERVM)RTMemAllocZ(RT_OFFSETOF(GMMSHAREDMODULEPERVM, aRegions[cRegions]));
4029	if (!pRecVM)
4030	{
4031	AssertFailed();
4032	rc = VERR_NO_MEMORY;
4033	goto end;
4034	}
4035	pRecVM->Core.Key = GCBaseAddr;
4036	pRecVM->cRegions = cRegions;
4037
4038	/* Save the region data as they can differ between VMs (address space scrambling or simply different loading order) */
4039	for (unsigned i = 0; i < cRegions; i++)
4040	{
4041	pRecVM->aRegions[i].GCRegionAddr = pRegions[i].GCRegionAddr;
4042	pRecVM->aRegions[i].cbRegion = RT_ALIGN_T(pRegions[i].cbRegion, PAGE_SIZE, uint32_t);
4043	pRecVM->aRegions[i].u32Alignment = 0;
4044	pRecVM->aRegions[i].paHCPhysPageID = NULL; /* unused */
4045	}
4046
4047	bool ret = RTAvlGCPtrInsert(&pGVM->gmm.s.pSharedModuleTree, &pRecVM->Core);
4048	Assert(ret);
4049
4050	Log(("GMMR0RegisterSharedModule: new local module %s\n", pszModuleName));
4051	fNewModule = true;
4052	}
4053	else
4054	rc = VINF_PGM_SHARED_MODULE_ALREADY_REGISTERED;
4055
4056	/* Check if this module is already globally registered. */
4057	PGMMSHAREDMODULE pGlobalModule = (PGMMSHAREDMODULE)RTAvlGCPtrGet(&pGMM->pGlobalSharedModuleTree, GCBaseAddr);
4058	if ( !pGlobalModule
4059	&& enmGuestOS == VBOXOSFAMILY_Windows64)
4060	{
4061	/* Two identical copies of e.g. Win7 x64 will typically not have a similar virtual address space layout for dlls or kernel modules.
4062	* Try to find identical binaries based on name and version.
4063	*/
4064	GMMFINDMODULEBYNAME Info;
4065
4066	Info.pNode = NULL;
4067	Info.pszVersion = pszVersion;
4068	Info.pszModuleName = pszModuleName;
4069	Info.enmGuestOS = enmGuestOS;
4070
4071	Log(("Try to find identical module %s\n", pszModuleName));
4072	int ret = RTAvlGCPtrDoWithAll(&pGMM->pGlobalSharedModuleTree, true /* fFromLeft */, gmmR0CheckForIdenticalModule, &Info);
4073	if (ret == 1)
4074	{
4075	Assert(Info.pNode);
4076	pGlobalModule = (PGMMSHAREDMODULE)Info.pNode;
4077	Log(("Found identical module at %RGv\n", pGlobalModule->Core.Key));
4078	}
4079	}
4080
4081	if (!pGlobalModule)
4082	{
4083	Assert(fNewModule);
4084	Assert(!pRecVM->fCollision);
4085
4086	pGlobalModule = (PGMMSHAREDMODULE)RTMemAllocZ(RT_OFFSETOF(GMMSHAREDMODULE, aRegions[cRegions]));
4087	if (!pGlobalModule)
4088	{
4089	AssertFailed();
4090	rc = VERR_NO_MEMORY;
4091	goto end;
4092	}
4093
4094	pGlobalModule->Core.Key = GCBaseAddr;
4095	pGlobalModule->cbModule = cbModule;
4096	/* Input limit already safe; no need to check again. */
4097	/** @todo replace with RTStrCopy */
4098	strcpy(pGlobalModule->szName, pszModuleName);
4099	strcpy(pGlobalModule->szVersion, pszVersion);
4100
4101	pGlobalModule->enmGuestOS = enmGuestOS;
4102	pGlobalModule->cRegions = cRegions;
4103
4104	for (unsigned i = 0; i < cRegions; i++)
4105	{
4106	Log(("New region %d base=%RGv size %x\n", i, pRegions[i].GCRegionAddr, pRegions[i].cbRegion));
4107	pGlobalModule->aRegions[i].GCRegionAddr = pRegions[i].GCRegionAddr;
4108	pGlobalModule->aRegions[i].cbRegion = RT_ALIGN_T(pRegions[i].cbRegion, PAGE_SIZE, uint32_t);
4109	pGlobalModule->aRegions[i].u32Alignment = 0;
4110	pGlobalModule->aRegions[i].paHCPhysPageID = NULL; /* uninitialized. */
4111	}
4112
4113	/* Save reference. */
4114	pRecVM->pGlobalModule = pGlobalModule;
4115	pRecVM->fCollision = false;
4116	pGlobalModule->cUsers++;
4117	rc = VINF_SUCCESS;
4118
4119	bool ret = RTAvlGCPtrInsert(&pGMM->pGlobalSharedModuleTree, &pGlobalModule->Core);
4120	Assert(ret);
4121
4122	Log(("GMMR0RegisterSharedModule: new global module %s\n", pszModuleName));
4123	}
4124	else
4125	{
4126	Assert(pGlobalModule->cUsers > 0);
4127
4128	/* Make sure the name and version are identical. */
4129	/** @todo replace with RTStrNCmp */
4130	if ( !strcmp(pGlobalModule->szName, pszModuleName)
4131	&& !strcmp(pGlobalModule->szVersion, pszVersion))
4132	{
4133	/* Save reference. */
4134	pRecVM->pGlobalModule = pGlobalModule;
4135	if ( fNewModule
4136	\|\| pRecVM->fCollision == true) /* colliding module unregistered and new one registered since the last check */
4137	{
4138	pGlobalModule->cUsers++;
4139	Log(("GMMR0RegisterSharedModule: using existing module %s cUser=%d!\n", pszModuleName, pGlobalModule->cUsers));
4140	}
4141	pRecVM->fCollision = false;
4142	rc = VINF_SUCCESS;
4143	}
4144	else
4145	{
4146	Log(("GMMR0RegisterSharedModule: module %s collision!\n", pszModuleName));
4147	pRecVM->fCollision = true;
4148	rc = VINF_PGM_SHARED_MODULE_COLLISION;
4149	goto end;
4150	}
4151	}
4152
4153	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4154	}
4155	else
4156	rc = VERR_INTERNAL_ERROR_5;
4157
4158	end:
4159	gmmR0MutexRelease(pGMM);
4160	return rc;
4161	#else
4162	return VERR_NOT_IMPLEMENTED;
4163	#endif
4164	}
4165
4166
4167	/**
4168	* VMMR0 request wrapper for GMMR0RegisterSharedModule.
4169	*
4170	* @returns see GMMR0RegisterSharedModule.
4171	* @param pVM Pointer to the shared VM structure.
4172	* @param idCpu VCPU id
4173	* @param pReq The request packet.
4174	*/
4175	GMMR0DECL(int) GMMR0RegisterSharedModuleReq(PVM pVM, VMCPUID idCpu, PGMMREGISTERSHAREDMODULEREQ pReq)
4176	{
4177	/*
4178	* Validate input and pass it on.
4179	*/
4180	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
4181	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
4182	AssertMsgReturn(pReq->Hdr.cbReq >= sizeof(pReq) && pReq->Hdr.cbReq == RT_UOFFSETOF(GMMREGISTERSHAREDMODULEREQ, aRegions[pReq->cRegions]), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
4183
4184	/* Pass back return code in the request packet to preserve informational codes. (VMMR3CallR0 chokes on them) */
4185	pReq->rc = GMMR0RegisterSharedModule(pVM, idCpu, pReq->enmGuestOS, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule, pReq->cRegions, pReq->aRegions);
4186	return VINF_SUCCESS;
4187	}
4188
4189	/**
4190	* Unregisters a shared module for the VM
4191	*
4192	* @returns VBox status code.
4193	* @param pVM VM handle
4194	* @param idCpu VCPU id
4195	* @param pszModuleName Module name
4196	* @param pszVersion Module version
4197	* @param GCBaseAddr Module base address
4198	* @param cbModule Module size
4199	*/
4200	GMMR0DECL(int) GMMR0UnregisterSharedModule(PVM pVM, VMCPUID idCpu, char pszModuleName, char pszVersion, RTGCPTR GCBaseAddr, uint32_t cbModule)
4201	{
4202	#ifdef VBOX_WITH_PAGE_SHARING
4203	/*
4204	* Validate input and get the basics.
4205	*/
4206	PGMM pGMM;
4207	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4208	PGVM pGVM;
4209	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
4210	if (RT_FAILURE(rc))
4211	return rc;
4212
4213	Log(("GMMR0UnregisterSharedModule %s %s base=%RGv size %x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule));
4214
4215	/*
4216	* Take the semaphore and do some more validations.
4217	*/
4218	gmmR0MutexAcquire(pGMM);
4219	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4220	{
4221	PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
4222	if (pRecVM)
4223	{
4224	/* Remove reference to global shared module. */
4225	if (!pRecVM->fCollision)
4226	{
4227	PGMMSHAREDMODULE pRec = pRecVM->pGlobalModule;
4228	Assert(pRec);
4229
4230	if (pRec) /* paranoia */
4231	{
4232	Assert(pRec->cUsers);
4233	pRec->cUsers--;
4234	if (pRec->cUsers == 0)
4235	{
4236	/* Free the ranges, but leave the pages intact as there might still be references; they will be cleared by the COW mechanism. */
4237	for (unsigned i = 0; i < pRec->cRegions; i++)
4238	if (pRec->aRegions[i].paHCPhysPageID)
4239	RTMemFree(pRec->aRegions[i].paHCPhysPageID);
4240
4241	Assert(pRec->Core.Key == GCBaseAddr \|\| pRec->enmGuestOS == VBOXOSFAMILY_Windows64);
4242	Assert(pRec->cRegions == pRecVM->cRegions);
4243	#ifdef VBOX_STRICT
4244	for (unsigned i = 0; i < pRecVM->cRegions; i++)
4245	{
4246	Assert(pRecVM->aRegions[i].GCRegionAddr == pRec->aRegions[i].GCRegionAddr);
4247	Assert(pRecVM->aRegions[i].cbRegion == pRec->aRegions[i].cbRegion);
4248	}
4249	#endif
4250
4251	/* Remove from the tree and free memory. */
4252	RTAvlGCPtrRemove(&pGMM->pGlobalSharedModuleTree, pRec->Core.Key);
4253	RTMemFree(pRec);
4254	}
4255	}
4256	else
4257	rc = VERR_PGM_SHARED_MODULE_REGISTRATION_INCONSISTENCY;
4258	}
4259	else
4260	Assert(!pRecVM->pGlobalModule);
4261
4262	/* Remove from the tree and free memory. */
4263	RTAvlGCPtrRemove(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
4264	RTMemFree(pRecVM);
4265	}
4266	else
4267	rc = VERR_PGM_SHARED_MODULE_NOT_FOUND;
4268
4269	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4270	}
4271	else
4272	rc = VERR_INTERNAL_ERROR_5;
4273
4274	gmmR0MutexRelease(pGMM);
4275	return rc;
4276	#else
4277	return VERR_NOT_IMPLEMENTED;
4278	#endif
4279	}
4280
4281	/**
4282	* VMMR0 request wrapper for GMMR0UnregisterSharedModule.
4283	*
4284	* @returns see GMMR0UnregisterSharedModule.
4285	* @param pVM Pointer to the shared VM structure.
4286	* @param idCpu VCPU id
4287	* @param pReq The request packet.
4288	*/
4289	GMMR0DECL(int) GMMR0UnregisterSharedModuleReq(PVM pVM, VMCPUID idCpu, PGMMUNREGISTERSHAREDMODULEREQ pReq)
4290	{
4291	/*
4292	* Validate input and pass it on.
4293	*/
4294	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
4295	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
4296	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
4297
4298	return GMMR0UnregisterSharedModule(pVM, idCpu, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule);
4299	}
4300
4301	#ifdef VBOX_WITH_PAGE_SHARING
4302
4303	/**
4304	* Checks specified shared module range for changes
4305	*
4306	* Performs the following tasks:
4307	* - If a shared page is new, then it changes the GMM page type to shared and
4308	* returns it in the pPageDesc descriptor.
4309	* - If a shared page already exists, then it checks if the VM page is
4310	* identical and if so frees the VM page and returns the shared page in
4311	* pPageDesc descriptor.
4312	*
4313	* @remarks ASSUMES the caller has acquired the GMM semaphore!!
4314	*
4315	* @returns VBox status code.
4316	* @param pGMM Pointer to the GMM instance data.
4317	* @param pGVM Pointer to the GVM instance data.
4318	* @param pModule Module description
4319	* @param idxRegion Region index
4320	* @param idxPage Page index
4321	* @param paPageDesc Page descriptor
4322	*/
4323	GMMR0DECL(int) GMMR0SharedModuleCheckPage(PGVM pGVM, PGMMSHAREDMODULE pModule, unsigned idxRegion, unsigned idxPage,
4324	PGMMSHAREDPAGEDESC pPageDesc)
4325	{
4326	int rc = VINF_SUCCESS;
4327	PGMM pGMM;
4328	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4329	unsigned cPages = pModule->aRegions[idxRegion].cbRegion >> PAGE_SHIFT;
4330
4331	AssertReturn(idxRegion < pModule->cRegions, VERR_INVALID_PARAMETER);
4332	AssertReturn(idxPage < cPages, VERR_INVALID_PARAMETER);
4333
4334	LogFlow(("GMMR0SharedModuleCheckRange %s base %RGv region %d idxPage %d\n", pModule->szName, pModule->Core.Key, idxRegion, idxPage));
4335
4336	PGMMSHAREDREGIONDESC pGlobalRegion = &pModule->aRegions[idxRegion];
4337	if (!pGlobalRegion->paHCPhysPageID)
4338	{
4339	/* First time; create a page descriptor array. */
4340	Log(("Allocate page descriptor array for %d pages\n", cPages));
4341	pGlobalRegion->paHCPhysPageID = (uint32_t )RTMemAlloc(cPages sizeof(*pGlobalRegion->paHCPhysPageID));
4342	if (!pGlobalRegion->paHCPhysPageID)
4343	{
4344	AssertFailed();
4345	rc = VERR_NO_MEMORY;
4346	goto end;
4347	}
4348	/* Invalidate all descriptors. */
4349	for (unsigned i = 0; i < cPages; i++)
4350	pGlobalRegion->paHCPhysPageID[i] = NIL_GMM_PAGEID;
4351	}
4352
4353	/* We've seen this shared page for the first time? */
4354	if (pGlobalRegion->paHCPhysPageID[idxPage] == NIL_GMM_PAGEID)
4355	{
4356	new_shared_page:
4357	Log(("New shared page guest %RGp host %RHp\n", pPageDesc->GCPhys, pPageDesc->HCPhys));
4358
4359	/* Easy case: just change the internal page type. */
4360	PGMMPAGE pPage = gmmR0GetPage(pGMM, pPageDesc->uHCPhysPageId);
4361	if (!pPage)
4362	{
4363	Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #1 (GCPhys=%RGp HCPhys=%RHp idxRegion=%#x idxPage=%#x)\n",
4364	pPageDesc->uHCPhysPageId, pPageDesc->GCPhys, pPageDesc->HCPhys, idxRegion, idxPage));
4365	AssertFailed();
4366	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4367	goto end;
4368	}
4369
4370	AssertMsg(pPageDesc->GCPhys == (pPage->Private.pfn << 12), ("desc %RGp gmm %RGp\n", pPageDesc->HCPhys, (pPage->Private.pfn << 12)));
4371
4372	gmmR0ConvertToSharedPage(pGMM, pGVM, pPageDesc->HCPhys, pPageDesc->uHCPhysPageId, pPage);
4373
4374	/* Keep track of these references. */
4375	pGlobalRegion->paHCPhysPageID[idxPage] = pPageDesc->uHCPhysPageId;
4376	}
4377	else
4378	{
4379	uint8_t pbLocalPage, pbSharedPage;
4380	uint8_t *pbChunk;
4381	PGMMCHUNK pChunk;
4382
4383	Assert(pPageDesc->uHCPhysPageId != pGlobalRegion->paHCPhysPageID[idxPage]);
4384
4385	Log(("Replace existing page guest %RGp host %RHp id %x -> id %x\n", pPageDesc->GCPhys, pPageDesc->HCPhys, pPageDesc->uHCPhysPageId, pGlobalRegion->paHCPhysPageID[idxPage]));
4386
4387	/* Get the shared page source. */
4388	PGMMPAGE pPage = gmmR0GetPage(pGMM, pGlobalRegion->paHCPhysPageID[idxPage]);
4389	if (!pPage)
4390	{
4391	Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #2 (idxRegion=%#x idxPage=%#x)\n",
4392	pPageDesc->uHCPhysPageId, idxRegion, idxPage));
4393	AssertFailed();
4394	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4395	goto end;
4396	}
4397	if (pPage->Common.u2State != GMM_PAGE_STATE_SHARED)
4398	{
4399	/* Page was freed at some point; invalidate this entry. */
4400	/** @todo this isn't really bullet proof. */
4401	Log(("Old shared page was freed -> create a new one\n"));
4402	pGlobalRegion->paHCPhysPageID[idxPage] = NIL_GMM_PAGEID;
4403	goto new_shared_page; /* ugly goto */
4404	}
4405
4406	Log(("Replace existing page guest host %RHp -> %RHp\n", pPageDesc->HCPhys, ((uint64_t)pPage->Shared.pfn) << PAGE_SHIFT));
4407
4408	/* Calculate the virtual address of the local page. */
4409	pChunk = gmmR0GetChunk(pGMM, pPageDesc->uHCPhysPageId >> GMM_CHUNKID_SHIFT);
4410	if (pChunk)
4411	{
4412	if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
4413	{
4414	Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #3\n", pPageDesc->uHCPhysPageId));
4415	AssertFailed();
4416	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4417	goto end;
4418	}
4419	pbLocalPage = pbChunk + ((pPageDesc->uHCPhysPageId & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
4420	}
4421	else
4422	{
4423	Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #4\n", pPageDesc->uHCPhysPageId));
4424	AssertFailed();
4425	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4426	goto end;
4427	}
4428
4429	/* Calculate the virtual address of the shared page. */
4430	pChunk = gmmR0GetChunk(pGMM, pGlobalRegion->paHCPhysPageID[idxPage] >> GMM_CHUNKID_SHIFT);
4431	Assert(pChunk); /* can't fail as gmmR0GetPage succeeded. */
4432
4433	/* Get the virtual address of the physical page; map the chunk into the VM process if not already done. */
4434	if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
4435	{
4436	Log(("Map chunk into process!\n"));
4437	rc = gmmR0MapChunk(pGMM, pGVM, pChunk, false /fRelaxedSem/, (PRTR3PTR)&pbChunk);
4438	if (rc != VINF_SUCCESS)
4439	{
4440	AssertRC(rc);
4441	goto end;
4442	}
4443	}
4444	pbSharedPage = pbChunk + ((pGlobalRegion->paHCPhysPageID[idxPage] & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
4445
4446	/** @todo write ASMMemComparePage. */
4447	if (memcmp(pbSharedPage, pbLocalPage, PAGE_SIZE))
4448	{
4449	Log(("Unexpected differences found between local and shared page; skip\n"));
4450	/* Signal to the caller that this one hasn't changed. */
4451	pPageDesc->uHCPhysPageId = NIL_GMM_PAGEID;
4452	goto end;
4453	}
4454
4455	/* Free the old local page. */
4456	GMMFREEPAGEDESC PageDesc;
4457
4458	PageDesc.idPage = pPageDesc->uHCPhysPageId;
4459	rc = gmmR0FreePages(pGMM, pGVM, 1, &PageDesc, GMMACCOUNT_BASE);
4460	AssertRCReturn(rc, rc);
4461
4462	gmmR0UseSharedPage(pGMM, pGVM, pPage);
4463
4464	/* Pass along the new physical address & page id. */
4465	pPageDesc->HCPhys = ((uint64_t)pPage->Shared.pfn) << PAGE_SHIFT;
4466	pPageDesc->uHCPhysPageId = pGlobalRegion->paHCPhysPageID[idxPage];
4467	}
4468	end:
4469	return rc;
4470	}
4471
4472
4473	/**
4474	* RTAvlGCPtrDestroy callback.
4475	*
4476	* @returns 0 or VERR_INTERNAL_ERROR.
4477	* @param pNode The node to destroy.
4478	* @param pvGVM The GVM handle.
4479	*/
4480	static DECLCALLBACK(int) gmmR0CleanupSharedModule(PAVLGCPTRNODECORE pNode, void *pvGVM)
4481	{
4482	PGVM pGVM = (PGVM)pvGVM;
4483	PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)pNode;
4484
4485	Assert(pRecVM->pGlobalModule \|\| pRecVM->fCollision);
4486	if (pRecVM->pGlobalModule)
4487	{
4488	PGMMSHAREDMODULE pRec = pRecVM->pGlobalModule;
4489	AssertPtr(pRec);
4490	Assert(pRec->cUsers);
4491
4492	Log(("gmmR0CleanupSharedModule: %s %s cUsers=%d\n", pRec->szName, pRec->szVersion, pRec->cUsers));
4493	pRec->cUsers--;
4494	if (pRec->cUsers == 0)
4495	{
4496	for (uint32_t i = 0; i < pRec->cRegions; i++)
4497	if (pRec->aRegions[i].paHCPhysPageID)
4498	RTMemFree(pRec->aRegions[i].paHCPhysPageID);
4499
4500	/* Remove from the tree and free memory. */
4501	PGMM pGMM;
4502	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4503	RTAvlGCPtrRemove(&pGMM->pGlobalSharedModuleTree, pRec->Core.Key);
4504	RTMemFree(pRec);
4505	}
4506	}
4507	RTMemFree(pRecVM);
4508	return 0;
4509	}
4510
4511
4512	/**
4513	* Used by GMMR0CleanupVM to clean up shared modules.
4514	*
4515	* This is called without taking the GMM lock so that it can be yielded as
4516	* needed here.
4517	*
4518	* @param pGMM The GMM handle.
4519	* @param pGVM The global VM handle.
4520	*/
4521	static void gmmR0SharedModuleCleanup(PGMM pGMM, PGVM pGVM)
4522	{
4523	gmmR0MutexAcquire(pGMM);
4524	GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
4525
4526	RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, pGVM);
4527
4528	gmmR0MutexRelease(pGMM);
4529	}
4530
4531	#endif /* VBOX_WITH_PAGE_SHARING */
4532
4533	/**
4534	* Removes all shared modules for the specified VM
4535	*
4536	* @returns VBox status code.
4537	* @param pVM VM handle
4538	* @param idCpu VCPU id
4539	*/
4540	GMMR0DECL(int) GMMR0ResetSharedModules(PVM pVM, VMCPUID idCpu)
4541	{
4542	#ifdef VBOX_WITH_PAGE_SHARING
4543	/*
4544	* Validate input and get the basics.
4545	*/
4546	PGMM pGMM;
4547	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4548	PGVM pGVM;
4549	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
4550	if (RT_FAILURE(rc))
4551	return rc;
4552
4553	/*
4554	* Take the semaphore and do some more validations.
4555	*/
4556	gmmR0MutexAcquire(pGMM);
4557	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4558	{
4559	Log(("GMMR0ResetSharedModules\n"));
4560	RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, pGVM);
4561
4562	rc = VINF_SUCCESS;
4563	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4564	}
4565	else
4566	rc = VERR_INTERNAL_ERROR_5;
4567
4568	gmmR0MutexRelease(pGMM);
4569	return rc;
4570	#else
4571	return VERR_NOT_IMPLEMENTED;
4572	#endif
4573	}
4574
4575	#ifdef VBOX_WITH_PAGE_SHARING
4576
4577	typedef struct
4578	{
4579	PGVM pGVM;
4580	VMCPUID idCpu;
4581	int rc;
4582	} GMMCHECKSHAREDMODULEINFO, *PGMMCHECKSHAREDMODULEINFO;
4583
4584	/**
4585	* Tree enumeration callback for checking a shared module.
4586	*/
4587	DECLCALLBACK(int) gmmR0CheckSharedModule(PAVLGCPTRNODECORE pNode, void *pvUser)
4588	{
4589	PGMMCHECKSHAREDMODULEINFO pInfo = (PGMMCHECKSHAREDMODULEINFO)pvUser;
4590	PGMMSHAREDMODULEPERVM pLocalModule = (PGMMSHAREDMODULEPERVM)pNode;
4591	PGMMSHAREDMODULE pGlobalModule = pLocalModule->pGlobalModule;
4592
4593	if ( !pLocalModule->fCollision
4594	&& pGlobalModule)
4595	{
4596	Log(("gmmR0CheckSharedModule: check %s %s base=%RGv size=%x collision=%d\n", pGlobalModule->szName, pGlobalModule->szVersion, pGlobalModule->Core.Key, pGlobalModule->cbModule, pLocalModule->fCollision));
4597	pInfo->rc = PGMR0SharedModuleCheck(pInfo->pGVM->pVM, pInfo->pGVM, pInfo->idCpu, pGlobalModule, pLocalModule->cRegions, pLocalModule->aRegions);
4598	if (RT_FAILURE(pInfo->rc))
4599	return 1; /* stop enumeration. */
4600	}
4601	return 0;
4602	}
4603
4604	#endif /* VBOX_WITH_PAGE_SHARING */
4605	#ifdef DEBUG_sandervl
4606
4607	/**
4608	* Setup for a GMMR0CheckSharedModules call (to allow log flush jumps back to ring 3)
4609	*
4610	* @returns VBox status code.
4611	* @param pVM VM handle
4612	*/
4613	GMMR0DECL(int) GMMR0CheckSharedModulesStart(PVM pVM)
4614	{
4615	/*
4616	* Validate input and get the basics.
4617	*/
4618	PGMM pGMM;
4619	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4620
4621	/*
4622	* Take the semaphore and do some more validations.
4623	*/
4624	gmmR0MutexAcquire(pGMM);
4625	if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4626	rc = VERR_INTERNAL_ERROR_5;
4627	else
4628	rc = VINF_SUCCESS;
4629
4630	return rc;
4631	}
4632
4633	/**
4634	* Clean up after a GMMR0CheckSharedModules call (to allow log flush jumps back to ring 3)
4635	*
4636	* @returns VBox status code.
4637	* @param pVM VM handle
4638	*/
4639	GMMR0DECL(int) GMMR0CheckSharedModulesEnd(PVM pVM)
4640	{
4641	/*
4642	* Validate input and get the basics.
4643	*/
4644	PGMM pGMM;
4645	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4646
4647	gmmR0MutexRelease(pGMM);
4648	return VINF_SUCCESS;
4649	}
4650
4651	#endif /* DEBUG_sandervl */
4652
4653	/**
4654	* Check all shared modules for the specified VM
4655	*
4656	* @returns VBox status code.
4657	* @param pVM VM handle
4658	* @param pVCpu VMCPU handle
4659	*/
4660	GMMR0DECL(int) GMMR0CheckSharedModules(PVM pVM, PVMCPU pVCpu)
4661	{
4662	#ifdef VBOX_WITH_PAGE_SHARING
4663	/*
4664	* Validate input and get the basics.
4665	*/
4666	PGMM pGMM;
4667	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4668	PGVM pGVM;
4669	int rc = GVMMR0ByVMAndEMT(pVM, pVCpu->idCpu, &pGVM);
4670	if (RT_FAILURE(rc))
4671	return rc;
4672
4673	# ifndef DEBUG_sandervl
4674	/*
4675	* Take the semaphore and do some more validations.
4676	*/
4677	gmmR0MutexAcquire(pGMM);
4678	# endif
4679	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4680	{
4681	GMMCHECKSHAREDMODULEINFO Info;
4682
4683	Log(("GMMR0CheckSharedModules\n"));
4684	Info.pGVM = pGVM;
4685	Info.idCpu = pVCpu->idCpu;
4686	Info.rc = VINF_SUCCESS;
4687
4688	RTAvlGCPtrDoWithAll(&pGVM->gmm.s.pSharedModuleTree, true /* fFromLeft */, gmmR0CheckSharedModule, &Info);
4689
4690	rc = Info.rc;
4691
4692	Log(("GMMR0CheckSharedModules done!\n"));
4693
4694	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4695	}
4696	else
4697	rc = VERR_INTERNAL_ERROR_5;
4698
4699	# ifndef DEBUG_sandervl
4700	gmmR0MutexRelease(pGMM);
4701	# endif
4702	return rc;
4703	#else
4704	return VERR_NOT_IMPLEMENTED;
4705	#endif
4706	}
4707
4708	#if defined(VBOX_STRICT) && HC_ARCH_BITS == 64
4709
4710	typedef struct
4711	{
4712	PGVM pGVM;
4713	PGMM pGMM;
4714	uint8_t *pSourcePage;
4715	bool fFoundDuplicate;
4716	} GMMFINDDUPPAGEINFO, *PGMMFINDDUPPAGEINFO;
4717
4718	/**
4719	* RTAvlU32DoWithAll callback.
4720	*
4721	* @returns 0
4722	* @param pNode The node to search.
4723	* @param pvInfo Pointer to the input parameters
4724	*/
4725	static DECLCALLBACK(int) gmmR0FindDupPageInChunk(PAVLU32NODECORE pNode, void *pvInfo)
4726	{
4727	PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
4728	PGMMFINDDUPPAGEINFO pInfo = (PGMMFINDDUPPAGEINFO)pvInfo;
4729	PGVM pGVM = pInfo->pGVM;
4730	PGMM pGMM = pInfo->pGMM;
4731	uint8_t *pbChunk;
4732
4733	/* Only take chunks not mapped into this VM process; not entirely correct. */
4734	if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
4735	{
4736	int rc = gmmR0MapChunk(pGMM, pGVM, pChunk, false /fRelaxedSem/, (PRTR3PTR)&pbChunk);
4737	if (RT_SUCCESS(rc))
4738	{
4739	/*
4740	* Look for duplicate pages
4741	*/
4742	unsigned iPage = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
4743	while (iPage-- > 0)
4744	{
4745	if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
4746	{
4747	uint8_t *pbDestPage = pbChunk + (iPage << PAGE_SHIFT);
4748
4749	if (!memcmp(pInfo->pSourcePage, pbDestPage, PAGE_SIZE))
4750	{
4751	pInfo->fFoundDuplicate = true;
4752	break;
4753	}
4754	}
4755	}
4756	gmmR0UnmapChunk(pGMM, pGVM, pChunk, false /fRelaxedSem/);
4757	}
4758	}
4759	return pInfo->fFoundDuplicate; /* (stops search if true) */
4760	}
4761
4762
4763	/**
4764	* Find a duplicate of the specified page in other active VMs
4765	*
4766	* @returns VBox status code.
4767	* @param pVM VM handle
4768	* @param pReq Request packet
4769	*/
4770	GMMR0DECL(int) GMMR0FindDuplicatePageReq(PVM pVM, PGMMFINDDUPLICATEPAGEREQ pReq)
4771	{
4772	/*
4773	* Validate input and pass it on.
4774	*/
4775	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
4776	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
4777	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
4778
4779	PGMM pGMM;
4780	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4781
4782	PGVM pGVM;
4783	int rc = GVMMR0ByVM(pVM, &pGVM);
4784	if (RT_FAILURE(rc))
4785	return rc;
4786
4787	/*
4788	* Take the semaphore and do some more validations.
4789	*/
4790	rc = gmmR0MutexAcquire(pGMM);
4791	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4792	{
4793	uint8_t *pbChunk;
4794	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, pReq->idPage >> GMM_CHUNKID_SHIFT);
4795	if (pChunk)
4796	{
4797	if (gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
4798	{
4799	uint8_t *pbSourcePage = pbChunk + ((pReq->idPage & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
4800	PGMMPAGE pPage = gmmR0GetPage(pGMM, pReq->idPage);
4801	if (pPage)
4802	{
4803	GMMFINDDUPPAGEINFO Info;
4804	Info.pGVM = pGVM;
4805	Info.pGMM = pGMM;
4806	Info.pSourcePage = pbSourcePage;
4807	Info.fFoundDuplicate = false;
4808	RTAvlU32DoWithAll(&pGMM->pChunks, true /* fFromLeft */, gmmR0FindDupPageInChunk, &Info);
4809
4810	pReq->fDuplicate = Info.fFoundDuplicate;
4811	}
4812	else
4813	{
4814	AssertFailed();
4815	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4816	}
4817	}
4818	else
4819	AssertFailed();
4820	}
4821	else
4822	AssertFailed();
4823	}
4824	else
4825	rc = VERR_INTERNAL_ERROR_5;
4826
4827	gmmR0MutexRelease(pGMM);
4828	return rc;
4829	}
4830
4831	#endif /* VBOX_STRICT && HC_ARCH_BITS == 64 */
4832

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/VMMR0/GMMR0.cpp@ 37203

Download in other formats: