VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GMMR0.cpp@36944

Last change on this file since 36944 was 36944, checked in by vboxsync, 14 years ago

VBox/param.h: Bumped the max RAM limit up to 2TB on 64-bit hosts (was 16GB). Docs

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 151.8 KB
1/* $Id: GMMR0.cpp 36944 2011-05-03 17:13:31Z vboxsync $ */
2/** @file
3 * GMM - Global Memory Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2011 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gmm GMM - The Global Memory Manager
20 *
21 * As the name indicates, this component is responsible for global memory
22 * management. Currently only guest RAM is allocated from the GMM, but this
23 * may change to include shadow page tables and other bits later.
24 *
25 * Guest RAM is managed as individual pages, but allocated from the host OS
26 * in chunks for reasons of portability / efficiency. To minimize the memory
27 * footprint all tracking structure must be as small as possible without
28 * unnecessary performance penalties.
29 *
30 * The allocation chunks have a fixed size, defined at compile time
31 * by the #GMM_CHUNK_SIZE \#define.
32 *
33 * Each chunk is given a unique ID. Each page also has a unique ID. The
34 * relationship between the two IDs is:
35 * @code
36 * GMM_CHUNK_SHIFT = log2(GMM_CHUNK_SIZE / PAGE_SIZE);
37 * idPage = (idChunk << GMM_CHUNK_SHIFT) | iPage;
38 * @endcode
39 * Where iPage is the index of the page within the chunk. This ID scheme
40 * permits efficient chunk and page lookup, but it relies on the chunk size
41 * to be set at compile time. The chunks are organized in an AVL tree with their
42 * IDs being the keys.
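 *
 * As a small illustration (assuming 2 MB chunks and 4 KB pages, which would
 * make GMM_CHUNK_SHIFT equal to 9), the two parts of a page ID can be
 * recovered like this:
 * @code
 * uint32_t idChunk = idPage >> GMM_CHUNK_SHIFT;             // 0x2403 >> 9    == 0x12
 * uint32_t iPage   = idPage & ((1 << GMM_CHUNK_SHIFT) - 1); // 0x2403 & 0x1ff == 3
 * @endcode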
43 *
44 * The physical address of each page in an allocation chunk is maintained by
45 * the #RTR0MEMOBJ and obtained using #RTR0MemObjGetPagePhysAddr. There is no
46 * need to duplicate this information (it would cost 8 bytes per page if we did).
47 *
48 * So what do we need to track per page? Most importantly we need to know
49 * which state the page is in:
50 * - Private - Allocated for (eventually) backing one particular VM page.
51 * - Shared - Readonly page that is used by one or more VMs and treated
52 * as COW by PGM.
53 * - Free - Not used by anyone.
54 *
55 * For the page replacement operations (sharing, defragmenting and freeing)
56 * to be somewhat efficient, private pages need to be associated with a
57 * particular page in a particular VM.
58 *
59 * Tracking the usage of shared pages is impractical and expensive, so we'll
60 * settle for a reference counting system instead.
61 *
62 * Free pages will be chained on LIFOs.
63 *
64 * On 64-bit systems we will use a 64-bit bitfield per page, while on 32-bit
65 * systems a 32-bit bitfield will have to suffice because of address space
66 * limitations. The #GMMPAGE structure shows the details.
67 *
68 *
69 * @section sec_gmm_alloc_strat Page Allocation Strategy
70 *
71 * The strategy for allocating pages has to take fragmentation and shared
72 * pages into account, or we may end up with 2000 chunks with only
73 * a few pages in each. Shared pages cannot easily be reallocated because
74 * of the inaccurate usage accounting (see above). Private pages can be
75 * reallocated by a defragmentation thread in the same manner that sharing
76 * is done.
77 *
78 * The first approach is to manage the free pages in two sets depending on
79 * whether they are mainly for the allocation of shared or private pages.
80 * In the initial implementation there will be almost no possibility for
81 * mixing shared and private pages in the same chunk (only if we're really
82 * stressed on memory), but when we implement forking of VMs and have to
83 * deal with lots of COW pages it'll start getting kind of interesting.
84 *
85 * The sets are lists of chunks with approximately the same number of
86 * free pages. Say the chunk size is 1MB, meaning 256 pages, and a set
87 * consists of 16 lists. So, the first list will contain the chunks with
88 * 1-16 free pages, the second covers 17-32, and so on. The chunks will be
89 * moved between the lists as pages are freed up or allocated.
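 *
 * A minimal sketch of the list selection, mirroring what gmmR0LinkChunk does
 * further down (GMM_CHUNK_FREE_SET_SHIFT being the log2 of the number of
 * pages each list covers):
 * @code
 * unsigned iList = (pChunk->cFree - 1) >> GMM_CHUNK_FREE_SET_SHIFT;
 * pChunk->pFreeNext = pSet->apLists[iList];
 * pSet->apLists[iList] = pChunk;
 * @endcode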
90 *
91 *
92 * @section sec_gmm_costs Costs
93 *
94 * The per-page cost in kernel space is 32 bits plus whatever RTR0MEMOBJ
95 * entails. In addition there is the chunk cost of approximately
96 * (sizeof(RTR0MEMOBJ) + sizeof(CHUNK)) / 2^CHUNK_SHIFT bytes per page.
97 *
98 * On Windows the per-page #RTR0MEMOBJ cost is 32 bits on 32-bit Windows
99 * and 64 bits on 64-bit Windows (a PFN_NUMBER in the MDL). So, 64 bits per page.
100 * The cost on Linux is identical, but here it's because of sizeof(struct page *).
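 *
 * A rough worked example of the per-page tracking cost, assuming 4 KB pages:
 * on a 32-bit Windows host that is 4 bytes of GMMPAGE plus a 4-byte
 * PFN_NUMBER, about 8 bytes (~0.2%) per page; on a 64-bit host it is 8 bytes
 * of GMMPAGE plus 8 bytes in the MDL / struct page pointer, about 16 bytes
 * (~0.4%) per page, ignoring the comparatively small chunk overhead.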
101 *
102 *
103 * @section sec_gmm_legacy Legacy Mode for Non-Tier-1 Platforms
104 *
105 * In legacy mode the page source is locked user pages and not
106 * #RTR0MemObjAllocPhysNC, this means that a page can only be allocated
107 * by the VM that locked it. We will make no attempt at implementing
108 * page sharing on these systems, just do enough to make it all work.
109 *
110 *
111 * @subsection sub_gmm_locking Serializing
112 *
113 * One simple fast mutex will be employed in the initial implementation, not
114 * two as mentioned in @ref subsec_pgmPhys_Serializing.
115 *
116 * @see @ref subsec_pgmPhys_Serializing
117 *
118 *
119 * @section sec_gmm_overcommit Memory Over-Commitment Management
120 *
121 * The GVM will have to do the system wide memory over-commitment
122 * management. My current ideas are:
123 * - Per VM oc policy that indicates how much to initially commit
124 * to it and what to do in an out-of-memory situation.
125 * - Prevent overtaxing the host.
126 *
127 * There are some challenges here; the main ones are configurability and
128 * security. Should we for instance permit anyone to request 100% memory
129 * commitment? Who should be allowed to do runtime adjustments of the
130 * config? And how do we prevent these settings from being lost when the last
131 * VM process exits? The solution is probably to have an optional root
132 * daemon that will keep VMMR0.r0 in memory and enable the security measures.
133 *
134 *
135 *
136 * @section sec_gmm_numa NUMA
137 *
138 * NUMA considerations will be designed and implemented a bit later.
139 *
140 * The preliminary guess is that we will have to try to allocate memory as
141 * close as possible to the CPUs the VM is executed on (EMT and additional CPU
142 * threads), which means it's mostly about allocation and sharing policies.
143 * Both the scheduler and the allocator interface will have to supply some NUMA
144 * info and we'll need a way to calculate access costs.
145 *
146 */
147
148
149/*******************************************************************************
150* Header Files *
151*******************************************************************************/
152#define LOG_GROUP LOG_GROUP_GMM
153#include <VBox/rawpci.h>
154#include <VBox/vmm/vm.h>
155#include <VBox/vmm/gmm.h>
156#include "GMMR0Internal.h"
157#include <VBox/vmm/gvm.h>
158#include <VBox/vmm/pgm.h>
159#include <VBox/log.h>
160#include <VBox/param.h>
161#include <VBox/err.h>
162#include <iprt/asm.h>
163#include <iprt/avl.h>
164#include <iprt/mem.h>
165#include <iprt/memobj.h>
166#include <iprt/semaphore.h>
167#include <iprt/string.h>
168
169
170/*******************************************************************************
171* Structures and Typedefs *
172*******************************************************************************/
173/** Pointer to set of free chunks. */
174typedef struct GMMCHUNKFREESET *PGMMCHUNKFREESET;
175
176/** Pointer to a GMM allocation chunk. */
177typedef struct GMMCHUNK *PGMMCHUNK;
178
179/**
180 * The per-page tracking structure employed by the GMM.
181 *
182 * On 32-bit hosts some trickery is necessary to compress all
183 * the information into 32 bits. When the fSharedFree member is set,
184 * the 30th bit decides whether it's a free page or not.
185 *
186 * Because of the different layout on 32-bit and 64-bit hosts, macros
187 * are used to get and set some of the data.
188 */
189typedef union GMMPAGE
190{
191#if HC_ARCH_BITS == 64
192 /** Unsigned integer view. */
193 uint64_t u;
194
195 /** The common view. */
196 struct GMMPAGECOMMON
197 {
198 uint32_t uStuff1 : 32;
199 uint32_t uStuff2 : 30;
200 /** The page state. */
201 uint32_t u2State : 2;
202 } Common;
203
204 /** The view of a private page. */
205 struct GMMPAGEPRIVATE
206 {
207 /** The guest page frame number. (Max addressable: 2 ^ 44 - 16) */
208 uint32_t pfn;
209 /** The GVM handle. (64K VMs) */
210 uint32_t hGVM : 16;
211 /** Reserved. */
212 uint32_t u16Reserved : 14;
213 /** The page state. */
214 uint32_t u2State : 2;
215 } Private;
216
217 /** The view of a shared page. */
218 struct GMMPAGESHARED
219 {
220 /** The host page frame number. (Max addressable: 2 ^ 44 - 16) */
221 uint32_t pfn;
222 /** The reference count (64K VMs). */
223 uint32_t cRefs : 16;
224 /** Reserved. Checksum or something? Two hGVMs for forking? */
225 uint32_t u14Reserved : 14;
226 /** The page state. */
227 uint32_t u2State : 2;
228 } Shared;
229
230 /** The view of a free page. */
231 struct GMMPAGEFREE
232 {
233 /** The index of the next page in the free list. UINT16_MAX is NIL. */
234 uint16_t iNext;
235 /** Reserved. Checksum or something? */
236 uint16_t u16Reserved0;
237 /** Reserved. Checksum or something? */
238 uint32_t u30Reserved1 : 30;
239 /** The page state. */
240 uint32_t u2State : 2;
241 } Free;
242
243#else /* 32-bit */
244 /** Unsigned integer view. */
245 uint32_t u;
246
247 /** The common view. */
248 struct GMMPAGECOMMON
249 {
250 uint32_t uStuff : 30;
251 /** The page state. */
252 uint32_t u2State : 2;
253 } Common;
254
255 /** The view of a private page. */
256 struct GMMPAGEPRIVATE
257 {
258 /** The guest page frame number. (Max addressable: 2 ^ 36) */
259 uint32_t pfn : 24;
260 /** The GVM handle. (127 VMs) */
261 uint32_t hGVM : 7;
262 /** The top page state bit, MBZ. */
263 uint32_t fZero : 1;
264 } Private;
265
266 /** The view of a shared page. */
267 struct GMMPAGESHARED
268 {
269 /** The reference count. */
270 uint32_t cRefs : 30;
271 /** The page state. */
272 uint32_t u2State : 2;
273 } Shared;
274
275 /** The view of a free page. */
276 struct GMMPAGEFREE
277 {
278 /** The index of the next page in the free list. UINT16_MAX is NIL. */
279 uint32_t iNext : 16;
280 /** Reserved. Checksum or something? */
281 uint32_t u14Reserved : 14;
282 /** The page state. */
283 uint32_t u2State : 2;
284 } Free;
285#endif
286} GMMPAGE;
287AssertCompileSize(GMMPAGE, sizeof(RTHCUINTPTR));
288/** Pointer to a GMMPAGE. */
289typedef GMMPAGE *PGMMPAGE;
290
291
292/** @name The Page States.
293 * @{ */
294/** A private page. */
295#define GMM_PAGE_STATE_PRIVATE 0
296/** A private page - alternative value used on the 32-bit implementation.
297 * This will never be used on 64-bit hosts. */
298#define GMM_PAGE_STATE_PRIVATE_32 1
299/** A shared page. */
300#define GMM_PAGE_STATE_SHARED 2
301/** A free page. */
302#define GMM_PAGE_STATE_FREE 3
303/** @} */
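
/* Purely illustrative: this is how gmmR0CleanupVMScanChunk further down uses
 * the views and states defined above to put a page back into the free state:
 *
 *   pChunk->aPages[iPage].u            = 0;
 *   pChunk->aPages[iPage].Free.iNext   = pChunk->iFreeHead;
 *   pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
 *   pChunk->iFreeHead                  = iPage;
 */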
304
305
306/** @def GMM_PAGE_IS_PRIVATE
307 *
308 * @returns true if private, false if not.
309 * @param pPage The GMM page.
310 */
311#if HC_ARCH_BITS == 64
312# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_PRIVATE )
313#else
314# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Private.fZero == 0 )
315#endif
316
317/** @def GMM_PAGE_IS_SHARED
318 *
319 * @returns true if shared, false if not.
320 * @param pPage The GMM page.
321 */
322#define GMM_PAGE_IS_SHARED(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_SHARED )
323
324/** @def GMM_PAGE_IS_FREE
325 *
326 * @returns true if free, false if not.
327 * @param pPage The GMM page.
328 */
329#define GMM_PAGE_IS_FREE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_FREE )
330
331/** @def GMM_PAGE_PFN_LAST
332 * The last valid guest pfn range.
333 * @remark Some of the values outside the range have special meanings,
334 * see GMM_PAGE_PFN_UNSHAREABLE.
335 */
336#if HC_ARCH_BITS == 64
337# define GMM_PAGE_PFN_LAST UINT32_C(0xfffffff0)
338#else
339# define GMM_PAGE_PFN_LAST UINT32_C(0x00fffff0)
340#endif
341AssertCompile(GMM_PAGE_PFN_LAST == (GMM_GCPHYS_LAST >> PAGE_SHIFT));
342
343/** @def GMM_PAGE_PFN_UNSHAREABLE
344 * Indicates that this page isn't used for normal guest memory and thus isn't shareable.
345 */
346#if HC_ARCH_BITS == 64
347# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0xfffffff1)
348#else
349# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0x00fffff1)
350#endif
351AssertCompile(GMM_PAGE_PFN_UNSHAREABLE == (GMM_GCPHYS_UNSHAREABLE >> PAGE_SHIFT));
352
353
354/**
355 * A GMM allocation chunk ring-3 mapping record.
356 *
357 * This should really be associated with a session and not a VM, but
358 * it's simpler to associate it with a VM and clean up when the VM object
359 * is destroyed.
360 */
361typedef struct GMMCHUNKMAP
362{
363 /** The mapping object. */
364 RTR0MEMOBJ MapObj;
365 /** The VM owning the mapping. */
366 PGVM pGVM;
367} GMMCHUNKMAP;
368/** Pointer to a GMM allocation chunk mapping. */
369typedef struct GMMCHUNKMAP *PGMMCHUNKMAP;
370
371typedef enum GMMCHUNKTYPE
372{
373 GMMCHUNKTYPE_INVALID = 0,
374 GMMCHUNKTYPE_NON_CONTINUOUS = 1, /* 4 KB pages */
375 GMMCHUNKTYPE_CONTINUOUS = 2, /* one 2 MB continuous physical range. */
376 GMMCHUNKTYPE_32BIT_HACK = 0x7fffffff
377} GMMCHUNKTYPE;
378
379
380/**
381 * A GMM allocation chunk.
382 */
383typedef struct GMMCHUNK
384{
385 /** The AVL node core.
386 * The Key is the chunk ID. */
387 AVLU32NODECORE Core;
388 /** The memory object.
389 * Either from RTR0MemObjAllocPhysNC or RTR0MemObjLockUser depending on
390 * what the host can dish up. */
391 RTR0MEMOBJ MemObj;
392 /** Pointer to the next chunk in the free list. */
393 PGMMCHUNK pFreeNext;
394 /** Pointer to the previous chunk in the free list. */
395 PGMMCHUNK pFreePrev;
396 /** Pointer to the free set this chunk belongs to. NULL for
397 * chunks with no free pages. */
398 PGMMCHUNKFREESET pSet;
399 /** Pointer to an array of mappings. */
400 PGMMCHUNKMAP paMappings;
401 /** The number of mappings. */
402 uint16_t cMappings;
403 /** The head of the list of free pages. UINT16_MAX is the NIL value. */
404 uint16_t iFreeHead;
405 /** The number of free pages. */
406 uint16_t cFree;
407 /** The GVM handle of the VM that first allocated pages from this chunk; this
408 * is used as a preference when there are several chunks to choose from.
409 * When in bound memory mode this isn't a preference any longer. */
410 uint16_t hGVM;
411 /** The number of private pages. */
412 uint16_t cPrivate;
413 /** The number of shared pages. */
414 uint16_t cShared;
415 /** Chunk type */
416 GMMCHUNKTYPE enmType;
417 /** The pages. */
418 GMMPAGE aPages[GMM_CHUNK_SIZE >> PAGE_SHIFT];
419} GMMCHUNK;
420
421
422/**
423 * An allocation chunk TLB entry.
424 */
425typedef struct GMMCHUNKTLBE
426{
427 /** The chunk id. */
428 uint32_t idChunk;
429 /** Pointer to the chunk. */
430 PGMMCHUNK pChunk;
431} GMMCHUNKTLBE;
432/** Pointer to an allocation chunk TLB entry. */
433typedef GMMCHUNKTLBE *PGMMCHUNKTLBE;
434
435
436/** The number of entries in the allocation chunk TLB. */
437#define GMM_CHUNKTLB_ENTRIES 32
438/** Gets the TLB entry index for the given Chunk ID. */
439#define GMM_CHUNKTLB_IDX(idChunk) ( (idChunk) & (GMM_CHUNKTLB_ENTRIES - 1) )
440
441/**
442 * An allocation chunk TLB.
443 */
444typedef struct GMMCHUNKTLB
445{
446 /** The TLB entries. */
447 GMMCHUNKTLBE aEntries[GMM_CHUNKTLB_ENTRIES];
448} GMMCHUNKTLB;
449/** Pointer to an allocation chunk TLB. */
450typedef GMMCHUNKTLB *PGMMCHUNKTLB;
451
452
453/** The GMMCHUNK::cFree shift count. */
454#define GMM_CHUNK_FREE_SET_SHIFT 4
455/** The GMMCHUNK::cFree mask for use when considering relinking a chunk. */
456#define GMM_CHUNK_FREE_SET_MASK 15
457/** The number of lists in a set. */
458#define GMM_CHUNK_FREE_SET_LISTS (GMM_CHUNK_NUM_PAGES >> GMM_CHUNK_FREE_SET_SHIFT)
459
460/**
461 * A set of free chunks.
462 */
463typedef struct GMMCHUNKFREESET
464{
465 /** The number of free pages in the set. */
466 uint64_t cFreePages;
467 /** Chunks ordered by increasing number of free pages. */
468 PGMMCHUNK apLists[GMM_CHUNK_FREE_SET_LISTS];
469} GMMCHUNKFREESET;
470
471
472/**
473 * The GMM instance data.
474 */
475typedef struct GMM
476{
477 /** Magic / eye catcher. GMM_MAGIC */
478 uint32_t u32Magic;
479 /** The fast mutex protecting the GMM.
480 * More fine grained locking can be implemented later if necessary. */
481 RTSEMFASTMUTEX Mtx;
482 /** The chunk tree. */
483 PAVLU32NODECORE pChunks;
484 /** The chunk TLB. */
485 GMMCHUNKTLB ChunkTLB;
486 /** The private free set. */
487 GMMCHUNKFREESET Private;
488 /** The shared free set. */
489 GMMCHUNKFREESET Shared;
490
491 /** Shared module tree (global). */
492 /** @todo separate trees for distinctly different guest OSes. */
493 PAVLGCPTRNODECORE pGlobalSharedModuleTree;
494
495 /** The maximum number of pages we're allowed to allocate.
496 * @gcfgm 64-bit GMM/MaxPages Direct.
497 * @gcfgm 32-bit GMM/PctPages Relative to the number of host pages. */
498 uint64_t cMaxPages;
499 /** The number of pages that have been reserved.
500 * The deal is that cReservedPages - cOverCommittedPages <= cMaxPages. */
501 uint64_t cReservedPages;
502 /** The number of pages that we have over-committed in reservations. */
503 uint64_t cOverCommittedPages;
504 /** The number of actually allocated (committed if you like) pages. */
505 uint64_t cAllocatedPages;
506 /** The number of pages that are shared. A subset of cAllocatedPages. */
507 uint64_t cSharedPages;
508 /** The number of pages that are actually shared between VMs. */
509 uint64_t cDuplicatePages;
510 /** The number of shared pages that have been left behind by
511 * VMs not doing proper cleanups. */
512 uint64_t cLeftBehindSharedPages;
513 /** The number of allocation chunks.
514 * (The number of pages we've allocated from the host can be derived from this.) */
515 uint32_t cChunks;
516 /** The number of currently ballooned pages. */
517 uint64_t cBalloonedPages;
518
519 /** The legacy allocation mode indicator.
520 * This is determined at initialization time. */
521 bool fLegacyAllocationMode;
522 /** The bound memory mode indicator.
523 * When set, the memory will be bound to a specific VM and never
524 * shared. This is always set if fLegacyAllocationMode is set.
525 * (Also determined at initialization time.) */
526 bool fBoundMemoryMode;
527 /** The number of registered VMs. */
528 uint16_t cRegisteredVMs;
529
530 /** The previous allocated Chunk ID.
531 * Used as a hint to avoid scanning the whole bitmap. */
532 uint32_t idChunkPrev;
533 /** Chunk ID allocation bitmap.
534 * Bits of allocated IDs are set, free ones are clear.
535 * The NIL id (0) is marked allocated. */
536 uint32_t bmChunkId[(GMM_CHUNKID_LAST + 1 + 31) / 32];
537} GMM;
538/** Pointer to the GMM instance. */
539typedef GMM *PGMM;
540
541/** The value of GMM::u32Magic (Katsuhiro Otomo). */
542#define GMM_MAGIC 0x19540414
543
544
545/*******************************************************************************
546* Global Variables *
547*******************************************************************************/
548/** Pointer to the GMM instance data. */
549static PGMM g_pGMM = NULL;
550
551/** Macro for obtaining and validating the g_pGMM pointer.
552 * On failure it will return from the invoking function with the specified return value.
553 *
554 * @param pGMM The name of the pGMM variable.
555 * @param rc The return value on failure. Use VERR_INTERNAL_ERROR for
556 * VBox status codes.
557 */
558#define GMM_GET_VALID_INSTANCE(pGMM, rc) \
559 do { \
560 (pGMM) = g_pGMM; \
561 AssertPtrReturn((pGMM), (rc)); \
562 AssertMsgReturn((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic), (rc)); \
563 } while (0)
564
565/** Macro for obtaining and validating the g_pGMM pointer, void function variant.
566 * On failure it will return from the invoking function.
567 *
568 * @param pGMM The name of the pGMM variable.
569 */
570#define GMM_GET_VALID_INSTANCE_VOID(pGMM) \
571 do { \
572 (pGMM) = g_pGMM; \
573 AssertPtrReturnVoid((pGMM)); \
574 AssertMsgReturnVoid((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic)); \
575 } while (0)
576
577
578/** @def GMM_CHECK_SANITY_UPON_ENTERING
579 * Checks the sanity of the GMM instance data before making changes.
580 *
581 * This macro is a stub by default and must be enabled manually in the code.
582 *
583 * @returns true if sane, false if not.
584 * @param pGMM The name of the pGMM variable.
585 */
586#if defined(VBOX_STRICT) && 0
587# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
588#else
589# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM) (true)
590#endif
591
592/** @def GMM_CHECK_SANITY_UPON_LEAVING
593 * Checks the sanity of the GMM instance data after making changes.
594 *
595 * This macro is a stub by default and must be enabled manually in the code.
596 *
597 * @returns true if sane, false if not.
598 * @param pGMM The name of the pGMM variable.
599 */
600#if defined(VBOX_STRICT) && 0
601# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
602#else
603# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM) (true)
604#endif
605
606/** @def GMM_CHECK_SANITY_IN_LOOPS
607 * Checks the sanity of the GMM instance in the allocation loops.
608 *
609 * This macro is a stub by default and must be enabled manually in the code.
610 *
611 * @returns true if sane, false if not.
612 * @param pGMM The name of the pGMM variable.
613 */
614#if defined(VBOX_STRICT) && 0
615# define GMM_CHECK_SANITY_IN_LOOPS(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
616#else
617# define GMM_CHECK_SANITY_IN_LOOPS(pGMM) (true)
618#endif
619
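
/* Purely illustrative: the typical entry/exit pattern the ring-0 entry points
 * below build out of the macros above (error paths trimmed):
 * @code
 * PGMM pGMM;
 * GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
 * int rc = RTSemFastMutexRequest(pGMM->Mtx);
 * AssertRC(rc);
 * if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
 * {
 *     // ... do the actual work ...
 *     GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
 * }
 * else
 *     rc = VERR_INTERNAL_ERROR_5;
 * RTSemFastMutexRelease(pGMM->Mtx);
 * @endcode
 */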
620
621/*******************************************************************************
622* Internal Functions *
623*******************************************************************************/
624static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM);
625static DECLCALLBACK(int) gmmR0CleanupVMScanChunk(PAVLU32NODECORE pNode, void *pvGMM);
626static DECLCALLBACK(int) gmmR0CleanupSharedModule(PAVLGCPTRNODECORE pNode, void *pvGVM);
627/*static*/ DECLCALLBACK(int) gmmR0CleanupVMDestroyChunk(PAVLU32NODECORE pNode, void *pvGVM);
628DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet);
629DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk);
630static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo);
631static void gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
632static void gmmR0FreeSharedPage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage);
633static int gmmR0UnmapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
634
635
636
637/**
638 * Initializes the GMM component.
639 *
640 * This is called when the VMMR0.r0 module is loaded and protected by the
641 * loader semaphore.
642 *
643 * @returns VBox status code.
644 */
645GMMR0DECL(int) GMMR0Init(void)
646{
647 LogFlow(("GMMInit:\n"));
648
649 /*
650 * Allocate the instance data and the lock(s).
651 */
652 PGMM pGMM = (PGMM)RTMemAllocZ(sizeof(*pGMM));
653 if (!pGMM)
654 return VERR_NO_MEMORY;
655 pGMM->u32Magic = GMM_MAGIC;
656 for (unsigned i = 0; i < RT_ELEMENTS(pGMM->ChunkTLB.aEntries); i++)
657 pGMM->ChunkTLB.aEntries[i].idChunk = NIL_GMM_CHUNKID;
658 ASMBitSet(&pGMM->bmChunkId[0], NIL_GMM_CHUNKID);
659
660 int rc = RTSemFastMutexCreate(&pGMM->Mtx);
661 if (RT_SUCCESS(rc))
662 {
663 /*
664 * Check and see if RTR0MemObjAllocPhysNC works.
665 */
666#if 0 /* later, see #3170. */
667 RTR0MEMOBJ MemObj;
668 rc = RTR0MemObjAllocPhysNC(&MemObj, _64K, NIL_RTHCPHYS);
669 if (RT_SUCCESS(rc))
670 {
671 rc = RTR0MemObjFree(MemObj, true);
672 AssertRC(rc);
673 }
674 else if (rc == VERR_NOT_SUPPORTED)
675 pGMM->fLegacyAllocationMode = pGMM->fBoundMemoryMode = true;
676 else
677 SUPR0Printf("GMMR0Init: RTR0MemObjAllocPhysNC(,64K,Any) -> %d!\n", rc);
678#else
679# if defined(RT_OS_WINDOWS) || (defined(RT_OS_SOLARIS) && ARCH_BITS == 64) || defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
680 pGMM->fLegacyAllocationMode = false;
681# if ARCH_BITS == 32
682 /* Don't reuse possibly partial chunks because of the virtual address space limitation. */
683 pGMM->fBoundMemoryMode = true;
684# else
685 pGMM->fBoundMemoryMode = false;
686# endif
687# else
688 pGMM->fLegacyAllocationMode = true;
689 pGMM->fBoundMemoryMode = true;
690# endif
691#endif
692
693 /*
694 * Query system page count and guess a reasonable cMaxPages value.
695 */
696 pGMM->cMaxPages = UINT32_MAX; /** @todo IPRT function for query ram size and such. */
697
698 g_pGMM = pGMM;
699 LogFlow(("GMMInit: pGMM=%p fLegacyAllocationMode=%RTbool fBoundMemoryMode=%RTbool\n", pGMM, pGMM->fLegacyAllocationMode, pGMM->fBoundMemoryMode));
700 return VINF_SUCCESS;
701 }
702
703 RTMemFree(pGMM);
704 SUPR0Printf("GMMR0Init: failed! rc=%d\n", rc);
705 return rc;
706}
707
708
709/**
710 * Terminates the GMM component.
711 */
712GMMR0DECL(void) GMMR0Term(void)
713{
714 LogFlow(("GMMTerm:\n"));
715
716 /*
717 * Take care / be paranoid...
718 */
719 PGMM pGMM = g_pGMM;
720 if (!VALID_PTR(pGMM))
721 return;
722 if (pGMM->u32Magic != GMM_MAGIC)
723 {
724 SUPR0Printf("GMMR0Term: u32Magic=%#x\n", pGMM->u32Magic);
725 return;
726 }
727
728 /*
729 * Undo what init did and free all the resources we've acquired.
730 */
731 /* Destroy the fundamentals. */
732 g_pGMM = NULL;
733 pGMM->u32Magic++;
734 RTSemFastMutexDestroy(pGMM->Mtx);
735 pGMM->Mtx = NIL_RTSEMFASTMUTEX;
736
737 /* free any chunks still hanging around. */
738 RTAvlU32Destroy(&pGMM->pChunks, gmmR0TermDestroyChunk, pGMM);
739
740 /* finally the instance data itself. */
741 RTMemFree(pGMM);
742 LogFlow(("GMMTerm: done\n"));
743}
744
745
746/**
747 * RTAvlU32Destroy callback.
748 *
749 * @returns 0
750 * @param pNode The node to destroy.
751 * @param pvGMM The GMM handle.
752 */
753static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM)
754{
755 PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
756
757 if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
758 SUPR0Printf("GMMR0Term: %p/%#x: cFree=%d cPrivate=%d cShared=%d cMappings=%d\n", pChunk,
759 pChunk->Core.Key, pChunk->cFree, pChunk->cPrivate, pChunk->cShared, pChunk->cMappings);
760
761 int rc = RTR0MemObjFree(pChunk->MemObj, true /* fFreeMappings */);
762 if (RT_FAILURE(rc))
763 {
764 SUPR0Printf("GMMR0Term: %p/%#x: RTRMemObjFree(%p,true) -> %d (cMappings=%d)\n", pChunk,
765 pChunk->Core.Key, pChunk->MemObj, rc, pChunk->cMappings);
766 AssertRC(rc);
767 }
768 pChunk->MemObj = NIL_RTR0MEMOBJ;
769
770 RTMemFree(pChunk->paMappings);
771 pChunk->paMappings = NULL;
772
773 RTMemFree(pChunk);
774 NOREF(pvGMM);
775 return 0;
776}
777
778
779/**
780 * Initializes the per-VM data for the GMM.
781 *
782 * This is called from within the GVMM lock (from GVMMR0CreateVM)
783 * and should only initialize the data members so GMMR0CleanupVM
784 * can deal with them. We reserve no memory or anything here,
785 * that's done later in GMMR0InitVM.
786 *
787 * @param pGVM Pointer to the Global VM structure.
788 */
789GMMR0DECL(void) GMMR0InitPerVMData(PGVM pGVM)
790{
791 AssertCompile(RT_SIZEOFMEMB(GVM,gmm.s) <= RT_SIZEOFMEMB(GVM,gmm.padding));
792
793 pGVM->gmm.s.enmPolicy = GMMOCPOLICY_INVALID;
794 pGVM->gmm.s.enmPriority = GMMPRIORITY_INVALID;
795 pGVM->gmm.s.fMayAllocate = false;
796}
797
798
799/**
800 * Cleans up when a VM is terminating.
801 *
802 * @param pGVM Pointer to the Global VM structure.
803 */
804GMMR0DECL(void) GMMR0CleanupVM(PGVM pGVM)
805{
806 LogFlow(("GMMR0CleanupVM: pGVM=%p:{.pVM=%p, .hSelf=%#x}\n", pGVM, pGVM->pVM, pGVM->hSelf));
807
808 PGMM pGMM;
809 GMM_GET_VALID_INSTANCE_VOID(pGMM);
810
811 int rc = RTSemFastMutexRequest(pGMM->Mtx);
812 AssertRC(rc);
813 GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
814
815#ifdef VBOX_WITH_PAGE_SHARING
816 /* Clean up all registered shared modules. */
817 RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, pGVM);
818#endif
819
820 /*
821 * The policy is 'INVALID' until the initial reservation
822 * request has been serviced.
823 */
824 if ( pGVM->gmm.s.enmPolicy > GMMOCPOLICY_INVALID
825 && pGVM->gmm.s.enmPolicy < GMMOCPOLICY_END)
826 {
827 /*
828 * If it's the last VM around, we can skip walking all the chunks looking
829 * for the pages owned by this VM and instead flush the whole shebang.
830 *
831 * This takes care of the eventuality that a VM has left shared page
832 * references behind (shouldn't happen of course, but you never know).
833 */
834 Assert(pGMM->cRegisteredVMs);
835 pGMM->cRegisteredVMs--;
836#if 0 /* disabled so it won't hide bugs. */
837 if (!pGMM->cRegisteredVMs)
838 {
839 RTAvlU32Destroy(&pGMM->pChunks, gmmR0CleanupVMDestroyChunk, pGMM);
840
841 for (unsigned i = 0; i < RT_ELEMENTS(pGMM->ChunkTLB.aEntries); i++)
842 {
843 pGMM->ChunkTLB.aEntries[i].idChunk = NIL_GMM_CHUNKID;
844 pGMM->ChunkTLB.aEntries[i].pChunk = NULL;
845 }
846
847 memset(&pGMM->Private, 0, sizeof(pGMM->Private));
848 memset(&pGMM->Shared, 0, sizeof(pGMM->Shared));
849
850 memset(&pGMM->bmChunkId[0], 0, sizeof(pGMM->bmChunkId));
851 ASMBitSet(&pGMM->bmChunkId[0], NIL_GMM_CHUNKID);
852
853 pGMM->cReservedPages = 0;
854 pGMM->cOverCommittedPages = 0;
855 pGMM->cAllocatedPages = 0;
856 pGMM->cSharedPages = 0;
857 pGMM->cDuplicatePages = 0;
858 pGMM->cLeftBehindSharedPages = 0;
859 pGMM->cChunks = 0;
860 pGMM->cBalloonedPages = 0;
861 }
862 else
863#endif
864 {
865 /*
866 * Walk the entire pool looking for pages that belong to this VM
867 * and left over mappings. (This'll only catch private pages, shared
868 * pages will be 'left behind'.)
869 */
870 /** @todo this might be kind of expensive with a lot of VMs and
871 * memory hanging around... */
872 uint64_t cPrivatePages = pGVM->gmm.s.cPrivatePages; /* save */
873 RTAvlU32DoWithAll(&pGMM->pChunks, true /* fFromLeft */, gmmR0CleanupVMScanChunk, pGVM);
874 if (pGVM->gmm.s.cPrivatePages)
875 SUPR0Printf("GMMR0CleanupVM: hGVM=%#x has %#x private pages that cannot be found!\n", pGVM->hSelf, pGVM->gmm.s.cPrivatePages);
876 pGMM->cAllocatedPages -= cPrivatePages;
877
878 /* free empty chunks. */
879 if (cPrivatePages)
880 {
881 PGMMCHUNK pCur = pGMM->Private.apLists[RT_ELEMENTS(pGMM->Private.apLists) - 1];
882 while (pCur)
883 {
884 PGMMCHUNK pNext = pCur->pFreeNext;
885 if ( pCur->cFree == GMM_CHUNK_NUM_PAGES
886 && ( !pGMM->fBoundMemoryMode
887 || pCur->hGVM == pGVM->hSelf))
888 gmmR0FreeChunk(pGMM, pGVM, pCur);
889 pCur = pNext;
890 }
891 }
892
893 /* account for shared pages that weren't freed. */
894 if (pGVM->gmm.s.cSharedPages)
895 {
896 Assert(pGMM->cSharedPages >= pGVM->gmm.s.cSharedPages);
897 SUPR0Printf("GMMR0CleanupVM: hGVM=%#x left %#x shared pages behind!\n", pGVM->hSelf, pGVM->gmm.s.cSharedPages);
898 pGMM->cLeftBehindSharedPages += pGVM->gmm.s.cSharedPages;
899 }
900
901 /* Clean up balloon statistics in case the VM process crashed. */
902 Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.cBalloonedPages);
903 pGMM->cBalloonedPages -= pGVM->gmm.s.cBalloonedPages;
904
905 /*
906 * Update the over-commitment management statistics.
907 */
908 pGMM->cReservedPages -= pGVM->gmm.s.Reserved.cBasePages
909 + pGVM->gmm.s.Reserved.cFixedPages
910 + pGVM->gmm.s.Reserved.cShadowPages;
911 switch (pGVM->gmm.s.enmPolicy)
912 {
913 case GMMOCPOLICY_NO_OC:
914 break;
915 default:
916 /** @todo Update GMM->cOverCommittedPages */
917 break;
918 }
919 }
920 }
921
922 /* zap the GVM data. */
923 pGVM->gmm.s.enmPolicy = GMMOCPOLICY_INVALID;
924 pGVM->gmm.s.enmPriority = GMMPRIORITY_INVALID;
925 pGVM->gmm.s.fMayAllocate = false;
926
927 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
928 RTSemFastMutexRelease(pGMM->Mtx);
929
930 LogFlow(("GMMR0CleanupVM: returns\n"));
931}
932
933
934/**
935 * RTAvlU32DoWithAll callback.
936 *
937 * @returns 0
938 * @param pNode The node to search.
939 * @param pvGVM Pointer to the shared VM structure.
940 */
941static DECLCALLBACK(int) gmmR0CleanupVMScanChunk(PAVLU32NODECORE pNode, void *pvGVM)
942{
943 PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
944 PGVM pGVM = (PGVM)pvGVM;
945
946 /*
947 * Look for pages belonging to the VM.
948 * (Perform some internal checks while we're scanning.)
949 */
950#ifndef VBOX_STRICT
951 if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
952#endif
953 {
954 unsigned cPrivate = 0;
955 unsigned cShared = 0;
956 unsigned cFree = 0;
957
958 gmmR0UnlinkChunk(pChunk); /* avoiding cFreePages updates. */
959
960 uint16_t hGVM = pGVM->hSelf;
961 unsigned iPage = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
962 while (iPage-- > 0)
963 if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
964 {
965 if (pChunk->aPages[iPage].Private.hGVM == hGVM)
966 {
967 /*
968 * Free the page.
969 *
970 * The reason for not using gmmR0FreePrivatePage here is that we
971 * must *not* cause the chunk to be freed from under us - we're in
972 * an AVL tree walk here.
973 */
974 pChunk->aPages[iPage].u = 0;
975 pChunk->aPages[iPage].Free.iNext = pChunk->iFreeHead;
976 pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
977 pChunk->iFreeHead = iPage;
978 pChunk->cPrivate--;
979 pChunk->cFree++;
980 pGVM->gmm.s.cPrivatePages--;
981 cFree++;
982 }
983 else
984 cPrivate++;
985 }
986 else if (GMM_PAGE_IS_FREE(&pChunk->aPages[iPage]))
987 cFree++;
988 else
989 cShared++;
990
991 gmmR0LinkChunk(pChunk, pChunk->cShared ? &g_pGMM->Shared : &g_pGMM->Private);
992
993 /*
994 * Did it add up?
995 */
996 if (RT_UNLIKELY( pChunk->cFree != cFree
997 || pChunk->cPrivate != cPrivate
998 || pChunk->cShared != cShared))
999 {
1000 SUPR0Printf("gmmR0CleanupVMScanChunk: Chunk %p/%#x has bogus stats - free=%d/%d private=%d/%d shared=%d/%d\n",
1001 pChunk->cFree, cFree, pChunk->cPrivate, cPrivate, pChunk->cShared, cShared);
1002 pChunk->cFree = cFree;
1003 pChunk->cPrivate = cPrivate;
1004 pChunk->cShared = cShared;
1005 }
1006 }
1007
1008 /*
1009 * Look for the mapping belonging to the terminating VM.
1010 */
1011 for (unsigned i = 0; i < pChunk->cMappings; i++)
1012 if (pChunk->paMappings[i].pGVM == pGVM)
1013 {
1014 RTR0MEMOBJ MemObj = pChunk->paMappings[i].MapObj;
1015
1016 pChunk->cMappings--;
1017 if (i < pChunk->cMappings)
1018 pChunk->paMappings[i] = pChunk->paMappings[pChunk->cMappings];
1019 pChunk->paMappings[pChunk->cMappings].pGVM = NULL;
1020 pChunk->paMappings[pChunk->cMappings].MapObj = NIL_RTR0MEMOBJ;
1021
1022 int rc = RTR0MemObjFree(MemObj, false /* fFreeMappings (NA) */);
1023 if (RT_FAILURE(rc))
1024 {
1025 SUPR0Printf("gmmR0CleanupVMScanChunk: %p/%#x: mapping #%x: RTRMemObjFree(%p,false) -> %d \n",
1026 pChunk, pChunk->Core.Key, i, MemObj, rc);
1027 AssertRC(rc);
1028 }
1029 break;
1030 }
1031
1032 /*
1033 * If not in bound memory mode, we should reset the hGVM field
1034 * if it has our handle in it.
1035 */
1036 if (pChunk->hGVM == pGVM->hSelf)
1037 {
1038 if (!g_pGMM->fBoundMemoryMode)
1039 pChunk->hGVM = NIL_GVM_HANDLE;
1040 else if (pChunk->cFree != GMM_CHUNK_NUM_PAGES)
1041 {
1042 SUPR0Printf("gmmR0CleanupVMScanChunk: %p/%#x: cFree=%#x - it should be 0 in bound mode!\n",
1043 pChunk, pChunk->Core.Key, pChunk->cFree);
1044 AssertMsgFailed(("%p/%#x: cFree=%#x - it should be 0 in bound mode!\n", pChunk, pChunk->Core.Key, pChunk->cFree));
1045
1046 gmmR0UnlinkChunk(pChunk);
1047 pChunk->cFree = GMM_CHUNK_NUM_PAGES;
1048 gmmR0LinkChunk(pChunk, pChunk->cShared ? &g_pGMM->Shared : &g_pGMM->Private);
1049 }
1050 }
1051
1052 return 0;
1053}
1054
1055
1056/**
1057 * RTAvlU32Destroy callback for GMMR0CleanupVM.
1058 *
1059 * @returns 0
1060 * @param pNode The node (allocation chunk) to destroy.
1061 * @param pvGVM Pointer to the shared VM structure.
1062 */
1063/*static*/ DECLCALLBACK(int) gmmR0CleanupVMDestroyChunk(PAVLU32NODECORE pNode, void *pvGVM)
1064{
1065 PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
1066 PGVM pGVM = (PGVM)pvGVM;
1067
1068 for (unsigned i = 0; i < pChunk->cMappings; i++)
1069 {
1070 if (pChunk->paMappings[i].pGVM != pGVM)
1071 SUPR0Printf("gmmR0CleanupVMDestroyChunk: %p/%#x: mapping #%x: pGVM=%p exepcted %p\n", pChunk,
1072 pChunk->Core.Key, i, pChunk->paMappings[i].pGVM, pGVM);
1073 int rc = RTR0MemObjFree(pChunk->paMappings[i].MapObj, false /* fFreeMappings (NA) */);
1074 if (RT_FAILURE(rc))
1075 {
1076 SUPR0Printf("gmmR0CleanupVMDestroyChunk: %p/%#x: mapping #%x: RTRMemObjFree(%p,false) -> %d \n", pChunk,
1077 pChunk->Core.Key, i, pChunk->paMappings[i].MapObj, rc);
1078 AssertRC(rc);
1079 }
1080 }
1081
1082 int rc = RTR0MemObjFree(pChunk->MemObj, true /* fFreeMappings */);
1083 if (RT_FAILURE(rc))
1084 {
1085 SUPR0Printf("gmmR0CleanupVMDestroyChunk: %p/%#x: RTRMemObjFree(%p,true) -> %d (cMappings=%d)\n", pChunk,
1086 pChunk->Core.Key, pChunk->MemObj, rc, pChunk->cMappings);
1087 AssertRC(rc);
1088 }
1089 pChunk->MemObj = NIL_RTR0MEMOBJ;
1090
1091 RTMemFree(pChunk->paMappings);
1092 pChunk->paMappings = NULL;
1093
1094 RTMemFree(pChunk);
1095 return 0;
1096}
1097
1098
1099/**
1100 * The initial resource reservations.
1101 *
1102 * This will make memory reservations according to policy and priority. If there aren't
1103 * sufficient resources available to sustain the VM this function will fail and all
1104 * future allocation requests will fail as well.
1105 *
1106 * These are just the initial reservations made very early during the VM creation
1107 * process and will be adjusted later in the GMMR0UpdateReservation call after the
1108 * ring-3 init has completed.
1109 *
1110 * @returns VBox status code.
1111 * @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1112 * @retval VERR_GMM_
1113 *
1114 * @param pVM Pointer to the shared VM structure.
1115 * @param idCpu VCPU id
1116 * @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1117 * This does not include MMIO2 and similar.
1118 * @param cShadowPages The number of pages that may be allocated for shadow paging structures.
1119 * @param cFixedPages The number of pages that may be allocated for fixed objects like the
1120 * hyper heap, MMIO2 and similar.
1121 * @param enmPolicy The OC policy to use on this VM.
1122 * @param enmPriority The priority in an out-of-memory situation.
1123 *
1124 * @thread The creator thread / EMT.
1125 */
1126GMMR0DECL(int) GMMR0InitialReservation(PVM pVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages,
1127 GMMOCPOLICY enmPolicy, GMMPRIORITY enmPriority)
1128{
1129 LogFlow(("GMMR0InitialReservation: pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x enmPolicy=%d enmPriority=%d\n",
1130 pVM, cBasePages, cShadowPages, cFixedPages, enmPolicy, enmPriority));
1131
1132 /*
1133 * Validate, get basics and take the semaphore.
1134 */
1135 PGMM pGMM;
1136 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1137 PGVM pGVM;
1138 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
1139 if (RT_FAILURE(rc))
1140 return rc;
1141
1142 AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1143 AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1144 AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1145 AssertReturn(enmPolicy > GMMOCPOLICY_INVALID && enmPolicy < GMMOCPOLICY_END, VERR_INVALID_PARAMETER);
1146 AssertReturn(enmPriority > GMMPRIORITY_INVALID && enmPriority < GMMPRIORITY_END, VERR_INVALID_PARAMETER);
1147
1148 rc = RTSemFastMutexRequest(pGMM->Mtx);
1149 AssertRC(rc);
1150 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1151 {
1152 if ( !pGVM->gmm.s.Reserved.cBasePages
1153 && !pGVM->gmm.s.Reserved.cFixedPages
1154 && !pGVM->gmm.s.Reserved.cShadowPages)
1155 {
1156 /*
1157 * Check if we can accommodate this.
1158 */
1159 /* ... later ... */
1160 if (RT_SUCCESS(rc))
1161 {
1162 /*
1163 * Update the records.
1164 */
1165 pGVM->gmm.s.Reserved.cBasePages = cBasePages;
1166 pGVM->gmm.s.Reserved.cFixedPages = cFixedPages;
1167 pGVM->gmm.s.Reserved.cShadowPages = cShadowPages;
1168 pGVM->gmm.s.enmPolicy = enmPolicy;
1169 pGVM->gmm.s.enmPriority = enmPriority;
1170 pGVM->gmm.s.fMayAllocate = true;
1171
1172 pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1173 pGMM->cRegisteredVMs++;
1174 }
1175 }
1176 else
1177 rc = VERR_WRONG_ORDER;
1178 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1179 }
1180 else
1181 rc = VERR_INTERNAL_ERROR_5;
1182 RTSemFastMutexRelease(pGMM->Mtx);
1183 LogFlow(("GMMR0InitialReservation: returns %Rrc\n", rc));
1184 return rc;
1185}
1186
1187
1188/**
1189 * VMMR0 request wrapper for GMMR0InitialReservation.
1190 *
1191 * @returns see GMMR0InitialReservation.
1192 * @param pVM Pointer to the shared VM structure.
1193 * @param idCpu VCPU id
1194 * @param pReq The request packet.
1195 */
1196GMMR0DECL(int) GMMR0InitialReservationReq(PVM pVM, VMCPUID idCpu, PGMMINITIALRESERVATIONREQ pReq)
1197{
1198 /*
1199 * Validate input and pass it on.
1200 */
1201 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1202 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1203 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
1204
1205 return GMMR0InitialReservation(pVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages, pReq->enmPolicy, pReq->enmPriority);
1206}
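
/* A hedged sketch (not part of this file) of how a caller might fill in the
 * request consumed by GMMR0InitialReservationReq above. The page counts are
 * placeholders, and the exact ring-3 dispatch path (e.g. a
 * VMMR0_DO_GMM_INITIAL_RESERVATION operation) is assumed rather than shown:
 *
 *   GMMINITIALRESERVATIONREQ Req;
 *   Req.Hdr.cbReq    = sizeof(Req);          // checked by the wrapper above
 *   Req.cBasePages   = cGuestRamPages;       // placeholder: base RAM + ROMs
 *   Req.cShadowPages = cShadowPages;         // placeholder: shadow paging structures
 *   Req.cFixedPages  = cFixedPages;          // placeholder: hyper heap, MMIO2 and similar
 *   Req.enmPolicy    = GMMOCPOLICY_NO_OC;
 *   Req.enmPriority  = GMMPRIORITY_NORMAL;   // assumed enum value
 */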
1207
1208
1209/**
1210 * This updates the memory reservation with the additional MMIO2 and ROM pages.
1211 *
1212 * @returns VBox status code.
1213 * @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1214 *
1215 * @param pVM Pointer to the shared VM structure.
1216 * @param idCpu VCPU id
1217 * @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1218 * This does not include MMIO2 and similar.
1219 * @param cShadowPages The number of pages that may be allocated for shadow paging structures.
1220 * @param cFixedPages The number of pages that may be allocated for fixed objects like the
1221 * hyper heap, MMIO2 and similar.
1222 *
1223 * @thread EMT.
1224 */
1225GMMR0DECL(int) GMMR0UpdateReservation(PVM pVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages)
1226{
1227 LogFlow(("GMMR0UpdateReservation: pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x\n",
1228 pVM, cBasePages, cShadowPages, cFixedPages));
1229
1230 /*
1231 * Validate, get basics and take the semaphore.
1232 */
1233 PGMM pGMM;
1234 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1235 PGVM pGVM;
1236 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
1237 if (RT_FAILURE(rc))
1238 return rc;
1239
1240 AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1241 AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1242 AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1243
1244 rc = RTSemFastMutexRequest(pGMM->Mtx);
1245 AssertRC(rc);
1246 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1247 {
1248 if ( pGVM->gmm.s.Reserved.cBasePages
1249 && pGVM->gmm.s.Reserved.cFixedPages
1250 && pGVM->gmm.s.Reserved.cShadowPages)
1251 {
1252 /*
1253 * Check if we can accommodate this.
1254 */
1255 /* ... later ... */
1256 if (RT_SUCCESS(rc))
1257 {
1258 /*
1259 * Update the records.
1260 */
1261 pGMM->cReservedPages -= pGVM->gmm.s.Reserved.cBasePages
1262 + pGVM->gmm.s.Reserved.cFixedPages
1263 + pGVM->gmm.s.Reserved.cShadowPages;
1264 pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1265
1266 pGVM->gmm.s.Reserved.cBasePages = cBasePages;
1267 pGVM->gmm.s.Reserved.cFixedPages = cFixedPages;
1268 pGVM->gmm.s.Reserved.cShadowPages = cShadowPages;
1269 }
1270 }
1271 else
1272 rc = VERR_WRONG_ORDER;
1273 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1274 }
1275 else
1276 rc = VERR_INTERNAL_ERROR_5;
1277 RTSemFastMutexRelease(pGMM->Mtx);
1278 LogFlow(("GMMR0UpdateReservation: returns %Rrc\n", rc));
1279 return rc;
1280}
1281
1282
1283/**
1284 * VMMR0 request wrapper for GMMR0UpdateReservation.
1285 *
1286 * @returns see GMMR0UpdateReservation.
1287 * @param pVM Pointer to the shared VM structure.
1288 * @param idCpu VCPU id
1289 * @param pReq The request packet.
1290 */
1291GMMR0DECL(int) GMMR0UpdateReservationReq(PVM pVM, VMCPUID idCpu, PGMMUPDATERESERVATIONREQ pReq)
1292{
1293 /*
1294 * Validate input and pass it on.
1295 */
1296 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1297 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1298 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
1299
1300 return GMMR0UpdateReservation(pVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages);
1301}
1302
1303
1304/**
1305 * Performs sanity checks on a free set.
1306 *
1307 * @returns Error count.
1308 *
1309 * @param pGMM Pointer to the GMM instance.
1310 * @param pSet Pointer to the set.
1311 * @param pszSetName The set name.
1312 * @param pszFunction The function from which it was called.
1313 * @param uLineNo The line number.
1314 */
1315static uint32_t gmmR0SanityCheckSet(PGMM pGMM, PGMMCHUNKFREESET pSet, const char *pszSetName,
1316 const char *pszFunction, unsigned uLineNo)
1317{
1318 uint32_t cErrors = 0;
1319
1320 /*
1321 * Count the free pages in all the chunks and match it against pSet->cFreePages.
1322 */
1323 uint32_t cPages = 0;
1324 for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1325 {
1326 for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1327 {
1328 /** @todo check that the chunk is hashed into the right set. */
1329 cPages += pCur->cFree;
1330 }
1331 }
1332 if (RT_UNLIKELY(cPages != pSet->cFreePages))
1333 {
1334 SUPR0Printf("GMM insanity: found %#x pages in the %s set, expected %#x. (%s, line %u)\n",
1335 cPages, pszSetName, pSet->cFreePages, pszFunction, uLineNo);
1336 cErrors++;
1337 }
1338
1339 return cErrors;
1340}
1341
1342
1343/**
1344 * Performs some sanity checks on the GMM while owning the lock.
1345 *
1346 * @returns Error count.
1347 *
1348 * @param pGMM Pointer to the GMM instance.
1349 * @param pszFunction The function from which it is called.
1350 * @param uLineNo The line number.
1351 */
1352static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo)
1353{
1354 uint32_t cErrors = 0;
1355
1356 cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->Private, "private", pszFunction, uLineNo);
1357 cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->Shared, "shared", pszFunction, uLineNo);
1358 /** @todo add more sanity checks. */
1359
1360 return cErrors;
1361}
1362
1363
1364/**
1365 * Looks up a chunk in the tree and fills in the TLB entry for it.
1366 *
1367 * This is not expected to fail and will bitch if it does.
1368 *
1369 * @returns Pointer to the allocation chunk, NULL if not found.
1370 * @param pGMM Pointer to the GMM instance.
1371 * @param idChunk The ID of the chunk to find.
1372 * @param pTlbe Pointer to the TLB entry.
1373 */
1374static PGMMCHUNK gmmR0GetChunkSlow(PGMM pGMM, uint32_t idChunk, PGMMCHUNKTLBE pTlbe)
1375{
1376 PGMMCHUNK pChunk = (PGMMCHUNK)RTAvlU32Get(&pGMM->pChunks, idChunk);
1377 AssertMsgReturn(pChunk, ("Chunk %#x not found!\n", idChunk), NULL);
1378 pTlbe->idChunk = idChunk;
1379 pTlbe->pChunk = pChunk;
1380 return pChunk;
1381}
1382
1383
1384/**
1385 * Finds an allocation chunk.
1386 *
1387 * This is not expected to fail and will bitch if it does.
1388 *
1389 * @returns Pointer to the allocation chunk, NULL if not found.
1390 * @param pGMM Pointer to the GMM instance.
1391 * @param idChunk The ID of the chunk to find.
1392 */
1393DECLINLINE(PGMMCHUNK) gmmR0GetChunk(PGMM pGMM, uint32_t idChunk)
1394{
1395 /*
1396 * Do a TLB lookup, branch if not in the TLB.
1397 */
1398 PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(idChunk)];
1399 if ( pTlbe->idChunk != idChunk
1400 || !pTlbe->pChunk)
1401 return gmmR0GetChunkSlow(pGMM, idChunk, pTlbe);
1402 return pTlbe->pChunk;
1403}
1404
1405
1406/**
1407 * Finds a page.
1408 *
1409 * This is not expected to fail and will bitch if it does.
1410 *
1411 * @returns Pointer to the page, NULL if not found.
1412 * @param pGMM Pointer to the GMM instance.
1413 * @param idPage The ID of the page to find.
1414 */
1415DECLINLINE(PGMMPAGE) gmmR0GetPage(PGMM pGMM, uint32_t idPage)
1416{
1417 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
1418 if (RT_LIKELY(pChunk))
1419 return &pChunk->aPages[idPage & GMM_PAGEID_IDX_MASK];
1420 return NULL;
1421}
1422
1423
1424/**
1425 * Gets the host physical address for a page given by its ID.
1426 *
1427 * @returns The host physical address or NIL_RTHCPHYS.
1428 * @param pGMM Pointer to the GMM instance.
1429 * @param idPage The ID of the page to find.
1430 */
1431DECLINLINE(RTHCPHYS) gmmR0GetPageHCPhys(PGMM pGMM, uint32_t idPage)
1432{
1433 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
1434 if (RT_LIKELY(pChunk))
1435 return RTR0MemObjGetPagePhysAddr(pChunk->MemObj, idPage & GMM_PAGEID_IDX_MASK);
1436 return NIL_RTHCPHYS;
1437}
1438
1439
1440/**
1441 * Unlinks the chunk from the free list it's currently on (if any).
1442 *
1443 * @param pChunk The allocation chunk.
1444 */
1445DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk)
1446{
1447 PGMMCHUNKFREESET pSet = pChunk->pSet;
1448 if (RT_LIKELY(pSet))
1449 {
1450 pSet->cFreePages -= pChunk->cFree;
1451
1452 PGMMCHUNK pPrev = pChunk->pFreePrev;
1453 PGMMCHUNK pNext = pChunk->pFreeNext;
1454 if (pPrev)
1455 pPrev->pFreeNext = pNext;
1456 else
1457 pSet->apLists[(pChunk->cFree - 1) >> GMM_CHUNK_FREE_SET_SHIFT] = pNext;
1458 if (pNext)
1459 pNext->pFreePrev = pPrev;
1460
1461 pChunk->pSet = NULL;
1462 pChunk->pFreeNext = NULL;
1463 pChunk->pFreePrev = NULL;
1464 }
1465 else
1466 {
1467 Assert(!pChunk->pFreeNext);
1468 Assert(!pChunk->pFreePrev);
1469 Assert(!pChunk->cFree);
1470 }
1471}
1472
1473
1474/**
1475 * Links the chunk onto the appropriate free list in the specified free set.
1476 *
1477 * If the chunk has no free entries, it's not linked into any list.
1478 *
1479 * @param pChunk The allocation chunk.
1480 * @param pSet The free set.
1481 */
1482DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet)
1483{
1484 Assert(!pChunk->pSet);
1485 Assert(!pChunk->pFreeNext);
1486 Assert(!pChunk->pFreePrev);
1487
1488 if (pChunk->cFree > 0)
1489 {
1490 pChunk->pSet = pSet;
1491 pChunk->pFreePrev = NULL;
1492 unsigned iList = (pChunk->cFree - 1) >> GMM_CHUNK_FREE_SET_SHIFT;
1493 pChunk->pFreeNext = pSet->apLists[iList];
1494 if (pChunk->pFreeNext)
1495 pChunk->pFreeNext->pFreePrev = pChunk;
1496 pSet->apLists[iList] = pChunk;
1497
1498 pSet->cFreePages += pChunk->cFree;
1499 }
1500}
1501
1502
1503/**
1504 * Frees a Chunk ID.
1505 *
1506 * @param pGMM Pointer to the GMM instance.
1507 * @param idChunk The Chunk ID to free.
1508 */
1509static void gmmR0FreeChunkId(PGMM pGMM, uint32_t idChunk)
1510{
1511 AssertReturnVoid(idChunk != NIL_GMM_CHUNKID);
1512 AssertMsg(ASMBitTest(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk));
1513 ASMAtomicBitClear(&pGMM->bmChunkId[0], idChunk);
1514}
1515
1516
1517/**
1518 * Allocates a new Chunk ID.
1519 *
1520 * @returns The Chunk ID.
1521 * @param pGMM Pointer to the GMM instance.
1522 */
1523static uint32_t gmmR0AllocateChunkId(PGMM pGMM)
1524{
1525 AssertCompile(!((GMM_CHUNKID_LAST + 1) & 31)); /* must be a multiple of 32 */
1526 AssertCompile(NIL_GMM_CHUNKID == 0);
1527
1528 /*
1529 * Try the next sequential one.
1530 */
1531 int32_t idChunk = ++pGMM->idChunkPrev;
1532#if 0 /* test the fallback first */
1533 if ( idChunk <= GMM_CHUNKID_LAST
1534 && idChunk > NIL_GMM_CHUNKID
1535 && !ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk))
1536 return idChunk;
1537#endif
1538
1539 /*
1540 * Scan sequentially from the last one.
1541 */
1542 if ( (uint32_t)idChunk < GMM_CHUNKID_LAST
1543 && idChunk > NIL_GMM_CHUNKID)
1544 {
1545 idChunk = ASMBitNextClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1, idChunk);
1546 if (idChunk > NIL_GMM_CHUNKID)
1547 {
1548 AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
1549 return pGMM->idChunkPrev = idChunk;
1550 }
1551 }
1552
1553 /*
1554 * Ok, scan from the start.
1555 * We're not racing anyone, so there is no need to expect failures or have restart loops.
1556 */
1557 idChunk = ASMBitFirstClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1);
1558 AssertMsgReturn(idChunk > NIL_GMM_CHUNKID, ("%#x\n", idChunk), NIL_GVM_HANDLE);
1559 AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
1560
1561 return pGMM->idChunkPrev = idChunk;
1562}
1563
1564
1565/**
1566 * Registers a new chunk of memory.
1567 *
1568 * This is called by both gmmR0AllocateOneChunk and GMMR0SeedChunk. The caller
1569 * must own the global lock.
1570 *
1571 * @returns VBox status code.
1572 * @param pGMM Pointer to the GMM instance.
1573 * @param pSet Pointer to the set.
1574 * @param MemObj The memory object for the chunk.
1575 * @param hGVM The affinity of the chunk. NIL_GVM_HANDLE for no
1576 * affinity.
1577 * @param enmChunkType Chunk type (continuous or non-continuous)
1578 * @param ppChunk Chunk address (out)
1579 */
1580static int gmmR0RegisterChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, RTR0MEMOBJ MemObj, uint16_t hGVM, GMMCHUNKTYPE enmChunkType, PGMMCHUNK *ppChunk = NULL)
1581{
1582 Assert(hGVM != NIL_GVM_HANDLE || pGMM->fBoundMemoryMode);
1583
1584 int rc;
1585 PGMMCHUNK pChunk = (PGMMCHUNK)RTMemAllocZ(sizeof(*pChunk));
1586 if (pChunk)
1587 {
1588 /*
1589 * Initialize it.
1590 */
1591 pChunk->MemObj = MemObj;
1592 pChunk->cFree = GMM_CHUNK_NUM_PAGES;
1593 pChunk->hGVM = hGVM;
1594 pChunk->iFreeHead = 0;
1595 pChunk->enmType = enmChunkType;
1596 for (unsigned iPage = 0; iPage < RT_ELEMENTS(pChunk->aPages) - 1; iPage++)
1597 {
1598 pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
1599 pChunk->aPages[iPage].Free.iNext = iPage + 1;
1600 }
1601 pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.u2State = GMM_PAGE_STATE_FREE;
1602 pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.iNext = UINT16_MAX;
1603
1604 /*
1605 * Allocate a Chunk ID and insert it into the tree.
1606 * This has to be done behind the mutex of course.
1607 */
1608 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1609 {
1610 pChunk->Core.Key = gmmR0AllocateChunkId(pGMM);
1611 if ( pChunk->Core.Key != NIL_GMM_CHUNKID
1612 && pChunk->Core.Key <= GMM_CHUNKID_LAST
1613 && RTAvlU32Insert(&pGMM->pChunks, &pChunk->Core))
1614 {
1615 pGMM->cChunks++;
1616 gmmR0LinkChunk(pChunk, pSet);
1617 LogFlow(("gmmR0RegisterChunk: pChunk=%p id=%#x cChunks=%d\n", pChunk, pChunk->Core.Key, pGMM->cChunks));
1618
1619 if (ppChunk)
1620 *ppChunk = pChunk;
1621
1622 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1623 return VINF_SUCCESS;
1624 }
1625
1626 /* bail out */
1627 rc = VERR_INTERNAL_ERROR;
1628 }
1629 else
1630 rc = VERR_INTERNAL_ERROR_5;
1631
1632 RTMemFree(pChunk);
1633 }
1634 else
1635 rc = VERR_NO_MEMORY;
1636 return rc;
1637}
1638
1639
1640/**
1641 * Allocate one new chunk and add it to the specified free set.
1642 *
1643 * @returns VBox status code.
1644 * @param pGMM Pointer to the GMM instance.
1645 * @param pSet Pointer to the set.
1646 * @param hGVM The affinity of the new chunk.
1647 * @param enmChunkType Chunk type (continuous or non-continuous)
1648 * @param ppChunk Chunk address (out)
1649 *
1650 * @remarks Called owning the mutex; it is temporarily released while allocating the memory.
1651 */
1652static int gmmR0AllocateOneChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, uint16_t hGVM, GMMCHUNKTYPE enmChunkType, PGMMCHUNK *ppChunk = NULL)
1653{
1654 /*
1655 * Allocate the memory.
1656 */
1657 RTR0MEMOBJ MemObj;
1658 int rc;
1659
1660 AssertCompile(GMM_CHUNK_SIZE == _2M);
1661 AssertReturn(enmChunkType == GMMCHUNKTYPE_NON_CONTINUOUS || enmChunkType == GMMCHUNKTYPE_CONTINUOUS, VERR_INVALID_PARAMETER);
1662
1663 /* Leave the lock temporarily as the allocation might take long. */
1664 RTSemFastMutexRelease(pGMM->Mtx);
1665 if (enmChunkType == GMMCHUNKTYPE_NON_CONTINUOUS)
1666 rc = RTR0MemObjAllocPhysNC(&MemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS);
1667 else
1668 rc = RTR0MemObjAllocPhysEx(&MemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS, GMM_CHUNK_SIZE);
1669
1670 /* Grab the lock again. */
1671 int rc2 = RTSemFastMutexRequest(pGMM->Mtx);
1672 AssertRCReturn(rc2, rc2);
1673
1674 if (RT_SUCCESS(rc))
1675 {
1676 rc = gmmR0RegisterChunk(pGMM, pSet, MemObj, hGVM, enmChunkType, ppChunk);
1677 if (RT_FAILURE(rc))
1678 RTR0MemObjFree(MemObj, false /* fFreeMappings */);
1679 }
1680 /** @todo Check that RTR0MemObjAllocPhysNC always returns VERR_NO_MEMORY on
1681 * allocation failure. */
1682 return rc;
1683}
1684
1685
1686/**
1687 * Attempts to allocate more pages until the requested amount is met.
1688 *
1689 * @returns VBox status code.
1690 * @param pGMM Pointer to the GMM instance data.
1691 * @param pGVM The calling VM.
1692 * @param pSet Pointer to the free set to grow.
1693 * @param cPages The number of pages needed.
1694 *
1695 * @remarks Called owning the mutex, but will leave it temporarily while
1696 * allocating the memory!
1697 */
1698static int gmmR0AllocateMoreChunks(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet, uint32_t cPages)
1699{
1700 Assert(!pGMM->fLegacyAllocationMode);
1701
1702 if (!GMM_CHECK_SANITY_IN_LOOPS(pGMM))
1703 return VERR_INTERNAL_ERROR_4;
1704
1705 if (!pGMM->fBoundMemoryMode)
1706 {
1707 /*
1708 * Try to steal free chunks from the other set first. (Only take 100% free chunks.)
1709 */
1710 PGMMCHUNKFREESET pOtherSet = pSet == &pGMM->Private ? &pGMM->Shared : &pGMM->Private;
1711 while ( pSet->cFreePages < cPages
1712 && pOtherSet->cFreePages >= GMM_CHUNK_NUM_PAGES)
1713 {
1714 PGMMCHUNK pChunk = pOtherSet->apLists[GMM_CHUNK_FREE_SET_LISTS - 1];
1715 while (pChunk && pChunk->cFree != GMM_CHUNK_NUM_PAGES)
1716 pChunk = pChunk->pFreeNext;
1717 if (!pChunk)
1718 break;
1719
1720 gmmR0UnlinkChunk(pChunk);
1721 gmmR0LinkChunk(pChunk, pSet);
1722 }
1723
1724 /*
1725 * If we still need more pages, allocate new chunks.
1726 * Note! We will leave the mutex while doing the allocation.
1727 */
1728 while (pSet->cFreePages < cPages)
1729 {
1730 int rc = gmmR0AllocateOneChunk(pGMM, pSet, pGVM->hSelf, GMMCHUNKTYPE_NON_CONTINUOUS);
1731 if (RT_FAILURE(rc))
1732 return rc;
1733 if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1734 return VERR_INTERNAL_ERROR_5;
1735 }
1736 }
1737 else
1738 {
1739 /*
1740 * The memory is bound to the VM allocating it, so we have to count
1741 * the free pages carefully as well as make sure we brand them with
1742 * our VM handle.
1743 *
1744 * Note! We will leave the mutex while doing the allocation.
1745 */
1746 uint16_t const hGVM = pGVM->hSelf;
1747 for (;;)
1748 {
1749 /* Count and see if we've reached the goal. */
1750 uint32_t cPagesFound = 0;
1751 for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1752 for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1753 if (pCur->hGVM == hGVM)
1754 {
1755 cPagesFound += pCur->cFree;
1756 if (cPagesFound >= cPages)
1757 break;
1758 }
1759 if (cPagesFound >= cPages)
1760 break;
1761
1762 /* Allocate more. */
1763 int rc = gmmR0AllocateOneChunk(pGMM, pSet, hGVM, GMMCHUNKTYPE_NON_CONTINUOUS);
1764 if (RT_FAILURE(rc))
1765 return rc;
1766 if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1767 return VERR_INTERNAL_ERROR_5;
1768 }
1769 }
1770
1771 return VINF_SUCCESS;
1772}
1773
1774
1775/**
1776 * Allocates one private page.
1777 *
1778 * Worker for gmmR0AllocatePages.
1779 *
1780 * @param pGMM Pointer to the GMM instance data.
1781 * @param hGVM The GVM handle of the VM requesting memory.
1782 * @param pChunk The chunk to allocate it from.
1783 * @param pPageDesc The page descriptor.
1784 */
1785static void gmmR0AllocatePage(PGMM pGMM, uint32_t hGVM, PGMMCHUNK pChunk, PGMMPAGEDESC pPageDesc)
1786{
1787 /* update the chunk stats. */
1788 if (pChunk->hGVM == NIL_GVM_HANDLE)
1789 pChunk->hGVM = hGVM;
1790 Assert(pChunk->cFree);
1791 pChunk->cFree--;
1792 pChunk->cPrivate++;
1793
1794 /* unlink the first free page. */
1795 const uint32_t iPage = pChunk->iFreeHead;
1796 AssertReleaseMsg(iPage < RT_ELEMENTS(pChunk->aPages), ("%d\n", iPage));
1797 PGMMPAGE pPage = &pChunk->aPages[iPage];
1798 Assert(GMM_PAGE_IS_FREE(pPage));
1799 pChunk->iFreeHead = pPage->Free.iNext;
1800 Log3(("A pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x iNext=%#x\n",
1801 pPage, iPage, (pChunk->Core.Key << GMM_CHUNKID_SHIFT) | iPage,
1802 pPage->Common.u2State, pChunk->iFreeHead, pPage->Free.iNext));
1803
1804 /* make the page private. */
1805 pPage->u = 0;
1806 AssertCompile(GMM_PAGE_STATE_PRIVATE == 0);
1807 pPage->Private.hGVM = hGVM;
1808 AssertCompile(NIL_RTHCPHYS >= GMM_GCPHYS_LAST);
1809 AssertCompile(GMM_GCPHYS_UNSHAREABLE >= GMM_GCPHYS_LAST);
1810 if (pPageDesc->HCPhysGCPhys <= GMM_GCPHYS_LAST)
1811 pPage->Private.pfn = pPageDesc->HCPhysGCPhys >> PAGE_SHIFT;
1812 else
1813 pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE; /* unshareable / unassigned - same thing. */
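 /* Note: on input HCPhysGCPhys carries the guest physical address (recorded above as the pfn,
    or marked unshareable); below it is overwritten with the host physical address of the page
    just handed out. */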
1814
1815 /* update the page descriptor. */
1816 pPageDesc->HCPhysGCPhys = RTR0MemObjGetPagePhysAddr(pChunk->MemObj, iPage);
1817 Assert(pPageDesc->HCPhysGCPhys != NIL_RTHCPHYS);
1818 pPageDesc->idPage = (pChunk->Core.Key << GMM_CHUNKID_SHIFT) | iPage;
1819 pPageDesc->idSharedPage = NIL_GMM_PAGEID;
1820}
1821
1822
1823/**
1824 * Common worker for GMMR0AllocateHandyPages and GMMR0AllocatePages.
1825 *
1826 * @returns VBox status code:
1827 * @retval VINF_SUCCESS on success.
1828 * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk or
1829 * gmmR0AllocateMoreChunks is necessary.
1830 * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
1831 * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
1832 * that is we're trying to allocate more than we've reserved.
1833 *
1834 * @param pGMM Pointer to the GMM instance data.
1835 * @param pGVM Pointer to the shared VM structure.
1836 * @param cPages The number of pages to allocate.
1837 * @param paPages Pointer to the page descriptors.
1838 * See GMMPAGEDESC for details on what is expected on input.
1839 * @param enmAccount The account to charge.
1840 */
1841static int gmmR0AllocatePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
1842{
1843 /*
1844 * Check allocation limits.
1845 */
1846 if (RT_UNLIKELY(pGMM->cAllocatedPages + cPages > pGMM->cMaxPages))
1847 return VERR_GMM_HIT_GLOBAL_LIMIT;
1848
1849 switch (enmAccount)
1850 {
1851 case GMMACCOUNT_BASE:
1852 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cPages > pGVM->gmm.s.Reserved.cBasePages))
1853 {
1854 Log(("gmmR0AllocatePages:Base: Reserved=%#llx Allocated+Ballooned+Requested=%#llx+%#llx+%#x!\n",
1855 pGVM->gmm.s.Reserved.cBasePages, pGVM->gmm.s.Allocated.cBasePages, pGVM->gmm.s.cBalloonedPages, cPages));
1856 return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
1857 }
1858 break;
1859 case GMMACCOUNT_SHADOW:
1860 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cShadowPages + cPages > pGVM->gmm.s.Reserved.cShadowPages))
1861 {
1862 Log(("gmmR0AllocatePages:Shadow: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
1863 pGVM->gmm.s.Reserved.cShadowPages, pGVM->gmm.s.Allocated.cShadowPages, cPages));
1864 return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
1865 }
1866 break;
1867 case GMMACCOUNT_FIXED:
1868 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cFixedPages + cPages > pGVM->gmm.s.Reserved.cFixedPages))
1869 {
1870 Log(("gmmR0AllocatePages:Fixed: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
1871 pGVM->gmm.s.Reserved.cFixedPages, pGVM->gmm.s.Allocated.cFixedPages, cPages));
1872 return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
1873 }
1874 break;
1875 default:
1876 AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
1877 }
1878
1879 /*
1880 * Check if we need to allocate more memory or not. In bound memory mode this
1881 * is a bit of extra work, but it's easier to do it upfront than to bail out later.
1882 */
1883 PGMMCHUNKFREESET pSet = &pGMM->Private;
1884 if (pSet->cFreePages < cPages)
1885 return VERR_GMM_SEED_ME;
1886 if (pGMM->fBoundMemoryMode)
1887 {
1888 uint16_t hGVM = pGVM->hSelf;
1889 uint32_t cPagesFound = 0;
1890 for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1891 for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1892 if (pCur->hGVM == hGVM)
1893 {
1894 cPagesFound += pCur->cFree;
1895 if (cPagesFound >= cPages)
1896 break;
1897 }
1898 if (cPagesFound < cPages)
1899 return VERR_GMM_SEED_ME;
1900 }
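 /* In bound memory mode only chunks branded with this VM's handle may be used, hence the
    per-chunk walk above. VERR_GMM_SEED_ME tells the caller to grow the free set
    (gmmR0AllocateMoreChunks, or GMMR0SeedChunk in legacy mode) and retry. */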
1901
1902 /*
1903 * Pick the pages.
1904 * Make some effort to keep each VM's private pages within its own chunks.
1905 */
1906 uint16_t hGVM = pGVM->hSelf;
1907 uint32_t iPage = 0;
1908
1909 /* first round, pick from chunks with an affinity to the VM. */
1910 for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists) && iPage < cPages; i++)
1911 {
1912 PGMMCHUNK pCurFree = NULL;
1913 PGMMCHUNK pCur = pSet->apLists[i];
1914 while (pCur && iPage < cPages)
1915 {
1916 PGMMCHUNK pNext = pCur->pFreeNext;
1917
1918 if ( pCur->hGVM == hGVM
1919 && pCur->cFree < GMM_CHUNK_NUM_PAGES)
1920 {
1921 gmmR0UnlinkChunk(pCur);
1922 for (; pCur->cFree && iPage < cPages; iPage++)
1923 gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
1924 gmmR0LinkChunk(pCur, pSet);
1925 }
1926
1927 pCur = pNext;
1928 }
1929 }
1930
1931 if (iPage < cPages)
1932 {
1933 /* second round, pick pages from the 100% empty chunks we just skipped above. */
1934 PGMMCHUNK pCurFree = NULL;
1935 PGMMCHUNK pCur = pSet->apLists[RT_ELEMENTS(pSet->apLists) - 1];
1936 while (pCur && iPage < cPages)
1937 {
1938 PGMMCHUNK pNext = pCur->pFreeNext;
1939
1940 if ( pCur->cFree == GMM_CHUNK_NUM_PAGES
1941 && ( pCur->hGVM == hGVM
1942 || !pGMM->fBoundMemoryMode))
1943 {
1944 gmmR0UnlinkChunk(pCur);
1945 for (; pCur->cFree && iPage < cPages; iPage++)
1946 gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
1947 gmmR0LinkChunk(pCur, pSet);
1948 }
1949
1950 pCur = pNext;
1951 }
1952 }
1953
1954 if ( iPage < cPages
1955 && !pGMM->fBoundMemoryMode)
1956 {
1957 /* third round, disregard affinity. */
1958 unsigned i = RT_ELEMENTS(pSet->apLists);
1959 while (i-- > 0 && iPage < cPages)
1960 {
1961 PGMMCHUNK pCurFree = NULL;
1962 PGMMCHUNK pCur = pSet->apLists[i];
1963 while (pCur && iPage < cPages)
1964 {
1965 PGMMCHUNK pNext = pCur->pFreeNext;
1966
1967 if ( pCur->cFree > GMM_CHUNK_NUM_PAGES / 2
1968 && cPages >= GMM_CHUNK_NUM_PAGES / 2)
1969 pCur->hGVM = hGVM; /* change chunk affinity */
1970
1971 gmmR0UnlinkChunk(pCur);
1972 for (; pCur->cFree && iPage < cPages; iPage++)
1973 gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
1974 gmmR0LinkChunk(pCur, pSet);
1975
1976 pCur = pNext;
1977 }
1978 }
1979 }
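 /* All requested pages should have been picked by now; the assertion below (iPage == cPages)
    catches any shortfall. */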
1980
1981 /*
1982 * Update the account.
1983 */
1984 switch (enmAccount)
1985 {
1986 case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages += iPage; break;
1987 case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages += iPage; break;
1988 case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages += iPage; break;
1989 default:
1990 AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
1991 }
1992 pGVM->gmm.s.cPrivatePages += iPage;
1993 pGMM->cAllocatedPages += iPage;
1994
1995 AssertMsgReturn(iPage == cPages, ("%u != %u\n", iPage, cPages), VERR_INTERNAL_ERROR);
1996
1997 /*
1998 * Check if we've reached some threshold and should kick one or two VMs and tell
1999 * them to inflate their balloons a bit more... later.
2000 */
2001
2002 return VINF_SUCCESS;
2003}
2004
2005
2006/**
2007 * Updates the previous allocations and allocates more pages.
2008 *
2009 * The handy pages are always taken from the 'base' memory account.
2010 * The allocated pages are not cleared and will contain random garbage.
2011 *
2012 * @returns VBox status code:
2013 * @retval VINF_SUCCESS on success.
2014 * @retval VERR_NOT_OWNER if the caller is not an EMT.
2015 * @retval VERR_GMM_PAGE_NOT_FOUND if one of the pages to update wasn't found.
2016 * @retval VERR_GMM_PAGE_NOT_PRIVATE if one of the pages to update wasn't a
2017 * private page.
2018 * @retval VERR_GMM_PAGE_NOT_SHARED if one of the pages to update wasn't a
2019 * shared page.
2020 * @retval VERR_GMM_NOT_PAGE_OWNER if one of the pages to be updated wasn't
2021 * owned by the VM.
2022 * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2023 * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2024 * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2025 * that is we're trying to allocate more than we've reserved.
2026 *
2027 * @param pVM Pointer to the shared VM structure.
2028 * @param idCpu VCPU id
2029 * @param cPagesToUpdate The number of pages to update (starting from the head).
2030 * @param cPagesToAlloc The number of pages to allocate (starting from the head).
2031 * @param paPages The array of page descriptors.
2032 * See GMMPAGEDESC for details on what is expected on input.
2033 * @thread EMT.
2034 */
2035GMMR0DECL(int) GMMR0AllocateHandyPages(PVM pVM, VMCPUID idCpu, uint32_t cPagesToUpdate, uint32_t cPagesToAlloc, PGMMPAGEDESC paPages)
2036{
2037 LogFlow(("GMMR0AllocateHandyPages: pVM=%p cPagesToUpdate=%#x cPagesToAlloc=%#x paPages=%p\n",
2038 pVM, cPagesToUpdate, cPagesToAlloc, paPages));
2039
2040 /*
2041 * Validate, get basics and take the semaphore.
2042 * (This is a relatively busy path, so make predictions where possible.)
2043 */
2044 PGMM pGMM;
2045 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2046 PGVM pGVM;
2047 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2048 if (RT_FAILURE(rc))
2049 return rc;
2050
2051 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2052 AssertMsgReturn( (cPagesToUpdate && cPagesToUpdate < 1024)
2053 || (cPagesToAlloc && cPagesToAlloc < 1024),
2054 ("cPagesToUpdate=%#x cPagesToAlloc=%#x\n", cPagesToUpdate, cPagesToAlloc),
2055 VERR_INVALID_PARAMETER);
2056
2057 unsigned iPage = 0;
2058 for (; iPage < cPagesToUpdate; iPage++)
2059 {
2060 AssertMsgReturn( ( paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
2061 && !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK))
2062 || paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
2063 || paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE,
2064 ("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys),
2065 VERR_INVALID_PARAMETER);
2066 AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2067 /*|| paPages[iPage].idPage == NIL_GMM_PAGEID*/,
2068 ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2069 AssertMsgReturn( paPages[iPage].idSharedPage <= GMM_PAGEID_LAST
2070 /*|| paPages[iPage].idSharedPage == NIL_GMM_PAGEID*/,
2071 ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2072 }
2073
2074 for (; iPage < cPagesToAlloc; iPage++)
2075 {
2076 AssertMsgReturn(paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS, ("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys), VERR_INVALID_PARAMETER);
2077 AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2078 AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2079 }
2080
2081 rc = RTSemFastMutexRequest(pGMM->Mtx);
2082 AssertRC(rc);
2083 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2084 {
2085 /* No allocations before the initial reservation has been made! */
2086 if (RT_LIKELY( pGVM->gmm.s.Reserved.cBasePages
2087 && pGVM->gmm.s.Reserved.cFixedPages
2088 && pGVM->gmm.s.Reserved.cShadowPages))
2089 {
2090 /*
2091 * Perform the updates.
2092 * Stop on the first error.
2093 */
2094 for (iPage = 0; iPage < cPagesToUpdate; iPage++)
2095 {
2096 if (paPages[iPage].idPage != NIL_GMM_PAGEID)
2097 {
2098 PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idPage);
2099 if (RT_LIKELY(pPage))
2100 {
2101 if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
2102 {
2103 if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
2104 {
2105 AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
2106 if (RT_LIKELY(paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST))
2107 pPage->Private.pfn = paPages[iPage].HCPhysGCPhys >> PAGE_SHIFT;
2108 else if (paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE)
2109 pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE;
2110 /* else: NIL_RTHCPHYS nothing */
2111
2112 paPages[iPage].idPage = NIL_GMM_PAGEID;
2113 paPages[iPage].HCPhysGCPhys = NIL_RTHCPHYS;
2114 }
2115 else
2116 {
2117 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not owner! hGVM=%#x hSelf=%#x\n",
2118 iPage, paPages[iPage].idPage, pPage->Private.hGVM, pGVM->hSelf));
2119 rc = VERR_GMM_NOT_PAGE_OWNER;
2120 break;
2121 }
2122 }
2123 else
2124 {
2125 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not private! %.*Rhxs (type %d)\n", iPage, paPages[iPage].idPage, sizeof(*pPage), pPage, pPage->Common.u2State));
2126 rc = VERR_GMM_PAGE_NOT_PRIVATE;
2127 break;
2128 }
2129 }
2130 else
2131 {
2132 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (private)\n", iPage, paPages[iPage].idPage));
2133 rc = VERR_GMM_PAGE_NOT_FOUND;
2134 break;
2135 }
2136 }
2137
2138 if (paPages[iPage].idSharedPage != NIL_GMM_PAGEID)
2139 {
2140 PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idSharedPage);
2141 if (RT_LIKELY(pPage))
2142 {
2143 if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
2144 {
2145 AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
2146 Assert(pPage->Shared.cRefs);
2147 Assert(pGVM->gmm.s.cSharedPages);
2148 Assert(pGVM->gmm.s.Allocated.cBasePages);
2149
2150 Log(("GMMR0AllocateHandyPages: free shared page %x cRefs=%d\n", paPages[iPage].idSharedPage, pPage->Shared.cRefs));
2151 pGVM->gmm.s.cSharedPages--;
2152 pGVM->gmm.s.Allocated.cBasePages--;
2153 if (!--pPage->Shared.cRefs)
2154 {
2155 gmmR0FreeSharedPage(pGMM, paPages[iPage].idSharedPage, pPage);
2156 }
2157 else
2158 {
2159 Assert(pGMM->cDuplicatePages);
2160 pGMM->cDuplicatePages--;
2161 }
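 /* Dropping the last reference frees the shared page itself; otherwise another VM
    still uses it and only the duplicate counter is adjusted. */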
2162
2163 paPages[iPage].idSharedPage = NIL_GMM_PAGEID;
2164 }
2165 else
2166 {
2167 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not shared!\n", iPage, paPages[iPage].idSharedPage));
2168 rc = VERR_GMM_PAGE_NOT_SHARED;
2169 break;
2170 }
2171 }
2172 else
2173 {
2174 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (shared)\n", iPage, paPages[iPage].idSharedPage));
2175 rc = VERR_GMM_PAGE_NOT_FOUND;
2176 break;
2177 }
2178 }
2179 }
2180
2181 /*
2182 * Join paths with GMMR0AllocatePages for the allocation.
2183 * Note! gmmR0AllocateMoreChunks may leave the protection of the mutex!
2184 */
2185 while (RT_SUCCESS(rc))
2186 {
2187 rc = gmmR0AllocatePages(pGMM, pGVM, cPagesToAlloc, paPages, GMMACCOUNT_BASE);
2188 if ( rc != VERR_GMM_SEED_ME
2189 || pGMM->fLegacyAllocationMode)
2190 break;
2191 rc = gmmR0AllocateMoreChunks(pGMM, pGVM, &pGMM->Private, cPagesToAlloc);
2192 }
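 /* (In legacy allocation mode new chunks can only be seeded from ring-3 via GMMR0SeedChunk,
    so VERR_GMM_SEED_ME is passed back to the caller instead of looping here.) */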
2193 }
2194 else
2195 rc = VERR_WRONG_ORDER;
2196 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2197 }
2198 else
2199 rc = VERR_INTERNAL_ERROR_5;
2200 RTSemFastMutexRelease(pGMM->Mtx);
2201 LogFlow(("GMMR0AllocateHandyPages: returns %Rrc\n", rc));
2202 return rc;
2203}
2204
2205
2206/**
2207 * Allocate one or more pages.
2208 *
2209 * This is typically used for ROMs and MMIO2 (VRAM) during VM creation.
2210 * The allocated pages are not cleared and will contain random garbage.
2211 *
2212 * @returns VBox status code:
2213 * @retval VINF_SUCCESS on success.
2214 * @retval VERR_NOT_OWNER if the caller is not an EMT.
2215 * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2216 * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2217 * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2218 * that is we're trying to allocate more than we've reserved.
2219 *
2220 * @param pVM Pointer to the shared VM structure.
2221 * @param idCpu VCPU id
2222 * @param cPages The number of pages to allocate.
2223 * @param paPages Pointer to the page descriptors.
2224 * See GMMPAGEDESC for details on what is expected on input.
2225 * @param enmAccount The account to charge.
2226 *
2227 * @thread EMT.
2228 */
2229GMMR0DECL(int) GMMR0AllocatePages(PVM pVM, VMCPUID idCpu, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
2230{
2231 LogFlow(("GMMR0AllocatePages: pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pVM, cPages, paPages, enmAccount));
2232
2233 /*
2234 * Validate, get basics and take the semaphore.
2235 */
2236 PGMM pGMM;
2237 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2238 PGVM pGVM;
2239 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2240 if (RT_FAILURE(rc))
2241 return rc;
2242
2243 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2244 AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
2245 AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
2246
2247 for (unsigned iPage = 0; iPage < cPages; iPage++)
2248 {
2249 AssertMsgReturn( paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
2250 || paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE
2251 || ( enmAccount == GMMACCOUNT_BASE
2252 && paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
2253 && !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK)),
2254 ("#%#x: %RHp enmAccount=%d\n", iPage, paPages[iPage].HCPhysGCPhys, enmAccount),
2255 VERR_INVALID_PARAMETER);
2256 AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2257 AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2258 }
2259
2260 rc = RTSemFastMutexRequest(pGMM->Mtx);
2261 AssertRC(rc);
2262 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2263 {
2264
2265 /* No allocations before the initial reservation has been made! */
2266 if (RT_LIKELY( pGVM->gmm.s.Reserved.cBasePages
2267 && pGVM->gmm.s.Reserved.cFixedPages
2268 && pGVM->gmm.s.Reserved.cShadowPages))
2269 {
2270 /*
2271 * gmmR0AllocatePages seed loop.
2272 * Note! gmmR0AllocateMoreChunks may leave the protection of the mutex!
2273 */
2274 while (RT_SUCCESS(rc))
2275 {
2276 rc = gmmR0AllocatePages(pGMM, pGVM, cPages, paPages, enmAccount);
2277 if ( rc != VERR_GMM_SEED_ME
2278 || pGMM->fLegacyAllocationMode)
2279 break;
2280 rc = gmmR0AllocateMoreChunks(pGMM, pGVM, &pGMM->Private, cPages);
2281 }
2282 }
2283 else
2284 rc = VERR_WRONG_ORDER;
2285 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2286 }
2287 else
2288 rc = VERR_INTERNAL_ERROR_5;
2289 RTSemFastMutexRelease(pGMM->Mtx);
2290 LogFlow(("GMMR0AllocatePages: returns %Rrc\n", rc));
2291 return rc;
2292}
2293
2294
2295/**
2296 * VMMR0 request wrapper for GMMR0AllocatePages.
2297 *
2298 * @returns see GMMR0AllocatePages.
2299 * @param pVM Pointer to the shared VM structure.
2300 * @param idCpu VCPU id
2301 * @param pReq The request packet.
2302 */
2303GMMR0DECL(int) GMMR0AllocatePagesReq(PVM pVM, VMCPUID idCpu, PGMMALLOCATEPAGESREQ pReq)
2304{
2305 /*
2306 * Validate input and pass it on.
2307 */
2308 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2309 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2310 AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0]),
2311 ("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0])),
2312 VERR_INVALID_PARAMETER);
2313 AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[pReq->cPages]),
2314 ("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[pReq->cPages])),
2315 VERR_INVALID_PARAMETER);
2316
2317 return GMMR0AllocatePages(pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
2318}
2319
2320/**
2321 * Allocate a large page to represent guest RAM
2322 *
2323 * The allocated pages are not cleared and will contain random garbage.
2324 *
2325 * @returns VBox status code:
2326 * @retval VINF_SUCCESS on success.
2327 * @retval VERR_NOT_OWNER if the caller is not an EMT.
2328 * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2329 * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2330 * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2331 * that is we're trying to allocate more than we've reserved.
2332 * @returns see GMMR0AllocatePages.
2333 * @param pVM Pointer to the shared VM structure.
2334 * @param idCpu VCPU id
2335 * @param cbPage Large page size (must be GMM_CHUNK_SIZE)
 * @param pIdPage Where to return the large page ID.
 * @param pHCPhys Where to return the host physical address of the large page.
2336 */
2337GMMR0DECL(int) GMMR0AllocateLargePage(PVM pVM, VMCPUID idCpu, uint32_t cbPage, uint32_t *pIdPage, RTHCPHYS *pHCPhys)
2338{
2339 LogFlow(("GMMR0AllocateLargePage: pVM=%p cbPage=%x\n", pVM, cbPage));
2340
2341 AssertReturn(cbPage == GMM_CHUNK_SIZE, VERR_INVALID_PARAMETER);
2342 AssertPtrReturn(pIdPage, VERR_INVALID_PARAMETER);
2343 AssertPtrReturn(pHCPhys, VERR_INVALID_PARAMETER);
2344
2345 /*
2346 * Validate, get basics and take the semaphore.
2347 */
2348 PGMM pGMM;
2349 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2350 PGVM pGVM;
2351 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2352 if (RT_FAILURE(rc))
2353 return rc;
2354
2355 /* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
2356 if (pGMM->fLegacyAllocationMode)
2357 return VERR_NOT_SUPPORTED;
2358
2359 *pHCPhys = NIL_RTHCPHYS;
2360 *pIdPage = NIL_GMM_PAGEID;
2361
2362 rc = RTSemFastMutexRequest(pGMM->Mtx);
2363 AssertRCReturn(rc, rc);
2364 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2365 {
2366 const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
2367 PGMMCHUNK pChunk;
2368 GMMPAGEDESC PageDesc;
2369
2370 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cPages > pGVM->gmm.s.Reserved.cBasePages))
2371 {
2372 Log(("GMMR0AllocateLargePage: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
2373 pGVM->gmm.s.Reserved.cBasePages, pGVM->gmm.s.Allocated.cBasePages, cPages));
2374 RTSemFastMutexRelease(pGMM->Mtx);
2375 return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2376 }
2377
2378 /* Allocate a new continuous chunk. */
2379 rc = gmmR0AllocateOneChunk(pGMM, &pGMM->Private, pGVM->hSelf, GMMCHUNKTYPE_CONTINUOUS, &pChunk);
2380 if (RT_FAILURE(rc))
2381 {
2382 RTSemFastMutexRelease(pGMM->Mtx);
2383 return rc;
2384 }
2385
2386 /* Unlink the new chunk from the free list. */
2387 gmmR0UnlinkChunk(pChunk);
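 /* (Same pattern as gmmR0AllocatePages: the chunk stays off the free lists while its pages
    are handed out and is relinked once the accounting below is done.) */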
2388
2389 /* Allocate all pages. */
2390 gmmR0AllocatePage(pGMM, pGVM->hSelf, pChunk, &PageDesc);
2391 /* Return the first page as we'll use the whole chunk as one big page. */
2392 *pIdPage = PageDesc.idPage;
2393 *pHCPhys = PageDesc.HCPhysGCPhys;
2394
2395 for (unsigned i = 1; i < cPages; i++)
2396 gmmR0AllocatePage(pGMM, pGVM->hSelf, pChunk, &PageDesc);
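 /* The chunk is physically contiguous, so only the first page's address matters to the caller;
    the remaining pages are allocated merely to mark them private and keep the chunk bookkeeping
    consistent. */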
2397
2398 /* Update accounting. */
2399 pGVM->gmm.s.Allocated.cBasePages += cPages;
2400 pGVM->gmm.s.cPrivatePages += cPages;
2401 pGMM->cAllocatedPages += cPages;
2402
2403 gmmR0LinkChunk(pChunk, &pGMM->Private);
2404 }
2405 else
2406 rc = VERR_INTERNAL_ERROR_5;
2407
2408 RTSemFastMutexRelease(pGMM->Mtx);
2409 LogFlow(("GMMR0AllocateLargePage: returns %Rrc\n", rc));
2410 return rc;
2411}
2412
2413
2414/**
2415 * Free a large page
2416 *
2417 * @returns VBox status code:
2418 * @param pVM Pointer to the shared VM structure.
2419 * @param idCpu VCPU id
2420 * @param idPage Large page id
2421 */
2422GMMR0DECL(int) GMMR0FreeLargePage(PVM pVM, VMCPUID idCpu, uint32_t idPage)
2423{
2424 LogFlow(("GMMR0FreeLargePage: pVM=%p idPage=%x\n", pVM, idPage));
2425
2426 /*
2427 * Validate, get basics and take the semaphore.
2428 */
2429 PGMM pGMM;
2430 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2431 PGVM pGVM;
2432 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2433 if (RT_FAILURE(rc))
2434 return rc;
2435
2436 /* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
2437 if (pGMM->fLegacyAllocationMode)
2438 return VERR_NOT_SUPPORTED;
2439
2440 rc = RTSemFastMutexRequest(pGMM->Mtx);
2441 AssertRC(rc);
2442 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2443 {
2444 const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
2445
2446 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages < cPages))
2447 {
2448 Log(("GMMR0FreeLargePage: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cBasePages, cPages));
2449 RTSemFastMutexRelease(pGMM->Mtx);
2450 return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2451 }
2452
2453 PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
2454 if ( RT_LIKELY(pPage)
2455 && RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
2456 {
2457 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2458 Assert(pChunk);
2459 Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2460 Assert(pChunk->cPrivate > 0);
2461
2462 /* Release the memory immediately. */
2463 gmmR0FreeChunk(pGMM, NULL, pChunk);
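 /* A large page owns its entire chunk, so the whole chunk can be returned to the host in one go. */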
2464
2465 /* Update accounting. */
2466 pGVM->gmm.s.Allocated.cBasePages -= cPages;
2467 pGVM->gmm.s.cPrivatePages -= cPages;
2468 pGMM->cAllocatedPages -= cPages;
2469 }
2470 else
2471 rc = VERR_GMM_PAGE_NOT_FOUND;
2472 }
2473 else
2474 rc = VERR_INTERNAL_ERROR_5;
2475
2476 RTSemFastMutexRelease(pGMM->Mtx);
2477 LogFlow(("GMMR0FreeLargePage: returns %Rrc\n", rc));
2478 return rc;
2479}
2480
2481
2482/**
2483 * VMMR0 request wrapper for GMMR0FreeLargePage.
2484 *
2485 * @returns see GMMR0FreeLargePage.
2486 * @param pVM Pointer to the shared VM structure.
2487 * @param idCpu VCPU id
2488 * @param pReq The request packet.
2489 */
2490GMMR0DECL(int) GMMR0FreeLargePageReq(PVM pVM, VMCPUID idCpu, PGMMFREELARGEPAGEREQ pReq)
2491{
2492 /*
2493 * Validate input and pass it on.
2494 */
2495 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2496 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2497 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMFREELARGEPAGEREQ),
2498 ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(GMMFREELARGEPAGEREQ)),
2499 VERR_INVALID_PARAMETER);
2500
2501 return GMMR0FreeLargePage(pVM, idCpu, pReq->idPage);
2502}
2503
2504/**
2505 * Frees a chunk, giving it back to the host OS.
2506 *
2507 * @param pGMM Pointer to the GMM instance.
2508 * @param pGVM This is set when called from GMMR0CleanupVM so we can
2509 * unmap and free the chunk in one go.
2510 * @param pChunk The chunk to free.
2511 */
2512static void gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
2513{
2514 Assert(pChunk->Core.Key != NIL_GMM_CHUNKID);
2515
2516 /*
2517 * Cleanup hack! Unmap the chunk from the caller's address space.
2518 */
2519 if ( pChunk->cMappings
2520 && pGVM)
2521 gmmR0UnmapChunk(pGMM, pGVM, pChunk);
2522
2523 /*
2524 * If there are current mappings of the chunk, then request the
2525 * VMs to unmap them. Reposition the chunk in the free list so
2526 * it won't be a likely candidate for allocations.
2527 */
2528 if (pChunk->cMappings)
2529 {
2530 /** @todo R0 -> VM request */
2531 /* The chunk can be owned by more than one VM if fBoundMemoryMode is false! */
2532 Log(("gmmR0FreeChunk: chunk still has %d mappings; don't free!\n", pChunk->cMappings));
2533 }
2534 else
2535 {
2536 /*
2537 * Try free the memory object.
2538 */
2539 int rc = RTR0MemObjFree(pChunk->MemObj, false /* fFreeMappings */);
2540 if (RT_SUCCESS(rc))
2541 {
2542 pChunk->MemObj = NIL_RTR0MEMOBJ;
2543
2544 /*
2545 * Unlink it from everywhere.
2546 */
2547 gmmR0UnlinkChunk(pChunk);
2548
2549 PAVLU32NODECORE pCore = RTAvlU32Remove(&pGMM->pChunks, pChunk->Core.Key);
2550 Assert(pCore == &pChunk->Core); NOREF(pCore);
2551
2552 PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(pChunk->Core.Key)];
2553 if (pTlbe->pChunk == pChunk)
2554 {
2555 pTlbe->idChunk = NIL_GMM_CHUNKID;
2556 pTlbe->pChunk = NULL;
2557 }
2558
2559 Assert(pGMM->cChunks > 0);
2560 pGMM->cChunks--;
2561
2562 /*
2563 * Free the Chunk ID and struct.
2564 */
2565 gmmR0FreeChunkId(pGMM, pChunk->Core.Key);
2566 pChunk->Core.Key = NIL_GMM_CHUNKID;
2567
2568 RTMemFree(pChunk->paMappings);
2569 pChunk->paMappings = NULL;
2570
2571 RTMemFree(pChunk);
2572 }
2573 else
2574 AssertRC(rc);
2575 }
2576}
2577
2578
2579/**
2580 * Free page worker.
2581 *
2582 * The caller does all the statistic decrementing; we do all the incrementing.
2583 *
2584 * @param pGMM Pointer to the GMM instance data.
2585 * @param pChunk Pointer to the chunk this page belongs to.
2586 * @param idPage The Page ID.
2587 * @param pPage Pointer to the page.
2588 */
2589static void gmmR0FreePageWorker(PGMM pGMM, PGMMCHUNK pChunk, uint32_t idPage, PGMMPAGE pPage)
2590{
2591 Log3(("F pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x\n",
2592 pPage, pPage - &pChunk->aPages[0], idPage, pPage->Common.u2State, pChunk->iFreeHead)); NOREF(idPage);
2593
2594 /*
2595 * Put the page on the free list.
2596 */
2597 pPage->u = 0;
2598 pPage->Free.u2State = GMM_PAGE_STATE_FREE;
2599 Assert(pChunk->iFreeHead < RT_ELEMENTS(pChunk->aPages) || pChunk->iFreeHead == UINT16_MAX);
2600 pPage->Free.iNext = pChunk->iFreeHead;
2601 pChunk->iFreeHead = pPage - &pChunk->aPages[0];
2602
2603 /*
2604 * Update statistics (the cShared/cPrivate stats are up to date already),
2605 * and relink the chunk if necessary.
2606 */
2607 if ((pChunk->cFree & GMM_CHUNK_FREE_SET_MASK) == 0)
2608 {
2609 gmmR0UnlinkChunk(pChunk);
2610 pChunk->cFree++;
2611 gmmR0LinkChunk(pChunk, pChunk->cShared ? &pGMM->Shared : &pGMM->Private);
2612 }
2613 else
2614 {
2615 pChunk->cFree++;
2616 pChunk->pSet->cFreePages++;
2617
2618 /*
2619 * If the chunk becomes empty, consider giving memory back to the host OS.
2620 *
2621 * The current strategy is to try to give it back if there are other chunks
2622 * in this free list, meaning if there are at least 240 free pages in this
2623 * category. Note that since there are probably mappings of the chunk,
2624 * it won't be freed up instantly, which probably screws up this logic
2625 * a bit...
2626 */
2627 if (RT_UNLIKELY( pChunk->cFree == GMM_CHUNK_NUM_PAGES
2628 && pChunk->pFreeNext
2629 && pChunk->pFreePrev
2630 && !pGMM->fLegacyAllocationMode))
2631 gmmR0FreeChunk(pGMM, NULL, pChunk);
2632 }
2633}
2634
2635
2636/**
2637 * Frees a shared page, the page is known to exist and be valid and such.
2638 *
2639 * @param pGMM Pointer to the GMM instance.
2640 * @param idPage The Page ID
2641 * @param pPage The page structure.
2642 */
2643DECLINLINE(void) gmmR0FreeSharedPage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage)
2644{
2645 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2646 Assert(pChunk);
2647 Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2648 Assert(pChunk->cShared > 0);
2649 Assert(pGMM->cSharedPages > 0);
2650 Assert(pGMM->cAllocatedPages > 0);
2651 Assert(!pPage->Shared.cRefs);
2652
2653 pChunk->cShared--;
2654 pGMM->cAllocatedPages--;
2655 pGMM->cSharedPages--;
2656 gmmR0FreePageWorker(pGMM, pChunk, idPage, pPage);
2657}
2658
2659#ifdef VBOX_WITH_PAGE_SHARING
2660/**
2661 * Converts a private page to a shared page, the page is known to exist and be valid and such.
2662 *
2663 * @param pGMM Pointer to the GMM instance.
2664 * @param pGVM Pointer to the GVM instance.
2665 * @param HCPhys Host physical address
2666 * @param idPage The Page ID
2667 * @param pPage The page structure.
2668 */
2669DECLINLINE(void) gmmR0ConvertToSharedPage(PGMM pGMM, PGVM pGVM, RTHCPHYS HCPhys, uint32_t idPage, PGMMPAGE pPage)
2670{
2671 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2672 Assert(pChunk);
2673 Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2674 Assert(GMM_PAGE_IS_PRIVATE(pPage));
2675
2676 pChunk->cPrivate--;
2677 pChunk->cShared++;
2678
2679 pGMM->cSharedPages++;
2680
2681 pGVM->gmm.s.cSharedPages++;
2682 pGVM->gmm.s.cPrivatePages--;
2683
2684 /* Modify the page structure. */
2685 pPage->Shared.pfn = (uint32_t)(uint64_t)(HCPhys >> PAGE_SHIFT);
2686 pPage->Shared.cRefs = 1;
2687 pPage->Common.u2State = GMM_PAGE_STATE_SHARED;
2688}
2689
2690/**
2691 * Increase the use count of a shared page, the page is known to exist and be valid and such.
2692 *
2693 * @param pGMM Pointer to the GMM instance.
2694 * @param pGVM Pointer to the GVM instance.
2695 * @param pPage The page structure.
2696 */
2697DECLINLINE(void) gmmR0UseSharedPage(PGMM pGMM, PGVM pGVM, PGMMPAGE pPage)
2698{
2699 Assert(pGMM->cSharedPages > 0);
2700 Assert(pGMM->cAllocatedPages > 0);
2701
2702 pGMM->cDuplicatePages++;
2703
2704 pPage->Shared.cRefs++;
2705 pGVM->gmm.s.cSharedPages++;
2706 pGVM->gmm.s.Allocated.cBasePages++;
2707}
2708#endif
2709
2710/**
2711 * Frees a private page, the page is known to exist and be valid and such.
2712 *
2713 * @param pGMM Pointer to the GMM instance.
2714 * @param idPage The Page ID
2715 * @param pPage The page structure.
2716 */
2717DECLINLINE(void) gmmR0FreePrivatePage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage)
2718{
2719 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2720 Assert(pChunk);
2721 Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2722 Assert(pChunk->cPrivate > 0);
2723 Assert(pGMM->cAllocatedPages > 0);
2724
2725 pChunk->cPrivate--;
2726 pGMM->cAllocatedPages--;
2727 gmmR0FreePageWorker(pGMM, pChunk, idPage, pPage);
2728}
2729
2730/**
2731 * Common worker for GMMR0FreePages and GMMR0BalloonedPages.
2732 *
2733 * @returns VBox status code:
2734 * @retval xxx
2735 *
2736 * @param pGMM Pointer to the GMM instance data.
2737 * @param pGVM Pointer to the shared VM structure.
2738 * @param cPages The number of pages to free.
2739 * @param paPages Pointer to the page descriptors.
2740 * @param enmAccount The account this relates to.
2741 */
2742static int gmmR0FreePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
2743{
2744 /*
2745 * Check that the request isn't impossible wrt to the account status.
2746 */
2747 switch (enmAccount)
2748 {
2749 case GMMACCOUNT_BASE:
2750 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages < cPages))
2751 {
2752 Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cBasePages, cPages));
2753 return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2754 }
2755 break;
2756 case GMMACCOUNT_SHADOW:
2757 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cShadowPages < cPages))
2758 {
2759 Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cShadowPages, cPages));
2760 return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2761 }
2762 break;
2763 case GMMACCOUNT_FIXED:
2764 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cFixedPages < cPages))
2765 {
2766 Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cFixedPages, cPages));
2767 return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2768 }
2769 break;
2770 default:
2771 AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2772 }
2773
2774 /*
2775 * Walk the descriptors and free the pages.
2776 *
2777 * Statistics (except the account) are being updated as we go along,
2778 * unlike the alloc code. Also, stop on the first error.
2779 */
2780 int rc = VINF_SUCCESS;
2781 uint32_t iPage;
2782 for (iPage = 0; iPage < cPages; iPage++)
2783 {
2784 uint32_t idPage = paPages[iPage].idPage;
2785 PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
2786 if (RT_LIKELY(pPage))
2787 {
2788 if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
2789 {
2790 if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
2791 {
2792 Assert(pGVM->gmm.s.cPrivatePages);
2793 pGVM->gmm.s.cPrivatePages--;
2794 gmmR0FreePrivatePage(pGMM, idPage, pPage);
2795 }
2796 else
2797 {
2798 Log(("gmmR0AllocatePages: #%#x/%#x: not owner! hGVM=%#x hSelf=%#x\n", iPage, idPage,
2799 pPage->Private.hGVM, pGVM->hSelf));
2800 rc = VERR_GMM_NOT_PAGE_OWNER;
2801 break;
2802 }
2803 }
2804 else if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
2805 {
2806 Assert(pGVM->gmm.s.cSharedPages);
2807 pGVM->gmm.s.cSharedPages--;
2808 Assert(pPage->Shared.cRefs);
2809 if (!--pPage->Shared.cRefs)
2810 {
2811 gmmR0FreeSharedPage(pGMM, idPage, pPage);
2812 }
2813 else
2814 {
2815 Assert(pGMM->cDuplicatePages);
2816 pGMM->cDuplicatePages--;
2817 }
2818 }
2819 else
2820 {
2821 Log(("gmmR0AllocatePages: #%#x/%#x: already free!\n", iPage, idPage));
2822 rc = VERR_GMM_PAGE_ALREADY_FREE;
2823 break;
2824 }
2825 }
2826 else
2827 {
2828 Log(("gmmR0AllocatePages: #%#x/%#x: not found!\n", iPage, idPage));
2829 rc = VERR_GMM_PAGE_NOT_FOUND;
2830 break;
2831 }
2832 paPages[iPage].idPage = NIL_GMM_PAGEID;
2833 }
2834
2835 /*
2836 * Update the account.
2837 */
2838 switch (enmAccount)
2839 {
2840 case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages -= iPage; break;
2841 case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages -= iPage; break;
2842 case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages -= iPage; break;
2843 default:
2844 AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2845 }
2846
2847 /*
2848 * Any threshold stuff to be done here?
2849 */
2850
2851 return rc;
2852}
2853
2854
2855/**
2856 * Free one or more pages.
2857 *
2858 * This is typically used at reset time or power off.
2859 *
2860 * @returns VBox status code:
2861 * @retval xxx
2862 *
2863 * @param pVM Pointer to the shared VM structure.
2864 * @param idCpu VCPU id
2865 * @param cPages The number of pages to free.
2866 * @param paPages Pointer to the page descriptors containing the Page IDs for each page.
2867 * @param enmAccount The account this relates to.
2868 * @thread EMT.
2869 */
2870GMMR0DECL(int) GMMR0FreePages(PVM pVM, VMCPUID idCpu, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
2871{
2872 LogFlow(("GMMR0FreePages: pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pVM, cPages, paPages, enmAccount));
2873
2874 /*
2875 * Validate input and get the basics.
2876 */
2877 PGMM pGMM;
2878 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2879 PGVM pGVM;
2880 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2881 if (RT_FAILURE(rc))
2882 return rc;
2883
2884 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2885 AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
2886 AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
2887
2888 for (unsigned iPage = 0; iPage < cPages; iPage++)
2889 AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2890 /*|| paPages[iPage].idPage == NIL_GMM_PAGEID*/,
2891 ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2892
2893 /*
2894 * Take the semaphore and call the worker function.
2895 */
2896 rc = RTSemFastMutexRequest(pGMM->Mtx);
2897 AssertRC(rc);
2898 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2899 {
2900 rc = gmmR0FreePages(pGMM, pGVM, cPages, paPages, enmAccount);
2901 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2902 }
2903 else
2904 rc = VERR_INTERNAL_ERROR_5;
2905 RTSemFastMutexRelease(pGMM->Mtx);
2906 LogFlow(("GMMR0FreePages: returns %Rrc\n", rc));
2907 return rc;
2908}
2909
2910
2911/**
2912 * VMMR0 request wrapper for GMMR0FreePages.
2913 *
2914 * @returns see GMMR0FreePages.
2915 * @param pVM Pointer to the shared VM structure.
2916 * @param idCpu VCPU id
2917 * @param pReq The request packet.
2918 */
2919GMMR0DECL(int) GMMR0FreePagesReq(PVM pVM, VMCPUID idCpu, PGMMFREEPAGESREQ pReq)
2920{
2921 /*
2922 * Validate input and pass it on.
2923 */
2924 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2925 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2926 AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0]),
2927 ("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0])),
2928 VERR_INVALID_PARAMETER);
2929 AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[pReq->cPages]),
2930 ("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[pReq->cPages])),
2931 VERR_INVALID_PARAMETER);
2932
2933 return GMMR0FreePages(pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
2934}
2935
2936
2937/**
2938 * Report back on a memory ballooning request.
2939 *
2940 * The request may or may not have been initiated by the GMM. If it was initiated
2941 * by the GMM it is important that this function is called even if no pages were
2942 * ballooned.
2943 *
2944 * @returns VBox status code:
2945 * @retval VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH
2946 * @retval VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH
2947 * @retval VERR_GMM_OVERCOMMITTED_TRY_AGAIN_IN_A_BIT - reset condition
2948 * indicating that we won't necessarily have sufficient RAM to boot
2949 * the VM again and that it should pause until this changes (we'll try
2950 * balloon some other VM). (For standard deflate we have little choice
2951 * but to hope the VM won't use the memory that was returned to it.)
2952 *
2953 * @param pVM Pointer to the shared VM structure.
2954 * @param idCpu VCPU id
2955 * @param enmAction Inflate/deflate/reset
2956 * @param cBalloonedPages The number of pages that was ballooned.
2957 *
2958 * @thread EMT.
2959 */
2960GMMR0DECL(int) GMMR0BalloonedPages(PVM pVM, VMCPUID idCpu, GMMBALLOONACTION enmAction, uint32_t cBalloonedPages)
2961{
2962 LogFlow(("GMMR0BalloonedPages: pVM=%p enmAction=%d cBalloonedPages=%#x\n",
2963 pVM, enmAction, cBalloonedPages));
2964
2965 AssertMsgReturn(cBalloonedPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cBalloonedPages), VERR_INVALID_PARAMETER);
2966
2967 /*
2968 * Validate input and get the basics.
2969 */
2970 PGMM pGMM;
2971 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2972 PGVM pGVM;
2973 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2974 if (RT_FAILURE(rc))
2975 return rc;
2976
2977 /*
2978 * Take the semaphore and do some more validations.
2979 */
2980 rc = RTSemFastMutexRequest(pGMM->Mtx);
2981 AssertRC(rc);
2982 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2983 {
2984 switch (enmAction)
2985 {
2986 case GMMBALLOONACTION_INFLATE:
2987 {
2988 if (RT_LIKELY(pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cBalloonedPages <= pGVM->gmm.s.Reserved.cBasePages))
2989 {
2990 /*
2991 * Record the ballooned memory.
2992 */
2993 pGMM->cBalloonedPages += cBalloonedPages;
2994 if (pGVM->gmm.s.cReqBalloonedPages)
2995 {
2996 /* Codepath never taken. Might be interesting in the future to request ballooned memory from guests in low memory conditions.. */
2997 AssertFailed();
2998
2999 pGVM->gmm.s.cBalloonedPages += cBalloonedPages;
3000 pGVM->gmm.s.cReqActuallyBalloonedPages += cBalloonedPages;
3001 Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx Req=%#llx Actual=%#llx (pending)\n", cBalloonedPages,
3002 pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqBalloonedPages, pGVM->gmm.s.cReqActuallyBalloonedPages));
3003 }
3004 else
3005 {
3006 pGVM->gmm.s.cBalloonedPages += cBalloonedPages;
3007 Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx (user)\n",
3008 cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages));
3009 }
3010 }
3011 else
3012 {
3013 Log(("GMMR0BalloonedPages: cBasePages=%#llx Total=%#llx cBalloonedPages=%#llx Reserved=%#llx\n",
3014 pGVM->gmm.s.Allocated.cBasePages, pGVM->gmm.s.cBalloonedPages, cBalloonedPages, pGVM->gmm.s.Reserved.cBasePages));
3015 rc = VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3016 }
3017 break;
3018 }
3019
3020 case GMMBALLOONACTION_DEFLATE:
3021 {
3022 /* Deflate. */
3023 if (pGVM->gmm.s.cBalloonedPages >= cBalloonedPages)
3024 {
3025 /*
3026 * Record the ballooned memory.
3027 */
3028 Assert(pGMM->cBalloonedPages >= cBalloonedPages);
3029 pGMM->cBalloonedPages -= cBalloonedPages;
3030 pGVM->gmm.s.cBalloonedPages -= cBalloonedPages;
3031 if (pGVM->gmm.s.cReqDeflatePages)
3032 {
3033 AssertFailed(); /* This path is for later. */
3034 Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx Req=%#llx\n",
3035 cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqDeflatePages));
3036
3037 /*
3038 * Anything we need to do here now when the request has been completed?
3039 */
3040 pGVM->gmm.s.cReqDeflatePages = 0;
3041 }
3042 else
3043 Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx (user)\n",
3044 cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages));
3045 }
3046 else
3047 {
3048 Log(("GMMR0BalloonedPages: Total=%#llx cBalloonedPages=%#llx\n", pGVM->gmm.s.cBalloonedPages, cBalloonedPages));
3049 rc = VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH;
3050 }
3051 break;
3052 }
3053
3054 case GMMBALLOONACTION_RESET:
3055 {
3056 /* Reset to an empty balloon. */
3057 Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.cBalloonedPages);
3058
3059 pGMM->cBalloonedPages -= pGVM->gmm.s.cBalloonedPages;
3060 pGVM->gmm.s.cBalloonedPages = 0;
3061 break;
3062 }
3063
3064 default:
3065 rc = VERR_INVALID_PARAMETER;
3066 break;
3067 }
3068 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3069 }
3070 else
3071 rc = VERR_INTERNAL_ERROR_5;
3072
3073 RTSemFastMutexRelease(pGMM->Mtx);
3074 LogFlow(("GMMR0BalloonedPages: returns %Rrc\n", rc));
3075 return rc;
3076}
3077
3078
3079/**
3080 * VMMR0 request wrapper for GMMR0BalloonedPages.
3081 *
3082 * @returns see GMMR0BalloonedPages.
3083 * @param pVM Pointer to the shared VM structure.
3084 * @param idCpu VCPU id
3085 * @param pReq The request packet.
3086 */
3087GMMR0DECL(int) GMMR0BalloonedPagesReq(PVM pVM, VMCPUID idCpu, PGMMBALLOONEDPAGESREQ pReq)
3088{
3089 /*
3090 * Validate input and pass it on.
3091 */
3092 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3093 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3094 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMBALLOONEDPAGESREQ),
3095 ("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMBALLOONEDPAGESREQ)),
3096 VERR_INVALID_PARAMETER);
3097
3098 return GMMR0BalloonedPages(pVM, idCpu, pReq->enmAction, pReq->cBalloonedPages);
3099}
3100
3101/**
3102 * Return memory statistics for the hypervisor
3103 *
3104 * @returns VBox status code:
3105 * @param pVM Pointer to the shared VM structure.
3106 * @param pReq The request packet.
3107 */
3108GMMR0DECL(int) GMMR0QueryHypervisorMemoryStatsReq(PVM pVM, PGMMMEMSTATSREQ pReq)
3109{
3110 /*
3111 * Validate input and pass it on.
3112 */
3113 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3114 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3115 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
3116 ("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
3117 VERR_INVALID_PARAMETER);
3118
3119 /*
3120 * Validate input and get the basics.
3121 */
3122 PGMM pGMM;
3123 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3124 pReq->cAllocPages = pGMM->cAllocatedPages;
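 /* Free pages are derived: total pages in all registered chunks minus the globally allocated ones. */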
3125 pReq->cFreePages = (pGMM->cChunks << (GMM_CHUNK_SHIFT - PAGE_SHIFT)) - pGMM->cAllocatedPages;
3126 pReq->cBalloonedPages = pGMM->cBalloonedPages;
3127 pReq->cMaxPages = pGMM->cMaxPages;
3128 pReq->cSharedPages = pGMM->cDuplicatePages;
3129 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3130
3131 return VINF_SUCCESS;
3132}
3133
3134/**
3135 * Return memory statistics for the VM
3136 *
3137 * @returns VBox status code:
3138 * @param pVM Pointer to the shared VM structure.
3139 * @param idCpu VCPU id
3140 * @param pReq The request packet.
3141 */
3142GMMR0DECL(int) GMMR0QueryMemoryStatsReq(PVM pVM, VMCPUID idCpu, PGMMMEMSTATSREQ pReq)
3143{
3144 /*
3145 * Validate input and pass it on.
3146 */
3147 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3148 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3149 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
3150 ("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
3151 VERR_INVALID_PARAMETER);
3152
3153 /*
3154 * Validate input and get the basics.
3155 */
3156 PGMM pGMM;
3157 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3158 PGVM pGVM;
3159 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3160 if (RT_FAILURE(rc))
3161 return rc;
3162
3163 /*
3164 * Take the semaphore and do some more validations.
3165 */
3166 rc = RTSemFastMutexRequest(pGMM->Mtx);
3167 AssertRC(rc);
3168 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3169 {
3170 pReq->cAllocPages = pGVM->gmm.s.Allocated.cBasePages;
3171 pReq->cBalloonedPages = pGVM->gmm.s.cBalloonedPages;
3172 pReq->cMaxPages = pGVM->gmm.s.Reserved.cBasePages;
3173 pReq->cFreePages = pReq->cMaxPages - pReq->cAllocPages;
3174 }
3175 else
3176 rc = VERR_INTERNAL_ERROR_5;
3177
3178 RTSemFastMutexRelease(pGMM->Mtx);
3179 LogFlow(("GMMR3QueryVMMemoryStats: returns %Rrc\n", rc));
3180 return rc;
3181}
3182
3183/**
3184 * Unmaps a chunk previously mapped into the address space of the current process.
3185 *
3186 * @returns VBox status code.
3187 * @param pGMM Pointer to the GMM instance data.
3188 * @param pGVM Pointer to the Global VM structure.
3189 * @param pChunk Pointer to the chunk to be unmapped.
3190 */
3191static int gmmR0UnmapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
3192{
3193 if (!pGMM->fLegacyAllocationMode)
3194 {
3195 /*
3196 * Find the mapping and try unmapping it.
3197 */
3198 for (uint32_t i = 0; i < pChunk->cMappings; i++)
3199 {
3200 Assert(pChunk->paMappings[i].pGVM && pChunk->paMappings[i].MapObj != NIL_RTR0MEMOBJ);
3201 if (pChunk->paMappings[i].pGVM == pGVM)
3202 {
3203 /* unmap */
3204 int rc = RTR0MemObjFree(pChunk->paMappings[i].MapObj, false /* fFreeMappings (NA) */);
3205 if (RT_SUCCESS(rc))
3206 {
3207 /* update the record. */
3208 pChunk->cMappings--;
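 /* Keep the mappings array dense by moving the last entry into the vacated slot. */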
3209 if (i < pChunk->cMappings)
3210 pChunk->paMappings[i] = pChunk->paMappings[pChunk->cMappings];
3211 pChunk->paMappings[pChunk->cMappings].MapObj = NIL_RTR0MEMOBJ;
3212 pChunk->paMappings[pChunk->cMappings].pGVM = NULL;
3213 }
3214 return rc;
3215 }
3216 }
3217 }
3218 else if (pChunk->hGVM == pGVM->hSelf)
3219 return VINF_SUCCESS;
3220
3221 Log(("gmmR0UnmapChunk: Chunk %#x is not mapped into pGVM=%p/%#x\n", pChunk->Core.Key, pGVM, pGVM->hSelf));
3222 return VERR_GMM_CHUNK_NOT_MAPPED;
3223}
3224
3225
3226/**
3227 * Maps a chunk into the user address space of the current process.
3228 *
3229 * @returns VBox status code.
3230 * @param pGMM Pointer to the GMM instance data.
3231 * @param pGVM Pointer to the Global VM structure.
3232 * @param pChunk Pointer to the chunk to be mapped.
3233 * @param ppvR3 Where to store the ring-3 address of the mapping.
3234 * In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will
3235 * contain the address of the existing mapping.
3236 */
3237static int gmmR0MapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
3238{
3239 /*
3240 * If we're in legacy mode this is simple.
3241 */
3242 if (pGMM->fLegacyAllocationMode)
3243 {
3244 if (pChunk->hGVM != pGVM->hSelf)
3245 {
3246 Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
3247 return VERR_GMM_CHUNK_NOT_FOUND;
3248 }
3249
3250 *ppvR3 = RTR0MemObjAddressR3(pChunk->MemObj);
3251 return VINF_SUCCESS;
3252 }
3253
3254 /*
3255 * Check to see if the chunk is already mapped.
3256 */
3257 for (uint32_t i = 0; i < pChunk->cMappings; i++)
3258 {
3259 Assert(pChunk->paMappings[i].pGVM && pChunk->paMappings[i].MapObj != NIL_RTR0MEMOBJ);
3260 if (pChunk->paMappings[i].pGVM == pGVM)
3261 {
3262 *ppvR3 = RTR0MemObjAddressR3(pChunk->paMappings[i].MapObj);
3263 Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
3264#ifdef VBOX_WITH_PAGE_SHARING
3265 /* The ring-3 chunk cache can be out of sync; don't fail. */
3266 return VINF_SUCCESS;
3267#else
3268 return VERR_GMM_CHUNK_ALREADY_MAPPED;
3269#endif
3270 }
3271 }
3272
3273 /*
3274 * Do the mapping.
3275 */
3276 RTR0MEMOBJ MapObj;
3277 int rc = RTR0MemObjMapUser(&MapObj, pChunk->MemObj, (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
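 /* NIL_RTR0PROCESS maps the chunk into the current process, i.e. the VM's ring-3 process making this call. */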
3278 if (RT_SUCCESS(rc))
3279 {
3280 /* reallocate the array? */
3281 if ((pChunk->cMappings & 1 /*7*/) == 0)
3282 {
3283 void *pvMappings = RTMemRealloc(pChunk->paMappings, (pChunk->cMappings + 2 /*8*/) * sizeof(pChunk->paMappings[0]));
3284 if (RT_UNLIKELY(!pvMappings))
3285 {
3286 rc = RTR0MemObjFree(MapObj, false /* fFreeMappings (NA) */);
3287 AssertRC(rc);
3288 return VERR_NO_MEMORY;
3289 }
3290 pChunk->paMappings = (PGMMCHUNKMAP)pvMappings;
3291 }
3292
3293 /* insert new entry */
3294 pChunk->paMappings[pChunk->cMappings].MapObj = MapObj;
3295 pChunk->paMappings[pChunk->cMappings].pGVM = pGVM;
3296 pChunk->cMappings++;
3297
3298 *ppvR3 = RTR0MemObjAddressR3(MapObj);
3299 }
3300
3301 return rc;
3302}
3303
3304/**
3305 * Check if a chunk is mapped into the specified VM
3306 *
3307 * @returns mapped yes/no
3308 * @param pGVM Pointer to the Global VM structure.
3309 * @param pChunk Pointer to the chunk to check.
3310 * @param ppvR3 Where to store the ring-3 address of the mapping (NULL if not mapped).
3311 */
3312static int gmmR0IsChunkMapped(PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
3313{
3314 /*
3315 * Check to see if the chunk is already mapped.
3316 */
3317 for (uint32_t i = 0; i < pChunk->cMappings; i++)
3318 {
3319 Assert(pChunk->paMappings[i].pGVM && pChunk->paMappings[i].MapObj != NIL_RTR0MEMOBJ);
3320 if (pChunk->paMappings[i].pGVM == pGVM)
3321 {
3322 *ppvR3 = RTR0MemObjAddressR3(pChunk->paMappings[i].MapObj);
3323 return true;
3324 }
3325 }
3326 *ppvR3 = NULL;
3327 return false;
3328}
3329
3330/**
3331 * Map a chunk and/or unmap another chunk.
3332 *
3333 * The mapping and unmapping applies to the current process.
3334 *
3335 * This API does two things because it saves a kernel call per mapping
3336 * when the ring-3 mapping cache is full.
3337 *
3338 * @returns VBox status code.
3339 * @param pVM The VM.
3340 * @param idChunkMap The chunk to map. NIL_GMM_CHUNKID if nothing to map.
3341 * @param idChunkUnmap The chunk to unmap. NIL_GMM_CHUNKID if nothing to unmap.
3342 * @param ppvR3 Where to store the address of the mapped chunk. NULL is ok if nothing to map.
3343 * @thread EMT
3344 */
3345GMMR0DECL(int) GMMR0MapUnmapChunk(PVM pVM, uint32_t idChunkMap, uint32_t idChunkUnmap, PRTR3PTR ppvR3)
3346{
3347 LogFlow(("GMMR0MapUnmapChunk: pVM=%p idChunkMap=%#x idChunkUnmap=%#x ppvR3=%p\n",
3348 pVM, idChunkMap, idChunkUnmap, ppvR3));
3349
3350 /*
3351 * Validate input and get the basics.
3352 */
3353 PGMM pGMM;
3354 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3355 PGVM pGVM;
3356 int rc = GVMMR0ByVM(pVM, &pGVM);
3357 if (RT_FAILURE(rc))
3358 return rc;
3359
3360 AssertCompile(NIL_GMM_CHUNKID == 0);
3361 AssertMsgReturn(idChunkMap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkMap), VERR_INVALID_PARAMETER);
3362 AssertMsgReturn(idChunkUnmap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkUnmap), VERR_INVALID_PARAMETER);
3363
3364 if ( idChunkMap == NIL_GMM_CHUNKID
3365 && idChunkUnmap == NIL_GMM_CHUNKID)
3366 return VERR_INVALID_PARAMETER;
3367
3368 if (idChunkMap != NIL_GMM_CHUNKID)
3369 {
3370 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3371 *ppvR3 = NIL_RTR3PTR;
3372 }
3373
3374 /*
3375 * Take the semaphore and do the work.
3376 *
 3377 * The unmapping is done last since it's easier to undo a mapping than
 3378 * to undo an unmapping. The ring-3 mapping cache cannot be so big
 3379 * that it pushes the user virtual address space to within a chunk of
 3380 * its limits, so there is no problem here.
3381 */
3382 rc = RTSemFastMutexRequest(pGMM->Mtx);
3383 AssertRC(rc);
3384 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3385 {
3386 PGMMCHUNK pMap = NULL;
 3387 if (idChunkMap != NIL_GMM_CHUNKID)
3388 {
3389 pMap = gmmR0GetChunk(pGMM, idChunkMap);
3390 if (RT_LIKELY(pMap))
3391 rc = gmmR0MapChunk(pGMM, pGVM, pMap, ppvR3);
3392 else
3393 {
3394 Log(("GMMR0MapUnmapChunk: idChunkMap=%#x\n", idChunkMap));
3395 rc = VERR_GMM_CHUNK_NOT_FOUND;
3396 }
3397 }
3398
3399 if ( idChunkUnmap != NIL_GMM_CHUNKID
3400 && RT_SUCCESS(rc))
3401 {
3402 PGMMCHUNK pUnmap = gmmR0GetChunk(pGMM, idChunkUnmap);
3403 if (RT_LIKELY(pUnmap))
3404 rc = gmmR0UnmapChunk(pGMM, pGVM, pUnmap);
3405 else
3406 {
3407 Log(("GMMR0MapUnmapChunk: idChunkUnmap=%#x\n", idChunkUnmap));
3408 rc = VERR_GMM_CHUNK_NOT_FOUND;
3409 }
3410
3411 if (RT_FAILURE(rc) && pMap)
3412 gmmR0UnmapChunk(pGMM, pGVM, pMap);
3413 }
3414
3415 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3416 }
3417 else
3418 rc = VERR_INTERNAL_ERROR_5;
3419 RTSemFastMutexRelease(pGMM->Mtx);
3420
3421 LogFlow(("GMMR0MapUnmapChunk: returns %Rrc\n", rc));
3422 return rc;
3423}
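/**
 * A minimal calling sketch for GMMR0MapUnmapChunk, assuming a valid pVM on the EMT;
 * idNewChunk and idOldChunk are hypothetical chunk IDs owned by the caller. Passing
 * NIL_GMM_CHUNKID for either parameter skips that half of the operation.
 * @code
 *     RTR3PTR pvR3 = NIL_RTR3PTR;
 *     // Map idNewChunk and, in the same ring-0 call, unmap idOldChunk.
 *     int rc = GMMR0MapUnmapChunk(pVM, idNewChunk, idOldChunk, &pvR3);
 *     if (RT_SUCCESS(rc))
 *     {
 *         // pvR3 is the ring-3 address of the newly mapped chunk.
 *     }
 * @endcode
 */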
3424
3425
3426/**
3427 * VMMR0 request wrapper for GMMR0MapUnmapChunk.
3428 *
3429 * @returns see GMMR0MapUnmapChunk.
3430 * @param pVM Pointer to the shared VM structure.
3431 * @param pReq The request packet.
3432 */
3433GMMR0DECL(int) GMMR0MapUnmapChunkReq(PVM pVM, PGMMMAPUNMAPCHUNKREQ pReq)
3434{
3435 /*
3436 * Validate input and pass it on.
3437 */
3438 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3439 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3440 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3441
3442 return GMMR0MapUnmapChunk(pVM, pReq->idChunkMap, pReq->idChunkUnmap, &pReq->pvR3);
3443}
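/**
 * A sketch of how a GMMMAPUNMAPCHUNKREQ might be filled in before being handed to
 * this wrapper. Only the fields referenced above are shown; the request header
 * initialization follows the usual VMMR0 request convention and is assumed here,
 * as is the hypothetical idNewChunk value.
 * @code
 *     GMMMAPUNMAPCHUNKREQ Req;
 *     Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;   // standard request header (assumed)
 *     Req.Hdr.cbReq    = sizeof(Req);            // checked by the wrapper above
 *     Req.idChunkMap   = idNewChunk;             // hypothetical chunk to map
 *     Req.idChunkUnmap = NIL_GMM_CHUNKID;        // nothing to unmap this time
 *     Req.pvR3         = NIL_RTR3PTR;
 *     int rc = GMMR0MapUnmapChunkReq(pVM, &Req); // normally dispatched via VMMR0
 * @endcode
 */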
3444
3445
3446/**
3447 * Legacy mode API for supplying pages.
3448 *
 3449 * The specified user address points to an allocation-chunk-sized block that
 3450 * will be locked down and used by the GMM when it is asked for pages.
3451 *
3452 * @returns VBox status code.
3453 * @param pVM The VM.
3454 * @param idCpu VCPU id
3455 * @param pvR3 Pointer to the chunk size memory block to lock down.
3456 */
3457GMMR0DECL(int) GMMR0SeedChunk(PVM pVM, VMCPUID idCpu, RTR3PTR pvR3)
3458{
3459 /*
3460 * Validate input and get the basics.
3461 */
3462 PGMM pGMM;
3463 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3464 PGVM pGVM;
3465 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3466 if (RT_FAILURE(rc))
3467 return rc;
3468
3469 AssertPtrReturn(pvR3, VERR_INVALID_POINTER);
3470 AssertReturn(!(PAGE_OFFSET_MASK & pvR3), VERR_INVALID_POINTER);
3471
3472 if (!pGMM->fLegacyAllocationMode)
3473 {
3474 Log(("GMMR0SeedChunk: not in legacy allocation mode!\n"));
3475 return VERR_NOT_SUPPORTED;
3476 }
3477
3478 /*
3479 * Lock the memory before taking the semaphore.
3480 */
3481 RTR0MEMOBJ MemObj;
3482 rc = RTR0MemObjLockUser(&MemObj, pvR3, GMM_CHUNK_SIZE, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
3483 if (RT_SUCCESS(rc))
3484 {
3485 /* Grab the lock. */
3486 rc = RTSemFastMutexRequest(pGMM->Mtx);
3487 AssertRC(rc);
3488 if (RT_SUCCESS(rc))
3489 {
3490 /*
3491 * Add a new chunk with our hGVM.
3492 */
3493 rc = gmmR0RegisterChunk(pGMM, &pGMM->Private, MemObj, pGVM->hSelf, GMMCHUNKTYPE_NON_CONTINUOUS);
3494 RTSemFastMutexRelease(pGMM->Mtx);
3495 }
3496
3497 if (RT_FAILURE(rc))
3498 RTR0MemObjFree(MemObj, false /* fFreeMappings */);
3499 }
3500
3501 LogFlow(("GMMR0SeedChunk: rc=%d (pvR3=%p)\n", rc, pvR3));
3502 return rc;
3503}
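/**
 * A minimal calling sketch for GMMR0SeedChunk, assuming legacy allocation mode and a
 * ring-3 block pvSeed of exactly GMM_CHUNK_SIZE bytes, page aligned, that the caller
 * obtained beforehand (how that block is allocated is outside the scope of this file).
 * @code
 *     AssertReturn(!((uintptr_t)pvSeed & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
 *     int rc = GMMR0SeedChunk(pVM, idCpu, (RTR3PTR)pvSeed);
 *     if (rc == VERR_NOT_SUPPORTED)
 *     {
 *         // Not in legacy allocation mode; the GMM allocates chunks itself.
 *     }
 * @endcode
 */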
3504
3505typedef struct
3506{
3507 PAVLGCPTRNODECORE pNode;
3508 char *pszModuleName;
3509 char *pszVersion;
3510 VBOXOSFAMILY enmGuestOS;
3511} GMMFINDMODULEBYNAME, *PGMMFINDMODULEBYNAME;
3512
3513/**
3514 * Tree enumeration callback for finding identical modules by name and version
3515 */
3516DECLCALLBACK(int) gmmR0CheckForIdenticalModule(PAVLGCPTRNODECORE pNode, void *pvUser)
3517{
3518 PGMMFINDMODULEBYNAME pInfo = (PGMMFINDMODULEBYNAME)pvUser;
3519 PGMMSHAREDMODULE pModule = (PGMMSHAREDMODULE)pNode;
3520
3521 if ( pInfo
3522 && pInfo->enmGuestOS == pModule->enmGuestOS
3523 /** @todo replace with RTStrNCmp */
3524 && !strcmp(pModule->szName, pInfo->pszModuleName)
3525 && !strcmp(pModule->szVersion, pInfo->pszVersion))
3526 {
3527 pInfo->pNode = pNode;
3528 return 1; /* stop search */
3529 }
3530 return 0;
3531}
3532
3533
3534/**
3535 * Registers a new shared module for the VM
3536 *
3537 * @returns VBox status code.
3538 * @param pVM VM handle
3539 * @param idCpu VCPU id
3540 * @param enmGuestOS Guest OS type
3541 * @param pszModuleName Module name
3542 * @param pszVersion Module version
3543 * @param GCBaseAddr Module base address
3544 * @param cbModule Module size
3545 * @param cRegions Number of shared region descriptors
3546 * @param pRegions Shared region(s)
3547 */
3548GMMR0DECL(int) GMMR0RegisterSharedModule(PVM pVM, VMCPUID idCpu, VBOXOSFAMILY enmGuestOS, char *pszModuleName, char *pszVersion, RTGCPTR GCBaseAddr, uint32_t cbModule,
3549 unsigned cRegions, VMMDEVSHAREDREGIONDESC *pRegions)
3550{
3551#ifdef VBOX_WITH_PAGE_SHARING
3552 /*
3553 * Validate input and get the basics.
3554 */
3555 PGMM pGMM;
3556 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3557 PGVM pGVM;
3558 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3559 if (RT_FAILURE(rc))
3560 return rc;
3561
3562 Log(("GMMR0RegisterSharedModule %s %s base %RGv size %x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule));
3563
3564 /*
3565 * Take the semaphore and do some more validations.
3566 */
3567 rc = RTSemFastMutexRequest(pGMM->Mtx);
3568 AssertRC(rc);
3569 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3570 {
3571 bool fNewModule = false;
3572
3573 /* Check if this module is already locally registered. */
3574 PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
3575 if (!pRecVM)
3576 {
3577 pRecVM = (PGMMSHAREDMODULEPERVM)RTMemAllocZ(RT_OFFSETOF(GMMSHAREDMODULEPERVM, aRegions[cRegions]));
3578 if (!pRecVM)
3579 {
3580 AssertFailed();
3581 rc = VERR_NO_MEMORY;
3582 goto end;
3583 }
3584 pRecVM->Core.Key = GCBaseAddr;
3585 pRecVM->cRegions = cRegions;
3586
3587 /* Save the region data as they can differ between VMs (address space scrambling or simply different loading order) */
3588 for (unsigned i = 0; i < cRegions; i++)
3589 {
3590 pRecVM->aRegions[i].GCRegionAddr = pRegions[i].GCRegionAddr;
3591 pRecVM->aRegions[i].cbRegion = RT_ALIGN_T(pRegions[i].cbRegion, PAGE_SIZE, uint32_t);
3592 pRecVM->aRegions[i].u32Alignment = 0;
3593 pRecVM->aRegions[i].paHCPhysPageID = NULL; /* unused */
3594 }
3595
3596 bool ret = RTAvlGCPtrInsert(&pGVM->gmm.s.pSharedModuleTree, &pRecVM->Core);
3597 Assert(ret);
3598
3599 Log(("GMMR0RegisterSharedModule: new local module %s\n", pszModuleName));
3600 fNewModule = true;
3601 }
3602 else
3603 rc = VINF_PGM_SHARED_MODULE_ALREADY_REGISTERED;
3604
3605 /* Check if this module is already globally registered. */
3606 PGMMSHAREDMODULE pGlobalModule = (PGMMSHAREDMODULE)RTAvlGCPtrGet(&pGMM->pGlobalSharedModuleTree, GCBaseAddr);
3607 if ( !pGlobalModule
3608 && enmGuestOS == VBOXOSFAMILY_Windows64)
3609 {
3610 /* Two identical copies of e.g. Win7 x64 will typically not have a similar virtual address space layout for dlls or kernel modules.
3611 * Try to find identical binaries based on name and version.
3612 */
3613 GMMFINDMODULEBYNAME Info;
3614
3615 Info.pNode = NULL;
3616 Info.pszVersion = pszVersion;
3617 Info.pszModuleName = pszModuleName;
3618 Info.enmGuestOS = enmGuestOS;
3619
3620 Log(("Try to find identical module %s\n", pszModuleName));
3621 int ret = RTAvlGCPtrDoWithAll(&pGMM->pGlobalSharedModuleTree, true /* fFromLeft */, gmmR0CheckForIdenticalModule, &Info);
3622 if (ret == 1)
3623 {
3624 Assert(Info.pNode);
3625 pGlobalModule = (PGMMSHAREDMODULE)Info.pNode;
3626 Log(("Found identical module at %RGv\n", pGlobalModule->Core.Key));
3627 }
3628 }
3629
3630 if (!pGlobalModule)
3631 {
3632 Assert(fNewModule);
3633 Assert(!pRecVM->fCollision);
3634
3635 pGlobalModule = (PGMMSHAREDMODULE)RTMemAllocZ(RT_OFFSETOF(GMMSHAREDMODULE, aRegions[cRegions]));
3636 if (!pGlobalModule)
3637 {
3638 AssertFailed();
3639 rc = VERR_NO_MEMORY;
3640 goto end;
3641 }
3642
3643 pGlobalModule->Core.Key = GCBaseAddr;
3644 pGlobalModule->cbModule = cbModule;
3645 /* Input limit already safe; no need to check again. */
3646 /** @todo replace with RTStrCopy */
3647 strcpy(pGlobalModule->szName, pszModuleName);
3648 strcpy(pGlobalModule->szVersion, pszVersion);
3649
3650 pGlobalModule->enmGuestOS = enmGuestOS;
3651 pGlobalModule->cRegions = cRegions;
3652
3653 for (unsigned i = 0; i < cRegions; i++)
3654 {
3655 Log(("New region %d base=%RGv size %x\n", i, pRegions[i].GCRegionAddr, pRegions[i].cbRegion));
3656 pGlobalModule->aRegions[i].GCRegionAddr = pRegions[i].GCRegionAddr;
3657 pGlobalModule->aRegions[i].cbRegion = RT_ALIGN_T(pRegions[i].cbRegion, PAGE_SIZE, uint32_t);
3658 pGlobalModule->aRegions[i].u32Alignment = 0;
3659 pGlobalModule->aRegions[i].paHCPhysPageID = NULL; /* uninitialized. */
3660 }
3661
3662 /* Save reference. */
3663 pRecVM->pGlobalModule = pGlobalModule;
3664 pRecVM->fCollision = false;
3665 pGlobalModule->cUsers++;
3666 rc = VINF_SUCCESS;
3667
3668 bool ret = RTAvlGCPtrInsert(&pGMM->pGlobalSharedModuleTree, &pGlobalModule->Core);
3669 Assert(ret);
3670
3671 Log(("GMMR0RegisterSharedModule: new global module %s\n", pszModuleName));
3672 }
3673 else
3674 {
3675 Assert(pGlobalModule->cUsers > 0);
3676
3677 /* Make sure the name and version are identical. */
3678 /** @todo replace with RTStrNCmp */
3679 if ( !strcmp(pGlobalModule->szName, pszModuleName)
3680 && !strcmp(pGlobalModule->szVersion, pszVersion))
3681 {
3682 /* Save reference. */
3683 pRecVM->pGlobalModule = pGlobalModule;
3684 if ( fNewModule
3685 || pRecVM->fCollision == true) /* colliding module unregistered and new one registered since the last check */
3686 {
3687 pGlobalModule->cUsers++;
3688 Log(("GMMR0RegisterSharedModule: using existing module %s cUser=%d!\n", pszModuleName, pGlobalModule->cUsers));
3689 }
3690 pRecVM->fCollision = false;
3691 rc = VINF_SUCCESS;
3692 }
3693 else
3694 {
3695 Log(("GMMR0RegisterSharedModule: module %s collision!\n", pszModuleName));
3696 pRecVM->fCollision = true;
3697 rc = VINF_PGM_SHARED_MODULE_COLLISION;
3698 goto end;
3699 }
3700 }
3701
3702 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3703 }
3704 else
3705 rc = VERR_INTERNAL_ERROR_5;
3706
3707end:
3708 RTSemFastMutexRelease(pGMM->Mtx);
3709 return rc;
3710#else
3711 return VERR_NOT_IMPLEMENTED;
3712#endif
3713}
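/**
 * A registration sketch for GMMR0RegisterSharedModule with a single shared region.
 * The module name, version, base address and sizes are hypothetical values standing
 * in for what the guest additions would normally report.
 * @code
 *     VMMDEVSHAREDREGIONDESC Region;
 *     Region.GCRegionAddr = GCModuleBase;     // hypothetical guest address of the region
 *     Region.cbRegion     = _64K;             // rounded up to whole pages internally
 *
 *     int rc = GMMR0RegisterSharedModule(pVM, idCpu, VBOXOSFAMILY_Windows64,
 *                                        (char *)"ntdll.dll", (char *)"6.1.7600.16385",
 *                                        GCModuleBase, cbModule, 1, &Region);
 *     // VINF_PGM_SHARED_MODULE_ALREADY_REGISTERED and VINF_PGM_SHARED_MODULE_COLLISION
 *     // are informational statuses, not failures.
 * @endcode
 */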
3714
3715
3716/**
3717 * VMMR0 request wrapper for GMMR0RegisterSharedModule.
3718 *
3719 * @returns see GMMR0RegisterSharedModule.
3720 * @param pVM Pointer to the shared VM structure.
3721 * @param idCpu VCPU id
3722 * @param pReq The request packet.
3723 */
3724GMMR0DECL(int) GMMR0RegisterSharedModuleReq(PVM pVM, VMCPUID idCpu, PGMMREGISTERSHAREDMODULEREQ pReq)
3725{
3726 /*
3727 * Validate input and pass it on.
3728 */
3729 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3730 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3731 AssertMsgReturn(pReq->Hdr.cbReq >= sizeof(*pReq) && pReq->Hdr.cbReq == RT_UOFFSETOF(GMMREGISTERSHAREDMODULEREQ, aRegions[pReq->cRegions]), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3732
3733 /* Pass back return code in the request packet to preserve informational codes. (VMMR3CallR0 chokes on them) */
3734 pReq->rc = GMMR0RegisterSharedModule(pVM, idCpu, pReq->enmGuestOS, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule, pReq->cRegions, pReq->aRegions);
3735 return VINF_SUCCESS;
3736}
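/**
 * Because GMMREGISTERSHAREDMODULEREQ carries a variable sized aRegions[] array, the
 * request size must be computed the same way the validation above does. A sketch,
 * assuming cRegions region descriptors follow the fixed part of the request:
 * @code
 *     uint32_t cbReq = RT_UOFFSETOF(GMMREGISTERSHAREDMODULEREQ, aRegions[cRegions]);
 *     PGMMREGISTERSHAREDMODULEREQ pReq = (PGMMREGISTERSHAREDMODULEREQ)RTMemAllocZ(cbReq);
 *     AssertReturn(pReq, VERR_NO_MEMORY);
 *     pReq->Hdr.cbReq = cbReq;      // must match the RT_UOFFSETOF expression above
 *     pReq->cRegions  = cRegions;
 *     // ... fill in enmGuestOS, szName, szVersion, GCBaseAddr, cbModule and aRegions[] ...
 * @endcode
 */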
3737
3738/**
3739 * Unregisters a shared module for the VM
3740 *
3741 * @returns VBox status code.
3742 * @param pVM VM handle
3743 * @param idCpu VCPU id
3744 * @param pszModuleName Module name
3745 * @param pszVersion Module version
3746 * @param GCBaseAddr Module base address
3747 * @param cbModule Module size
3748 */
3749GMMR0DECL(int) GMMR0UnregisterSharedModule(PVM pVM, VMCPUID idCpu, char *pszModuleName, char *pszVersion, RTGCPTR GCBaseAddr, uint32_t cbModule)
3750{
3751#ifdef VBOX_WITH_PAGE_SHARING
3752 /*
3753 * Validate input and get the basics.
3754 */
3755 PGMM pGMM;
3756 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3757 PGVM pGVM;
3758 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3759 if (RT_FAILURE(rc))
3760 return rc;
3761
3762 Log(("GMMR0UnregisterSharedModule %s %s base=%RGv size %x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule));
3763
3764 /*
3765 * Take the semaphore and do some more validations.
3766 */
3767 rc = RTSemFastMutexRequest(pGMM->Mtx);
3768 AssertRC(rc);
3769 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3770 {
3771 PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
3772 if (pRecVM)
3773 {
3774 /* Remove reference to global shared module. */
3775 if (!pRecVM->fCollision)
3776 {
3777 PGMMSHAREDMODULE pRec = pRecVM->pGlobalModule;
3778 Assert(pRec);
3779
3780 if (pRec) /* paranoia */
3781 {
3782 Assert(pRec->cUsers);
3783 pRec->cUsers--;
3784 if (pRec->cUsers == 0)
3785 {
3786 /* Free the ranges, but leave the pages intact as there might still be references; they will be cleared by the COW mechanism. */
3787 for (unsigned i = 0; i < pRec->cRegions; i++)
3788 if (pRec->aRegions[i].paHCPhysPageID)
3789 RTMemFree(pRec->aRegions[i].paHCPhysPageID);
3790
3791 Assert(pRec->Core.Key == GCBaseAddr || pRec->enmGuestOS == VBOXOSFAMILY_Windows64);
3792 Assert(pRec->cRegions == pRecVM->cRegions);
3793#ifdef VBOX_STRICT
3794 for (unsigned i = 0; i < pRecVM->cRegions; i++)
3795 {
3796 Assert(pRecVM->aRegions[i].GCRegionAddr == pRec->aRegions[i].GCRegionAddr);
3797 Assert(pRecVM->aRegions[i].cbRegion == pRec->aRegions[i].cbRegion);
3798 }
3799#endif
3800
3801 /* Remove from the tree and free memory. */
3802 RTAvlGCPtrRemove(&pGMM->pGlobalSharedModuleTree, pRec->Core.Key);
3803 RTMemFree(pRec);
3804 }
3805 }
3806 else
3807 rc = VERR_PGM_SHARED_MODULE_REGISTRATION_INCONSISTENCY;
3808 }
3809 else
3810 Assert(!pRecVM->pGlobalModule);
3811
3812 /* Remove from the tree and free memory. */
3813 RTAvlGCPtrRemove(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
3814 RTMemFree(pRecVM);
3815 }
3816 else
3817 rc = VERR_PGM_SHARED_MODULE_NOT_FOUND;
3818
3819 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3820 }
3821 else
3822 rc = VERR_INTERNAL_ERROR_5;
3823
3824 RTSemFastMutexRelease(pGMM->Mtx);
3825 return rc;
3826#else
3827 return VERR_NOT_IMPLEMENTED;
3828#endif
3829}
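/**
 * A minimal unregistration sketch. The per-VM record is looked up by the module base
 * address; the name, version and size arguments are used for logging and are
 * hypothetical values here, matching what was registered earlier.
 * @code
 *     int rc = GMMR0UnregisterSharedModule(pVM, idCpu, (char *)"ntdll.dll",
 *                                          (char *)"6.1.7600.16385", GCModuleBase, cbModule);
 *     if (rc == VERR_PGM_SHARED_MODULE_NOT_FOUND)
 *     {
 *         // Nothing was registered at GCModuleBase for this VM.
 *     }
 * @endcode
 */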
3830
3831/**
3832 * VMMR0 request wrapper for GMMR0UnregisterSharedModule.
3833 *
3834 * @returns see GMMR0UnregisterSharedModule.
3835 * @param pVM Pointer to the shared VM structure.
3836 * @param idCpu VCPU id
3837 * @param pReq The request packet.
3838 */
3839GMMR0DECL(int) GMMR0UnregisterSharedModuleReq(PVM pVM, VMCPUID idCpu, PGMMUNREGISTERSHAREDMODULEREQ pReq)
3840{
3841 /*
3842 * Validate input and pass it on.
3843 */
3844 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3845 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3846 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3847
3848 return GMMR0UnregisterSharedModule(pVM, idCpu, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule);
3849}
3850
3851
3852#ifdef VBOX_WITH_PAGE_SHARING
3853/**
 3854 * Checks the specified shared module page for changes
3855 *
3856 * Performs the following tasks:
3857 * - If a shared page is new, then it changes the GMM page type to shared and
3858 * returns it in the pPageDesc descriptor.
3859 * - If a shared page already exists, then it checks if the VM page is
3860 * identical and if so frees the VM page and returns the shared page in
3861 * pPageDesc descriptor.
3862 *
3863 * @remarks ASSUMES the caller has acquired the GMM semaphore!!
3864 *
3865 * @returns VBox status code.
 3866 * @note The GMM instance data is looked up internally with GMM_GET_VALID_INSTANCE.
3867 * @param pGVM Pointer to the GVM instance data.
3868 * @param pModule Module description
3869 * @param idxRegion Region index
3870 * @param idxPage Page index
 3871 * @param pPageDesc Page descriptor
3872 */
3873GMMR0DECL(int) GMMR0SharedModuleCheckPage(PGVM pGVM, PGMMSHAREDMODULE pModule, unsigned idxRegion, unsigned idxPage,
3874 PGMMSHAREDPAGEDESC pPageDesc)
3875{
3876 int rc = VINF_SUCCESS;
3877 PGMM pGMM;
3878 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3879 unsigned cPages = pModule->aRegions[idxRegion].cbRegion >> PAGE_SHIFT;
3880
3881 AssertReturn(idxRegion < pModule->cRegions, VERR_INVALID_PARAMETER);
3882 AssertReturn(idxPage < cPages, VERR_INVALID_PARAMETER);
3883
 3884 LogFlow(("GMMR0SharedModuleCheckPage %s base %RGv region %d idxPage %d\n", pModule->szName, pModule->Core.Key, idxRegion, idxPage));
3885
3886 PGMMSHAREDREGIONDESC pGlobalRegion = &pModule->aRegions[idxRegion];
3887 if (!pGlobalRegion->paHCPhysPageID)
3888 {
3889 /* First time; create a page descriptor array. */
3890 Log(("Allocate page descriptor array for %d pages\n", cPages));
3891 pGlobalRegion->paHCPhysPageID = (uint32_t *)RTMemAlloc(cPages * sizeof(*pGlobalRegion->paHCPhysPageID));
3892 if (!pGlobalRegion->paHCPhysPageID)
3893 {
3894 AssertFailed();
3895 rc = VERR_NO_MEMORY;
3896 goto end;
3897 }
3898 /* Invalidate all descriptors. */
3899 for (unsigned i = 0; i < cPages; i++)
3900 pGlobalRegion->paHCPhysPageID[i] = NIL_GMM_PAGEID;
3901 }
3902
 3903 /* Is this the first time we've seen this shared page? */
3904 if (pGlobalRegion->paHCPhysPageID[idxPage] == NIL_GMM_PAGEID)
3905 {
3906new_shared_page:
3907 Log(("New shared page guest %RGp host %RHp\n", pPageDesc->GCPhys, pPageDesc->HCPhys));
3908
3909 /* Easy case: just change the internal page type. */
3910 PGMMPAGE pPage = gmmR0GetPage(pGMM, pPageDesc->uHCPhysPageId);
3911 if (!pPage)
3912 {
3913 Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #1 (GCPhys=%RGp HCPhys=%RHp idxRegion=%#x idxPage=%#x)\n",
3914 pPageDesc->uHCPhysPageId, pPageDesc->GCPhys, pPageDesc->HCPhys, idxRegion, idxPage));
3915 AssertFailed();
3916 rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
3917 goto end;
3918 }
3919
 3920 AssertMsg(pPageDesc->GCPhys == (pPage->Private.pfn << 12), ("desc %RGp gmm %RGp\n", pPageDesc->GCPhys, (pPage->Private.pfn << 12)));
3921
3922 gmmR0ConvertToSharedPage(pGMM, pGVM, pPageDesc->HCPhys, pPageDesc->uHCPhysPageId, pPage);
3923
3924 /* Keep track of these references. */
3925 pGlobalRegion->paHCPhysPageID[idxPage] = pPageDesc->uHCPhysPageId;
3926 }
3927 else
3928 {
3929 uint8_t *pbLocalPage, *pbSharedPage;
3930 uint8_t *pbChunk;
3931 PGMMCHUNK pChunk;
3932
3933 Assert(pPageDesc->uHCPhysPageId != pGlobalRegion->paHCPhysPageID[idxPage]);
3934
3935 Log(("Replace existing page guest %RGp host %RHp id %x -> id %x\n", pPageDesc->GCPhys, pPageDesc->HCPhys, pPageDesc->uHCPhysPageId, pGlobalRegion->paHCPhysPageID[idxPage]));
3936
3937 /* Get the shared page source. */
3938 PGMMPAGE pPage = gmmR0GetPage(pGMM, pGlobalRegion->paHCPhysPageID[idxPage]);
3939 if (!pPage)
3940 {
3941 Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #2 (idxRegion=%#x idxPage=%#x)\n",
3942 pPageDesc->uHCPhysPageId, idxRegion, idxPage));
3943 AssertFailed();
3944 rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
3945 goto end;
3946 }
3947 if (pPage->Common.u2State != GMM_PAGE_STATE_SHARED)
3948 {
3949 /* Page was freed at some point; invalidate this entry. */
3950 /** @todo this isn't really bullet proof. */
3951 Log(("Old shared page was freed -> create a new one\n"));
3952 pGlobalRegion->paHCPhysPageID[idxPage] = NIL_GMM_PAGEID;
3953 goto new_shared_page; /* ugly goto */
3954 }
3955
3956 Log(("Replace existing page guest host %RHp -> %RHp\n", pPageDesc->HCPhys, ((uint64_t)pPage->Shared.pfn) << PAGE_SHIFT));
3957
3958 /* Calculate the virtual address of the local page. */
3959 pChunk = gmmR0GetChunk(pGMM, pPageDesc->uHCPhysPageId >> GMM_CHUNKID_SHIFT);
3960 if (pChunk)
3961 {
3962 if (!gmmR0IsChunkMapped(pGVM, pChunk, (PRTR3PTR)&pbChunk))
3963 {
3964 Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #3\n", pPageDesc->uHCPhysPageId));
3965 AssertFailed();
3966 rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
3967 goto end;
3968 }
3969 pbLocalPage = pbChunk + ((pPageDesc->uHCPhysPageId & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
3970 }
3971 else
3972 {
3973 Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #4\n", pPageDesc->uHCPhysPageId));
3974 AssertFailed();
3975 rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
3976 goto end;
3977 }
3978
3979 /* Calculate the virtual address of the shared page. */
3980 pChunk = gmmR0GetChunk(pGMM, pGlobalRegion->paHCPhysPageID[idxPage] >> GMM_CHUNKID_SHIFT);
3981 Assert(pChunk); /* can't fail as gmmR0GetPage succeeded. */
3982
3983 /* Get the virtual address of the physical page; map the chunk into the VM process if not already done. */
3984 if (!gmmR0IsChunkMapped(pGVM, pChunk, (PRTR3PTR)&pbChunk))
3985 {
3986 Log(("Map chunk into process!\n"));
3987 rc = gmmR0MapChunk(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk);
3988 if (rc != VINF_SUCCESS)
3989 {
3990 AssertRC(rc);
3991 goto end;
3992 }
3993 }
3994 pbSharedPage = pbChunk + ((pGlobalRegion->paHCPhysPageID[idxPage] & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
3995
3996 /** @todo write ASMMemComparePage. */
3997 if (memcmp(pbSharedPage, pbLocalPage, PAGE_SIZE))
3998 {
3999 Log(("Unexpected differences found between local and shared page; skip\n"));
4000 /* Signal to the caller that this one hasn't changed. */
4001 pPageDesc->uHCPhysPageId = NIL_GMM_PAGEID;
4002 goto end;
4003 }
4004
4005 /* Free the old local page. */
4006 GMMFREEPAGEDESC PageDesc;
4007
4008 PageDesc.idPage = pPageDesc->uHCPhysPageId;
4009 rc = gmmR0FreePages(pGMM, pGVM, 1, &PageDesc, GMMACCOUNT_BASE);
4010 AssertRCReturn(rc, rc);
4011
4012 gmmR0UseSharedPage(pGMM, pGVM, pPage);
4013
4014 /* Pass along the new physical address & page id. */
4015 pPageDesc->HCPhys = ((uint64_t)pPage->Shared.pfn) << PAGE_SHIFT;
4016 pPageDesc->uHCPhysPageId = pGlobalRegion->paHCPhysPageID[idxPage];
4017 }
4018end:
4019 return rc;
4020}
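/**
 * A sketch of a single page check via GMMR0SharedModuleCheckPage, mirroring how a
 * per-page walker inside PGMR0SharedModuleCheck could drive it. It assumes the GMM
 * semaphore is held (see the remark above) and that GCPhys, HCPhys and idPage are
 * hypothetical values describing the guest page currently backing the region page.
 * @code
 *     GMMSHAREDPAGEDESC PageDesc;
 *     PageDesc.GCPhys        = GCPhys;    // guest physical address of the page
 *     PageDesc.HCPhys        = HCPhys;    // current host physical backing
 *     PageDesc.uHCPhysPageId = idPage;    // GMM page id of that backing
 *
 *     int rc = GMMR0SharedModuleCheckPage(pGVM, pModule, idxRegion, idxPage, &PageDesc);
 *     if (RT_SUCCESS(rc) && PageDesc.uHCPhysPageId != NIL_GMM_PAGEID)
 *     {
 *         // The page is backed by a shared copy now; PageDesc.HCPhys and
 *         // PageDesc.uHCPhysPageId describe the (possibly new) backing.
 *     }
 * @endcode
 */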
4021
4022/**
 4023 * RTAvlGCPtrDestroy callback.
4024 *
4025 * @returns 0
4026 * @param pNode The node to destroy.
4027 * @param pvGVM The GVM handle.
4028 */
4029static DECLCALLBACK(int) gmmR0CleanupSharedModule(PAVLGCPTRNODECORE pNode, void *pvGVM)
4030{
4031 PGVM pGVM = (PGVM)pvGVM;
4032 PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)pNode;
4033 PGMM pGMM;
4034 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4035
4036 Assert(pRecVM->pGlobalModule || pRecVM->fCollision);
4037 if (pRecVM->pGlobalModule)
4038 {
4039 PGMMSHAREDMODULE pRec = pRecVM->pGlobalModule;
4040 Assert(pRec);
4041 Assert(pRec->cUsers);
4042
4043 Log(("gmmR0CleanupSharedModule: %s %s cUsers=%d\n", pRec->szName, pRec->szVersion, pRec->cUsers));
4044 pRec->cUsers--;
4045 if (pRec->cUsers == 0)
4046 {
4047 for (unsigned i = 0; i < pRec->cRegions; i++)
4048 if (pRec->aRegions[i].paHCPhysPageID)
4049 RTMemFree(pRec->aRegions[i].paHCPhysPageID);
4050
4051 /* Remove from the tree and free memory. */
4052 RTAvlGCPtrRemove(&pGMM->pGlobalSharedModuleTree, pRec->Core.Key);
4053 RTMemFree(pRec);
4054 }
4055 }
4056 RTMemFree(pRecVM);
4057 return 0;
4058}
4059#endif
4060
4061/**
4062 * Removes all shared modules for the specified VM
4063 *
4064 * @returns VBox status code.
4065 * @param pVM VM handle
4066 * @param idCpu VCPU id
4067 */
4068GMMR0DECL(int) GMMR0ResetSharedModules(PVM pVM, VMCPUID idCpu)
4069{
4070#ifdef VBOX_WITH_PAGE_SHARING
4071 /*
4072 * Validate input and get the basics.
4073 */
4074 PGMM pGMM;
4075 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4076 PGVM pGVM;
4077 int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
4078 if (RT_FAILURE(rc))
4079 return rc;
4080
4081 /*
4082 * Take the semaphore and do some more validations.
4083 */
4084 rc = RTSemFastMutexRequest(pGMM->Mtx);
4085 AssertRC(rc);
4086 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4087 {
4088 Log(("GMMR0ResetSharedModules\n"));
4089 RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, pGVM);
4090
4091 rc = VINF_SUCCESS;
4092 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4093 }
4094 else
4095 rc = VERR_INTERNAL_ERROR_5;
4096
4097 RTSemFastMutexRelease(pGMM->Mtx);
4098 return rc;
4099#else
4100 return VERR_NOT_IMPLEMENTED;
4101#endif
4102}
4103
4104#ifdef VBOX_WITH_PAGE_SHARING
4105typedef struct
4106{
4107 PGVM pGVM;
4108 VMCPUID idCpu;
4109 int rc;
4110} GMMCHECKSHAREDMODULEINFO, *PGMMCHECKSHAREDMODULEINFO;
4111
4112/**
4113 * Tree enumeration callback for checking a shared module.
4114 */
4115DECLCALLBACK(int) gmmR0CheckSharedModule(PAVLGCPTRNODECORE pNode, void *pvUser)
4116{
4117 PGMMCHECKSHAREDMODULEINFO pInfo = (PGMMCHECKSHAREDMODULEINFO)pvUser;
4118 PGMMSHAREDMODULEPERVM pLocalModule = (PGMMSHAREDMODULEPERVM)pNode;
4119 PGMMSHAREDMODULE pGlobalModule = pLocalModule->pGlobalModule;
4120
4121 if ( !pLocalModule->fCollision
4122 && pGlobalModule)
4123 {
4124 Log(("gmmR0CheckSharedModule: check %s %s base=%RGv size=%x collision=%d\n", pGlobalModule->szName, pGlobalModule->szVersion, pGlobalModule->Core.Key, pGlobalModule->cbModule, pLocalModule->fCollision));
4125 pInfo->rc = PGMR0SharedModuleCheck(pInfo->pGVM->pVM, pInfo->pGVM, pInfo->idCpu, pGlobalModule, pLocalModule->cRegions, pLocalModule->aRegions);
4126 if (RT_FAILURE(pInfo->rc))
4127 return 1; /* stop enumeration. */
4128 }
4129 return 0;
4130}
4131#endif
4132
4133#ifdef DEBUG_sandervl
4134/**
4135 * Setup for a GMMR0CheckSharedModules call (to allow log flush jumps back to ring 3)
4136 *
4137 * @returns VBox status code.
4138 * @param pVM VM handle
4139 */
4140GMMR0DECL(int) GMMR0CheckSharedModulesStart(PVM pVM)
4141{
4142 /*
4143 * Validate input and get the basics.
4144 */
4145 PGMM pGMM;
4146 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4147
4148 /*
4149 * Take the semaphore and do some more validations.
4150 */
4151 int rc = RTSemFastMutexRequest(pGMM->Mtx);
4152 AssertRC(rc);
4153 if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4154 rc = VERR_INTERNAL_ERROR_5;
4155 else
4156 rc = VINF_SUCCESS;
4157
4158 return rc;
4159}
4160
4161/**
4162 * Clean up after a GMMR0CheckSharedModules call (to allow log flush jumps back to ring 3)
4163 *
4164 * @returns VBox status code.
4165 * @param pVM VM handle
4166 */
4167GMMR0DECL(int) GMMR0CheckSharedModulesEnd(PVM pVM)
4168{
4169 /*
4170 * Validate input and get the basics.
4171 */
4172 PGMM pGMM;
4173 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4174
4175 RTSemFastMutexRelease(pGMM->Mtx);
4176 return VINF_SUCCESS;
4177}
4178#endif
4179
4180/**
4181 * Check all shared modules for the specified VM
4182 *
4183 * @returns VBox status code.
4184 * @param pVM VM handle
4185 * @param pVCpu VMCPU handle
4186 */
4187GMMR0DECL(int) GMMR0CheckSharedModules(PVM pVM, PVMCPU pVCpu)
4188{
4189#ifdef VBOX_WITH_PAGE_SHARING
4190 /*
4191 * Validate input and get the basics.
4192 */
4193 PGMM pGMM;
4194 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4195 PGVM pGVM;
4196 int rc = GVMMR0ByVMAndEMT(pVM, pVCpu->idCpu, &pGVM);
4197 if (RT_FAILURE(rc))
4198 return rc;
4199
4200# ifndef DEBUG_sandervl
4201 /*
4202 * Take the semaphore and do some more validations.
4203 */
4204 rc = RTSemFastMutexRequest(pGMM->Mtx);
4205 AssertRC(rc);
4206# endif
4207 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4208 {
4209 GMMCHECKSHAREDMODULEINFO Info;
4210
4211 Log(("GMMR0CheckSharedModules\n"));
4212 Info.pGVM = pGVM;
4213 Info.idCpu = pVCpu->idCpu;
4214 Info.rc = VINF_SUCCESS;
4215
4216 RTAvlGCPtrDoWithAll(&pGVM->gmm.s.pSharedModuleTree, true /* fFromLeft */, gmmR0CheckSharedModule, &Info);
4217
4218 rc = Info.rc;
4219
4220 Log(("GMMR0CheckSharedModules done!\n"));
4221
4222 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4223 }
4224 else
4225 rc = VERR_INTERNAL_ERROR_5;
4226
4227# ifndef DEBUG_sandervl
4228 RTSemFastMutexRelease(pGMM->Mtx);
4229# endif
4230 return rc;
4231#else
4232 return VERR_NOT_IMPLEMENTED;
4233#endif
4234}
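/**
 * A calling sketch for GMMR0CheckSharedModules on an EMT. In DEBUG_sandervl builds
 * the call is bracketed by GMMR0CheckSharedModulesStart/End so that log flushes can
 * drop back to ring-3 while the GMM mutex is held (see above).
 * @code
 *     int rc = GMMR0CheckSharedModules(pVM, pVCpu);
 *     if (RT_FAILURE(rc))
 *     {
 *         // One of the registered modules failed its PGMR0SharedModuleCheck pass.
 *     }
 * @endcode
 */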
4235
4236#if defined(VBOX_STRICT) && HC_ARCH_BITS == 64
4237typedef struct
4238{
4239 PGVM pGVM;
4240 PGMM pGMM;
4241 uint8_t *pSourcePage;
4242 bool fFoundDuplicate;
4243} GMMFINDDUPPAGEINFO, *PGMMFINDDUPPAGEINFO;
4244
4245/**
4246 * RTAvlU32DoWithAll callback.
4247 *
4248 * @returns 0
4249 * @param pNode The node to search.
4250 * @param pvInfo Pointer to the input parameters
4251 */
4252static DECLCALLBACK(int) gmmR0FindDupPageInChunk(PAVLU32NODECORE pNode, void *pvInfo)
4253{
4254 PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
4255 PGMMFINDDUPPAGEINFO pInfo = (PGMMFINDDUPPAGEINFO)pvInfo;
4256 PGVM pGVM = pInfo->pGVM;
4257 PGMM pGMM = pInfo->pGMM;
4258 uint8_t *pbChunk;
4259
4260 /* Only take chunks not mapped into this VM process; not entirely correct. */
4261 if (!gmmR0IsChunkMapped(pGVM, pChunk, (PRTR3PTR)&pbChunk))
4262 {
4263 int rc = gmmR0MapChunk(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk);
4264 if (rc != VINF_SUCCESS)
4265 goto end;
4266
4267 /*
4268 * Look for duplicate pages
4269 */
4270 unsigned iPage = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
4271 while (iPage-- > 0)
4272 {
4273 if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
4274 {
4275 uint8_t *pbDestPage = pbChunk + (iPage << PAGE_SHIFT);
4276
4277 if (!memcmp(pInfo->pSourcePage, pbDestPage, PAGE_SIZE))
4278 {
4279 pInfo->fFoundDuplicate = true;
4280 break;
4281 }
4282 }
4283 }
4284 gmmR0UnmapChunk(pGMM, pGVM, pChunk);
4285 }
4286end:
4287 if (pInfo->fFoundDuplicate)
4288 return 1; /* stop search */
4289 else
4290 return 0;
4291}
4292
4293/**
4294 * Find a duplicate of the specified page in other active VMs
4295 *
4296 * @returns VBox status code.
4297 * @param pVM VM handle
4298 * @param pReq Request packet
4299 */
4300GMMR0DECL(int) GMMR0FindDuplicatePageReq(PVM pVM, PGMMFINDDUPLICATEPAGEREQ pReq)
4301{
4302 /*
4303 * Validate input and pass it on.
4304 */
4305 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
4306 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
4307 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
4308
4309 PGMM pGMM;
4310 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4311
4312 /*
4313 * Take the semaphore and do some more validations.
4314 */
4315 int rc = RTSemFastMutexRequest(pGMM->Mtx);
4316 AssertRC(rc);
4317 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4318 {
4319 PGVM pGVM;
4320 rc = GVMMR0ByVM(pVM, &pGVM);
4321 if (RT_FAILURE(rc))
4322 goto end;
4323
4324 uint8_t *pbChunk;
4325 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, pReq->idPage >> GMM_CHUNKID_SHIFT);
4326 if (!pChunk)
4327 {
4328 AssertFailed();
4329 goto end;
4330 }
4331
4332 if (!gmmR0IsChunkMapped(pGVM, pChunk, (PRTR3PTR)&pbChunk))
4333 {
4334 AssertFailed();
4335 goto end;
4336 }
4337
4338 uint8_t *pbSourcePage = pbChunk + ((pReq->idPage & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
4339
4340 PGMMPAGE pPage = gmmR0GetPage(pGMM, pReq->idPage);
4341 if (!pPage)
4342 {
4343 AssertFailed();
4344 rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4345 goto end;
4346 }
4347 GMMFINDDUPPAGEINFO Info;
4348
4349 Info.pGVM = pGVM;
4350 Info.pGMM = pGMM;
4351 Info.pSourcePage = pbSourcePage;
4352 Info.fFoundDuplicate = false;
4353 RTAvlU32DoWithAll(&pGMM->pChunks, true /* fFromLeft */, gmmR0FindDupPageInChunk, &Info);
4354
4355 pReq->fDuplicate = Info.fFoundDuplicate;
4356 }
4357 else
4358 rc = VERR_INTERNAL_ERROR_5;
4359
4360end:
4361 RTSemFastMutexRelease(pGMM->Mtx);
4362 return rc;
4363}
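/**
 * A sketch of driving the strict-build duplicate page finder. idPage is a
 * hypothetical GMM page id belonging to the calling VM, the request header
 * initialization follows the usual VMMR0 convention (assumed), and the API only
 * exists in VBOX_STRICT builds on 64-bit hosts (see the surrounding #if).
 * @code
 *     GMMFINDDUPLICATEPAGEREQ Req;
 *     Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;   // standard request header (assumed)
 *     Req.Hdr.cbReq    = sizeof(Req);
 *     Req.idPage       = idPage;
 *     Req.fDuplicate   = false;
 *     int rc = GMMR0FindDuplicatePageReq(pVM, &Req);
 *     if (RT_SUCCESS(rc) && Req.fDuplicate)
 *     {
 *         // An identical private page exists in some other chunk.
 *     }
 * @endcode
 */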
4364
4365#endif /* VBOX_STRICT && HC_ARCH_BITS == 64 */
4366