VirtualBox

source: vbox/trunk/src/VBox/VMM/PGMPool.cpp@ 33162

Last change on this file since 33162 was 33162, checked in by vboxsync, 14 years ago

Reduce the maximum number of extents allocated from the hyper heap. Otherwise it eats up 256kb (out of 640k for a single cpu VM), which quickly leads up to out of memory guru meditations.

1/* $Id: PGMPool.cpp 33162 2010-10-15 14:21:06Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/** @page pg_pgm_pool PGM Shadow Page Pool
19 *
20 * Motivations:
21 * -# Relationship between shadow page tables and physical guest pages. This
22 * should allow us to skip most of the global flushes that currently follow
23 * access handler changes. The main expense is flushing shadow pages.
24 * -# Limit the pool size if necessary (default is kind of limitless).
25 * -# Allocate shadow pages from RC. We used to only do this in SyncCR3.
26 * -# Required for 64-bit guests.
27 * -# Combining the PD cache and page pool in order to simplify caching.
28 *
29 *
30 * @section sec_pgm_pool_outline Design Outline
31 *
32 * The shadow page pool tracks pages used for shadowing paging structures (i.e.
33 * page tables, page directory, page directory pointer table and page map
34 * level-4). Each page in the pool has a unique identifier. This identifier is
35 * used to link a guest physical page to a shadow PT. The identifier is a
36 * non-zero value and has a relatively low max value - say 14 bits. This makes it
37 * possible to fit it into the upper bits of the aHCPhys entries in the
38 * ram range.
39 *
40 * By restricting host physical memory to the first 48 bits (which is the
41 * announced physical memory range of the K8L chip (scheduled for 2008)), we
42 * can safely use the upper 16 bits for shadow page ID and reference counting.
43 *
44 * Update: The 48 bit assumption will be lifted with the new physical memory
45 * management (PGMPAGE), so we won't have any trouble when someone stuffs 2TB
46 * into a box in some years.
47 *
48 * Now, it's possible for a page to be aliased, i.e. mapped by more than one PT
49 * or PD. This is solved by creating a list of physical cross reference extents
50 * whenever this happens. Each node in the list (extent) can contain 3 page
51 * pool indexes. The list itself is chained using indexes into the paPhysExt
52 * array.
53 *
54 *
55 * @section sec_pgm_pool_life Life Cycle of a Shadow Page
56 *
57 * -# The SyncPT function requests a page from the pool.
58 * The request includes the kind of page it is (PT/PD, PAE/legacy), the
59 * address of the page it's shadowing, and more.
60 * -# The pool responds to the request by allocating a new page.
61 * When the cache is enabled, it will first check if it's in the cache.
62 * Should the pool be exhausted, one of two things can be done:
63 * -# Flush the whole pool and current CR3.
64 * -# Use the cache to find a page which can be flushed (~age).
65 * -# The SyncPT function will sync one or more pages and insert them into the
66 * shadow PD.
67 * -# The SyncPage function may sync more pages on later \#PFs.
68 * -# The page is freed / flushed in SyncCR3 (perhaps) and some other cases.
69 * When caching is enabled, the page isn't flushed but remains in the cache.
70 *
71 *
72 * @section sec_pgm_pool_monitoring Monitoring
73 *
74 * We always monitor PAGE_SIZE chunks of memory. When we've got multiple shadow
75 * pages for the same PAGE_SIZE of guest memory (PAE and mixed PD/PT) the pages
76 * sharing the monitor get linked using the iMonitoredNext/Prev. The head page
77 * is passed as the pvUser to the access handlers.
78 *
79 *
80 * @section sec_pgm_pool_impl Implementation
81 *
82 * The pool will take pages from the MM page pool. The tracking data
83 * (attributes, bitmaps and so on) are allocated from the hypervisor heap. The
84 * pool content can be accessed both by using the page id and the physical
85 * address (HC). The former is managed by means of an array, the latter by an
86 * offset based AVL tree.
87 *
88 * Flushing of a pool page means that we iterate the content (we know what kind
89 * it is) and update the link information in the ram range.
90 *
91 * ...
92 */
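/*
 * Illustrative sketch of the ID packing idea described above. This is an
 * assumption-labelled example, NOT the actual PGMPAGE tracking layout or the
 * real tracking macros; it only shows how a 14-bit shadow page index plus a
 * small reference count can share the upper 16 bits of a 64-bit per-page word:
 *
 * @code
 *      // Hypothetical helpers for illustration only.
 *      static inline uint64_t examplePackTracking(uint64_t uWord, uint16_t idxShw, uint8_t cRefs)
 *      {
 *          return (uWord & UINT64_C(0x0000ffffffffffff))    // keep the low 48 bits
 *               | ((uint64_t)(idxShw & 0x3fff) << 48)       // 14-bit shadow page index
 *               | ((uint64_t)(cRefs  &    0x3) << 62);      // small reference count
 *      }
 *      static inline uint16_t exampleUnpackIdx(uint64_t uWord)
 *      {
 *          return (uint16_t)((uWord >> 48) & 0x3fff);
 *      }
 * @endcode
 */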
93
94
95/*******************************************************************************
96* Header Files *
97*******************************************************************************/
98#define LOG_GROUP LOG_GROUP_PGM_POOL
99#include <VBox/pgm.h>
100#include <VBox/mm.h>
101#include "PGMInternal.h"
102#include <VBox/vm.h>
103#include "PGMInline.h"
104
105#include <VBox/log.h>
106#include <VBox/err.h>
107#include <iprt/asm.h>
108#include <iprt/string.h>
109#include <VBox/dbg.h>
110
111
112/*******************************************************************************
113* Internal Functions *
114*******************************************************************************/
115static DECLCALLBACK(int) pgmR3PoolAccessHandler(PVM pVM, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf, PGMACCESSTYPE enmAccessType, void *pvUser);
116#ifdef VBOX_WITH_DEBUGGER
117static DECLCALLBACK(int) pgmR3PoolCmdCheck(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PVM pVM, PCDBGCVAR paArgs, unsigned cArgs, PDBGCVAR pResult);
118#endif
119
120#ifdef VBOX_WITH_DEBUGGER
121/** Command descriptors. */
122static const DBGCCMD g_aCmds[] =
123{
124 /* pszCmd, cArgsMin, cArgsMax, paArgDesc, cArgDescs, pResultDesc, fFlags, pfnHandler, pszSyntax, ..., pszDescription */
125 { "pgmpoolcheck", 0, 0, NULL, 0, NULL, 0, pgmR3PoolCmdCheck, "", "Check the pgm pool pages." },
126};
127#endif
128
129/**
130 * Initializes the pool
131 *
132 * @returns VBox status code.
133 * @param pVM The VM handle.
134 */
135int pgmR3PoolInit(PVM pVM)
136{
137 AssertCompile(NIL_PGMPOOL_IDX == 0);
138 /* pPage->cLocked is an unsigned byte. */
139 AssertCompile(VMM_MAX_CPU_COUNT <= 255);
140
141 /*
142 * Query Pool config.
143 */
144 PCFGMNODE pCfg = CFGMR3GetChild(CFGMR3GetRoot(pVM), "/PGM/Pool");
145
146 /* Default pgm pool size equals 1024 pages. */
147 uint16_t cMaxPages = 4*_1M >> PAGE_SHIFT;
148
149#if HC_ARCH_BITS == 64
150 uint64_t cbRam = 0;
151 CFGMR3QueryU64Def(CFGMR3GetRoot(pVM), "RamSize", &cbRam, 0);
152
153 /* We should increase the pgm pool size for guests with more than 2 GB of ram */
154 if (cbRam >= UINT64_C(2) * _1G)
155 {
156 /* In the nested paging case we require 2 + 513 * (cbRam/1GB) pages to
157 * store all the page table descriptors.
158 */
159 uint64_t u64MaxPages = cbRam / (_1G / UINT64_C(512));
160 if (u64MaxPages > PGMPOOL_IDX_LAST)
161 cMaxPages = PGMPOOL_IDX_LAST;
162 else
163 cMaxPages = (uint16_t)u64MaxPages;
164 }
165#endif
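/* Back-of-the-envelope example of the formula above: a 4 GB guest gives
 * u64MaxPages = 4G / (1G / 512) = 2048 pages, i.e. roughly 8 MB worth of shadow
 * paging structures; PGMPOOL_IDX_LAST caps the value for larger guests. */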
166
167 /** @cfgm{/PGM/Pool/MaxPages, uint16_t, #pages, 16, 0x3fff, 1024}
168 * The max size of the shadow page pool in pages. The pool will grow dynamically
169 * up to this limit.
170 */
171 int rc = CFGMR3QueryU16Def(pCfg, "MaxPages", &cMaxPages, cMaxPages);
172 AssertLogRelRCReturn(rc, rc);
173 AssertLogRelMsgReturn(cMaxPages <= PGMPOOL_IDX_LAST && cMaxPages >= RT_ALIGN(PGMPOOL_IDX_FIRST, 16),
174 ("cMaxPages=%u (%#x)\n", cMaxPages, cMaxPages), VERR_INVALID_PARAMETER);
175 cMaxPages = RT_ALIGN(cMaxPages, 16);
176
177 /** @cfgm{/PGM/Pool/MaxUsers, uint16_t, #users, MaxPages, 32K, MaxPages*2}
178 * The max number of shadow page user tracking records. Each shadow page has
179 * zero or more other shadow pages (or CR3s) that reference it, or use it if
180 * you like. The structures describing these relationships are allocated from
181 * a fixed-size pool. This configuration variable defines the pool size.
182 */
183 uint16_t cMaxUsers;
184 rc = CFGMR3QueryU16Def(pCfg, "MaxUsers", &cMaxUsers, cMaxPages * 2);
185 AssertLogRelRCReturn(rc, rc);
186 AssertLogRelMsgReturn(cMaxUsers >= cMaxPages && cMaxUsers <= _32K,
187 ("cMaxUsers=%u (%#x)\n", cMaxUsers, cMaxUsers), VERR_INVALID_PARAMETER);
188
189 /** @cfgm{/PGM/Pool/MaxPhysExts, uint16_t, #extents, 16, MaxPages * 2, MAX(MaxPages*2,2048)}
190 * The max number of extents for tracking aliased guest pages.
191 */
192 uint16_t cMaxPhysExts;
193 rc = CFGMR3QueryU16Def(pCfg, "MaxPhysExts", &cMaxPhysExts, RT_MAX(cMaxPages * 2, 2048 /* 2k max as this eat too much hyper heap */));
194 AssertLogRelRCReturn(rc, rc);
195 AssertLogRelMsgReturn(cMaxPhysExts >= 16 && cMaxPhysExts <= PGMPOOL_IDX_LAST,
196 ("cMaxPhysExts=%u (%#x)\n", cMaxPhysExts, cMaxPhysExts), VERR_INVALID_PARAMETER);
197
198 /** @cfgm{/PGM/Pool/CacheEnabled, bool, true}
199 * Enables or disables caching of shadow pages. Caching means that we will try
200 * to reuse shadow pages instead of recreating them every time SyncCR3, SyncPT or
201 * SyncPage requests one. When reusing a shadow page, we can save time
202 * reconstructing it and its children.
203 */
204 bool fCacheEnabled;
205 rc = CFGMR3QueryBoolDef(pCfg, "CacheEnabled", &fCacheEnabled, true);
206 AssertLogRelRCReturn(rc, rc);
207
208 LogRel(("pgmR3PoolInit: cMaxPages=%#RX16 cMaxUsers=%#RX16 cMaxPhysExts=%#RX16 fCacheEnable=%RTbool\n",
209 cMaxPages, cMaxUsers, cMaxPhysExts, fCacheEnabled));
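/* Sketch of how these keys are commonly overridden from the host side. This
 * relies on the generic VBoxInternal extradata-to-CFGM mapping and is only an
 * assumed usage example, not something defined in this file:
 *      VBoxManage setextradata <vmname> "VBoxInternal/PGM/Pool/MaxPages" 8192
 *      VBoxManage setextradata <vmname> "VBoxInternal/PGM/Pool/CacheEnabled" 0
 */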
210
211 /*
212 * Allocate the data structures.
213 */
214 uint32_t cb = RT_OFFSETOF(PGMPOOL, aPages[cMaxPages]);
215 cb += cMaxUsers * sizeof(PGMPOOLUSER);
216 cb += cMaxPhysExts * sizeof(PGMPOOLPHYSEXT);
217 PPGMPOOL pPool;
218 rc = MMR3HyperAllocOnceNoRel(pVM, cb, 0, MM_TAG_PGM_POOL, (void **)&pPool);
219 if (RT_FAILURE(rc))
220 return rc;
221 pVM->pgm.s.pPoolR3 = pPool;
222 pVM->pgm.s.pPoolR0 = MMHyperR3ToR0(pVM, pPool);
223 pVM->pgm.s.pPoolRC = MMHyperR3ToRC(pVM, pPool);
224
225 /*
226 * Initialize it.
227 */
228 pPool->pVMR3 = pVM;
229 pPool->pVMR0 = pVM->pVMR0;
230 pPool->pVMRC = pVM->pVMRC;
231 pPool->cMaxPages = cMaxPages;
232 pPool->cCurPages = PGMPOOL_IDX_FIRST;
233 pPool->iUserFreeHead = 0;
234 pPool->cMaxUsers = cMaxUsers;
235 PPGMPOOLUSER paUsers = (PPGMPOOLUSER)&pPool->aPages[pPool->cMaxPages];
236 pPool->paUsersR3 = paUsers;
237 pPool->paUsersR0 = MMHyperR3ToR0(pVM, paUsers);
238 pPool->paUsersRC = MMHyperR3ToRC(pVM, paUsers);
239 for (unsigned i = 0; i < cMaxUsers; i++)
240 {
241 paUsers[i].iNext = i + 1;
242 paUsers[i].iUser = NIL_PGMPOOL_IDX;
243 paUsers[i].iUserTable = 0xfffffffe;
244 }
245 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
246 pPool->iPhysExtFreeHead = 0;
247 pPool->cMaxPhysExts = cMaxPhysExts;
248 PPGMPOOLPHYSEXT paPhysExts = (PPGMPOOLPHYSEXT)&paUsers[cMaxUsers];
249 pPool->paPhysExtsR3 = paPhysExts;
250 pPool->paPhysExtsR0 = MMHyperR3ToR0(pVM, paPhysExts);
251 pPool->paPhysExtsRC = MMHyperR3ToRC(pVM, paPhysExts);
252 for (unsigned i = 0; i < cMaxPhysExts; i++)
253 {
254 paPhysExts[i].iNext = i + 1;
255 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
256 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
257 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
258 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
259 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
260 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
261 }
262 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
263 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
264 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
265 pPool->iAgeHead = NIL_PGMPOOL_IDX;
266 pPool->iAgeTail = NIL_PGMPOOL_IDX;
267 pPool->fCacheEnabled = fCacheEnabled;
268 pPool->pfnAccessHandlerR3 = pgmR3PoolAccessHandler;
269 pPool->pszAccessHandler = "Guest Paging Access Handler";
270 pPool->HCPhysTree = 0;
271
272 /* The NIL entry. */
273 Assert(NIL_PGMPOOL_IDX == 0);
274 pPool->aPages[NIL_PGMPOOL_IDX].enmKind = PGMPOOLKIND_INVALID;
275
276 /* The Shadow 32-bit PD. (32 bits guest paging) */
277 pPool->aPages[PGMPOOL_IDX_PD].Core.Key = NIL_RTHCPHYS;
278 pPool->aPages[PGMPOOL_IDX_PD].GCPhys = NIL_RTGCPHYS;
279 pPool->aPages[PGMPOOL_IDX_PD].pvPageR3 = 0;
280 pPool->aPages[PGMPOOL_IDX_PD].enmKind = PGMPOOLKIND_32BIT_PD;
281 pPool->aPages[PGMPOOL_IDX_PD].idx = PGMPOOL_IDX_PD;
282
283 /* The Shadow PDPT. */
284 pPool->aPages[PGMPOOL_IDX_PDPT].Core.Key = NIL_RTHCPHYS;
285 pPool->aPages[PGMPOOL_IDX_PDPT].GCPhys = NIL_RTGCPHYS;
286 pPool->aPages[PGMPOOL_IDX_PDPT].pvPageR3 = 0;
287 pPool->aPages[PGMPOOL_IDX_PDPT].enmKind = PGMPOOLKIND_PAE_PDPT;
288 pPool->aPages[PGMPOOL_IDX_PDPT].idx = PGMPOOL_IDX_PDPT;
289
290 /* The Shadow AMD64 CR3. */
291 pPool->aPages[PGMPOOL_IDX_AMD64_CR3].Core.Key = NIL_RTHCPHYS;
292 pPool->aPages[PGMPOOL_IDX_AMD64_CR3].GCPhys = NIL_RTGCPHYS;
293 pPool->aPages[PGMPOOL_IDX_AMD64_CR3].pvPageR3 = 0;
294 pPool->aPages[PGMPOOL_IDX_AMD64_CR3].enmKind = PGMPOOLKIND_64BIT_PML4;
295 pPool->aPages[PGMPOOL_IDX_AMD64_CR3].idx = PGMPOOL_IDX_AMD64_CR3;
296
297 /* The Nested Paging CR3. */
298 pPool->aPages[PGMPOOL_IDX_NESTED_ROOT].Core.Key = NIL_RTHCPHYS;
299 pPool->aPages[PGMPOOL_IDX_NESTED_ROOT].GCPhys = NIL_RTGCPHYS;
300 pPool->aPages[PGMPOOL_IDX_NESTED_ROOT].pvPageR3 = 0;
301 pPool->aPages[PGMPOOL_IDX_NESTED_ROOT].enmKind = PGMPOOLKIND_ROOT_NESTED;
302 pPool->aPages[PGMPOOL_IDX_NESTED_ROOT].idx = PGMPOOL_IDX_NESTED_ROOT;
303
304 /*
305 * Set common stuff.
306 */
307 for (unsigned iPage = 1; iPage < PGMPOOL_IDX_FIRST; iPage++)
308 {
309 pPool->aPages[iPage].iNext = NIL_PGMPOOL_IDX;
310 pPool->aPages[iPage].iUserHead = NIL_PGMPOOL_USER_INDEX;
311 pPool->aPages[iPage].iModifiedNext = NIL_PGMPOOL_IDX;
312 pPool->aPages[iPage].iModifiedPrev = NIL_PGMPOOL_IDX;
313 pPool->aPages[iPage].iMonitoredNext = NIL_PGMPOOL_IDX;
314 pPool->aPages[iPage].iMonitoredPrev = NIL_PGMPOOL_IDX;
315 pPool->aPages[iPage].iAgeNext = NIL_PGMPOOL_IDX;
316 pPool->aPages[iPage].iAgePrev = NIL_PGMPOOL_IDX;
317 Assert(pPool->aPages[iPage].idx == iPage);
318 Assert(pPool->aPages[iPage].GCPhys == NIL_RTGCPHYS);
319 Assert(!pPool->aPages[iPage].fSeenNonGlobal);
320 Assert(!pPool->aPages[iPage].fMonitored);
321 Assert(!pPool->aPages[iPage].fCached);
322 Assert(!pPool->aPages[iPage].fZeroed);
323 Assert(!pPool->aPages[iPage].fReusedFlushPending);
324 }
325
326#ifdef VBOX_WITH_STATISTICS
327 /*
328 * Register statistics.
329 */
330 STAM_REG(pVM, &pPool->cCurPages, STAMTYPE_U16, "/PGM/Pool/cCurPages", STAMUNIT_PAGES, "Current pool size.");
331 STAM_REG(pVM, &pPool->cMaxPages, STAMTYPE_U16, "/PGM/Pool/cMaxPages", STAMUNIT_PAGES, "Max pool size.");
332 STAM_REG(pVM, &pPool->cUsedPages, STAMTYPE_U16, "/PGM/Pool/cUsedPages", STAMUNIT_PAGES, "The number of pages currently in use.");
333 STAM_REG(pVM, &pPool->cUsedPagesHigh, STAMTYPE_U16_RESET, "/PGM/Pool/cUsedPagesHigh", STAMUNIT_PAGES, "The high watermark for cUsedPages.");
334 STAM_REG(pVM, &pPool->StatAlloc, STAMTYPE_PROFILE_ADV, "/PGM/Pool/Alloc", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolAlloc.");
335 STAM_REG(pVM, &pPool->StatClearAll, STAMTYPE_PROFILE, "/PGM/Pool/ClearAll", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmR3PoolClearAll.");
336 STAM_REG(pVM, &pPool->StatR3Reset, STAMTYPE_PROFILE, "/PGM/Pool/R3Reset", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmR3PoolReset.");
337 STAM_REG(pVM, &pPool->StatFlushPage, STAMTYPE_PROFILE, "/PGM/Pool/FlushPage", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolFlushPage.");
338 STAM_REG(pVM, &pPool->StatFree, STAMTYPE_PROFILE, "/PGM/Pool/Free", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolFree.");
339 STAM_REG(pVM, &pPool->StatForceFlushPage, STAMTYPE_COUNTER, "/PGM/Pool/FlushForce", STAMUNIT_OCCURENCES, "Counting explicit flushes by PGMPoolFlushPage().");
340 STAM_REG(pVM, &pPool->StatForceFlushDirtyPage, STAMTYPE_COUNTER, "/PGM/Pool/FlushForceDirty", STAMUNIT_OCCURENCES, "Counting explicit flushes of dirty pages by PGMPoolFlushPage().");
341 STAM_REG(pVM, &pPool->StatForceFlushReused, STAMTYPE_COUNTER, "/PGM/Pool/FlushReused", STAMUNIT_OCCURENCES, "Counting flushes for reused pages.");
342 STAM_REG(pVM, &pPool->StatZeroPage, STAMTYPE_PROFILE, "/PGM/Pool/ZeroPage", STAMUNIT_TICKS_PER_CALL, "Profiling time spent zeroing pages. Overlaps with Alloc.");
343 STAM_REG(pVM, &pPool->cMaxUsers, STAMTYPE_U16, "/PGM/Pool/Track/cMaxUsers", STAMUNIT_COUNT, "Max user tracking records.");
344 STAM_REG(pVM, &pPool->cPresent, STAMTYPE_U32, "/PGM/Pool/Track/cPresent", STAMUNIT_COUNT, "Number of present page table entries.");
345 STAM_REG(pVM, &pPool->StatTrackDeref, STAMTYPE_PROFILE, "/PGM/Pool/Track/Deref", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolTrackDeref.");
346 STAM_REG(pVM, &pPool->StatTrackFlushGCPhysPT, STAMTYPE_PROFILE, "/PGM/Pool/Track/FlushGCPhysPT", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolTrackFlushGCPhysPT.");
347 STAM_REG(pVM, &pPool->StatTrackFlushGCPhysPTs, STAMTYPE_PROFILE, "/PGM/Pool/Track/FlushGCPhysPTs", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolTrackFlushGCPhysPTs.");
348 STAM_REG(pVM, &pPool->StatTrackFlushGCPhysPTsSlow, STAMTYPE_PROFILE, "/PGM/Pool/Track/FlushGCPhysPTsSlow", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolTrackFlushGCPhysPTsSlow.");
349 STAM_REG(pVM, &pPool->StatTrackFlushEntry, STAMTYPE_COUNTER, "/PGM/Pool/Track/Entry/Flush", STAMUNIT_COUNT, "Nr of flushed entries.");
350 STAM_REG(pVM, &pPool->StatTrackFlushEntryKeep, STAMTYPE_COUNTER, "/PGM/Pool/Track/Entry/Update", STAMUNIT_COUNT, "Nr of updated entries.");
351 STAM_REG(pVM, &pPool->StatTrackFreeUpOneUser, STAMTYPE_COUNTER, "/PGM/Pool/Track/FreeUpOneUser", STAMUNIT_TICKS_PER_CALL, "The number of times we were out of user tracking records.");
352 STAM_REG(pVM, &pPool->StatTrackDerefGCPhys, STAMTYPE_PROFILE, "/PGM/Pool/Track/DrefGCPhys", STAMUNIT_TICKS_PER_CALL, "Profiling deref activity related to tracking GC physical pages.");
353 STAM_REG(pVM, &pPool->StatTrackLinearRamSearches, STAMTYPE_COUNTER, "/PGM/Pool/Track/LinearRamSearches", STAMUNIT_OCCURENCES, "The number of times we had to do linear ram searches.");
354 STAM_REG(pVM, &pPool->StamTrackPhysExtAllocFailures,STAMTYPE_COUNTER, "/PGM/Pool/Track/PhysExtAllocFailures", STAMUNIT_OCCURENCES, "The number of failing pgmPoolTrackPhysExtAlloc calls.");
355 STAM_REG(pVM, &pPool->StatMonitorRZ, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ", STAMUNIT_TICKS_PER_CALL, "Profiling the RC/R0 access handler.");
356 STAM_REG(pVM, &pPool->StatMonitorRZEmulateInstr, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/EmulateInstr", STAMUNIT_OCCURENCES, "Times we've failed interpreting the instruction.");
357 STAM_REG(pVM, &pPool->StatMonitorRZFlushPage, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/FlushPage", STAMUNIT_TICKS_PER_CALL, "Profiling the pgmPoolFlushPage calls made from the RC/R0 access handler.");
358 STAM_REG(pVM, &pPool->StatMonitorRZFlushReinit, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/FlushReinit", STAMUNIT_OCCURENCES, "Times we've detected a page table reinit.");
359 STAM_REG(pVM, &pPool->StatMonitorRZFlushModOverflow,STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/FlushOverflow", STAMUNIT_OCCURENCES, "Counting flushes for pages that are modified too often.");
360 STAM_REG(pVM, &pPool->StatMonitorRZFork, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/Fork", STAMUNIT_OCCURENCES, "Times we've detected fork().");
361 STAM_REG(pVM, &pPool->StatMonitorRZHandled, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/Handled", STAMUNIT_TICKS_PER_CALL, "Profiling the RC/R0 access we've handled (except REP STOSD).");
362 STAM_REG(pVM, &pPool->StatMonitorRZIntrFailPatch1, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/IntrFailPatch1", STAMUNIT_OCCURENCES, "Times we've failed interpreting a patch code instruction.");
363 STAM_REG(pVM, &pPool->StatMonitorRZIntrFailPatch2, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/IntrFailPatch2", STAMUNIT_OCCURENCES, "Times we've failed interpreting a patch code instruction during flushing.");
364 STAM_REG(pVM, &pPool->StatMonitorRZRepPrefix, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/RepPrefix", STAMUNIT_OCCURENCES, "The number of times we've seen rep prefixes we can't handle.");
365 STAM_REG(pVM, &pPool->StatMonitorRZRepStosd, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/RepStosd", STAMUNIT_TICKS_PER_CALL, "Profiling the REP STOSD cases we've handled.");
366 STAM_REG(pVM, &pPool->StatMonitorRZFaultPT, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/Fault/PT", STAMUNIT_OCCURENCES, "Nr of handled PT faults.");
367 STAM_REG(pVM, &pPool->StatMonitorRZFaultPD, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/Fault/PD", STAMUNIT_OCCURENCES, "Nr of handled PD faults.");
368 STAM_REG(pVM, &pPool->StatMonitorRZFaultPDPT, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/Fault/PDPT", STAMUNIT_OCCURENCES, "Nr of handled PDPT faults.");
369 STAM_REG(pVM, &pPool->StatMonitorRZFaultPML4, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/Fault/PML4", STAMUNIT_OCCURENCES, "Nr of handled PML4 faults.");
370 STAM_REG(pVM, &pPool->StatMonitorR3, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3", STAMUNIT_TICKS_PER_CALL, "Profiling the R3 access handler.");
371 STAM_REG(pVM, &pPool->StatMonitorR3EmulateInstr, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/EmulateInstr", STAMUNIT_OCCURENCES, "Times we've failed interpreting the instruction.");
372 STAM_REG(pVM, &pPool->StatMonitorR3FlushPage, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/FlushPage", STAMUNIT_TICKS_PER_CALL, "Profiling the pgmPoolFlushPage calls made from the R3 access handler.");
373 STAM_REG(pVM, &pPool->StatMonitorR3FlushReinit, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/FlushReinit", STAMUNIT_OCCURENCES, "Times we've detected a page table reinit.");
374 STAM_REG(pVM, &pPool->StatMonitorR3FlushModOverflow,STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/FlushOverflow", STAMUNIT_OCCURENCES, "Counting flushes for pages that are modified too often.");
375 STAM_REG(pVM, &pPool->StatMonitorR3Fork, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/Fork", STAMUNIT_OCCURENCES, "Times we've detected fork().");
376 STAM_REG(pVM, &pPool->StatMonitorR3Handled, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Handled", STAMUNIT_TICKS_PER_CALL, "Profiling the R3 access we've handled (except REP STOSD).");
377 STAM_REG(pVM, &pPool->StatMonitorR3RepPrefix, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/RepPrefix", STAMUNIT_OCCURENCES, "The number of times we've seen rep prefixes we can't handle.");
378 STAM_REG(pVM, &pPool->StatMonitorR3RepStosd, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/RepStosd", STAMUNIT_TICKS_PER_CALL, "Profiling the REP STOSD cases we've handled.");
379 STAM_REG(pVM, &pPool->StatMonitorR3FaultPT, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/Fault/PT", STAMUNIT_OCCURENCES, "Nr of handled PT faults.");
380 STAM_REG(pVM, &pPool->StatMonitorR3FaultPD, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/Fault/PD", STAMUNIT_OCCURENCES, "Nr of handled PD faults.");
381 STAM_REG(pVM, &pPool->StatMonitorR3FaultPDPT, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/Fault/PDPT", STAMUNIT_OCCURENCES, "Nr of handled PDPT faults.");
382 STAM_REG(pVM, &pPool->StatMonitorR3FaultPML4, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/Fault/PML4", STAMUNIT_OCCURENCES, "Nr of handled PML4 faults.");
383 STAM_REG(pVM, &pPool->StatMonitorR3Async, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/Async", STAMUNIT_OCCURENCES, "Times we're called in an async thread and need to flush.");
384 STAM_REG(pVM, &pPool->cModifiedPages, STAMTYPE_U16, "/PGM/Pool/Monitor/cModifiedPages", STAMUNIT_PAGES, "The current cModifiedPages value.");
385 STAM_REG(pVM, &pPool->cModifiedPagesHigh, STAMTYPE_U16_RESET, "/PGM/Pool/Monitor/cModifiedPagesHigh", STAMUNIT_PAGES, "The high watermark for cModifiedPages.");
386 STAM_REG(pVM, &pPool->StatResetDirtyPages, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/Dirty/Resets", STAMUNIT_OCCURENCES, "Times we've called pgmPoolResetDirtyPages (and there were dirty pages).");
387 STAM_REG(pVM, &pPool->StatDirtyPage, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/Dirty/Pages", STAMUNIT_OCCURENCES, "Times we've called pgmPoolAddDirtyPage.");
388 STAM_REG(pVM, &pPool->StatDirtyPageDupFlush, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/Dirty/FlushDup", STAMUNIT_OCCURENCES, "Times we've had to flush duplicates for dirty page management.");
389 STAM_REG(pVM, &pPool->StatDirtyPageOverFlowFlush, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/Dirty/FlushOverflow",STAMUNIT_OCCURENCES, "Times we've had to flush because of overflow.");
390 STAM_REG(pVM, &pPool->StatCacheHits, STAMTYPE_COUNTER, "/PGM/Pool/Cache/Hits", STAMUNIT_OCCURENCES, "The number of pgmPoolAlloc calls satisfied by the cache.");
391 STAM_REG(pVM, &pPool->StatCacheMisses, STAMTYPE_COUNTER, "/PGM/Pool/Cache/Misses", STAMUNIT_OCCURENCES, "The number of pgmPoolAlloc calls not satisfied by the cache.");
392 STAM_REG(pVM, &pPool->StatCacheKindMismatches, STAMTYPE_COUNTER, "/PGM/Pool/Cache/KindMismatches", STAMUNIT_OCCURENCES, "The number of shadow page kind mismatches. (Better be low, preferably 0!)");
393 STAM_REG(pVM, &pPool->StatCacheFreeUpOne, STAMTYPE_COUNTER, "/PGM/Pool/Cache/FreeUpOne", STAMUNIT_OCCURENCES, "The number of times the cache was asked to free up a page.");
394 STAM_REG(pVM, &pPool->StatCacheCacheable, STAMTYPE_COUNTER, "/PGM/Pool/Cache/Cacheable", STAMUNIT_OCCURENCES, "The number of cacheable allocations.");
395 STAM_REG(pVM, &pPool->StatCacheUncacheable, STAMTYPE_COUNTER, "/PGM/Pool/Cache/Uncacheable", STAMUNIT_OCCURENCES, "The number of uncacheable allocations.");
396#endif /* VBOX_WITH_STATISTICS */
397
398#ifdef VBOX_WITH_DEBUGGER
399 /*
400 * Debugger commands.
401 */
402 static bool s_fRegisteredCmds = false;
403 if (!s_fRegisteredCmds)
404 {
405 rc = DBGCRegisterCommands(&g_aCmds[0], RT_ELEMENTS(g_aCmds));
406 if (RT_SUCCESS(rc))
407 s_fRegisteredCmds = true;
408 }
409#endif
410
411 return VINF_SUCCESS;
412}
413
414
415/**
416 * Relocate the page pool data.
417 *
418 * @param pVM The VM handle.
419 */
420void pgmR3PoolRelocate(PVM pVM)
421{
422 pVM->pgm.s.pPoolRC = MMHyperR3ToRC(pVM, pVM->pgm.s.pPoolR3);
423 pVM->pgm.s.pPoolR3->pVMRC = pVM->pVMRC;
424 pVM->pgm.s.pPoolR3->paUsersRC = MMHyperR3ToRC(pVM, pVM->pgm.s.pPoolR3->paUsersR3);
425 pVM->pgm.s.pPoolR3->paPhysExtsRC = MMHyperR3ToRC(pVM, pVM->pgm.s.pPoolR3->paPhysExtsR3);
426 int rc = PDMR3LdrGetSymbolRC(pVM, NULL, "pgmPoolAccessHandler", &pVM->pgm.s.pPoolR3->pfnAccessHandlerRC);
427 AssertReleaseRC(rc);
428 /* init order hack. */
429 if (!pVM->pgm.s.pPoolR3->pfnAccessHandlerR0)
430 {
431 rc = PDMR3LdrGetSymbolR0(pVM, NULL, "pgmPoolAccessHandler", &pVM->pgm.s.pPoolR3->pfnAccessHandlerR0);
432 AssertReleaseRC(rc);
433 }
434}
435
436
437/**
438 * Grows the shadow page pool.
439 *
440 * I.e. adds more pages to it, assuming that it hasn't reached cMaxPages yet.
441 *
442 * @returns VBox status code.
443 * @param pVM The VM handle.
444 */
445VMMR3DECL(int) PGMR3PoolGrow(PVM pVM)
446{
447 PPGMPOOL pPool = pVM->pgm.s.pPoolR3;
448 AssertReturn(pPool->cCurPages < pPool->cMaxPages, VERR_INTERNAL_ERROR);
449
450 pgmLock(pVM);
451
452 /*
453 * How much to grow it by?
454 */
455 uint32_t cPages = pPool->cMaxPages - pPool->cCurPages;
456 cPages = RT_MIN(PGMPOOL_CFG_MAX_GROW, cPages);
457 LogFlow(("PGMR3PoolGrow: Growing the pool by %d (%#x) pages.\n", cPages, cPages));
458
459 for (unsigned i = pPool->cCurPages; cPages-- > 0; i++)
460 {
461 PPGMPOOLPAGE pPage = &pPool->aPages[i];
462
463 /* Allocate all pages in low (below 4 GB) memory as 32-bit guests need a page table root in low memory. */
464 pPage->pvPageR3 = MMR3PageAllocLow(pVM);
465 if (!pPage->pvPageR3)
466 {
467 Log(("We're out of memory!! i=%d\n", i));
468 pgmUnlock(pVM);
469 return i ? VINF_SUCCESS : VERR_NO_PAGE_MEMORY;
470 }
471 pPage->Core.Key = MMPage2Phys(pVM, pPage->pvPageR3);
472 AssertFatal(pPage->Core.Key < _4G);
473 pPage->GCPhys = NIL_RTGCPHYS;
474 pPage->enmKind = PGMPOOLKIND_FREE;
475 pPage->idx = pPage - &pPool->aPages[0];
476 LogFlow(("PGMR3PoolGrow: insert page #%#x - %RHp\n", pPage->idx, pPage->Core.Key));
477 pPage->iNext = pPool->iFreeHead;
478 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
479 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
480 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
481 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
482 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
483 pPage->iAgeNext = NIL_PGMPOOL_IDX;
484 pPage->iAgePrev = NIL_PGMPOOL_IDX;
485 /* commit it */
486 bool fRc = RTAvloHCPhysInsert(&pPool->HCPhysTree, &pPage->Core); Assert(fRc); NOREF(fRc);
487 pPool->iFreeHead = i;
488 pPool->cCurPages = i + 1;
489 }
490
491 pgmUnlock(pVM);
492 Assert(pPool->cCurPages <= pPool->cMaxPages);
493 return VINF_SUCCESS;
494}
495
496
497
498/**
499 * Worker used by pgmR3PoolAccessHandler when it's invoked by an async thread.
500 *
501 * @param pPool The pool.
502 * @param pPage The page.
503 */
504static DECLCALLBACK(void) pgmR3PoolFlushReusedPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
505{
506 /* for the present this should be safe enough I think... */
507 pgmLock(pPool->pVMR3);
508 if ( pPage->fReusedFlushPending
509 && pPage->enmKind != PGMPOOLKIND_FREE)
510 pgmPoolFlushPage(pPool, pPage);
511 pgmUnlock(pPool->pVMR3);
512}
513
514
515/**
516 * \#PF Handler callback for PT write accesses.
517 *
518 * The handler cannot raise any faults; it's mainly for monitoring write access
519 * to certain pages.
520 *
521 * @returns VINF_SUCCESS if the handler has carried out the operation.
522 * @returns VINF_PGM_HANDLER_DO_DEFAULT if the caller should carry out the access operation.
523 * @param pVM VM Handle.
524 * @param GCPhys The physical address the guest is writing to.
525 * @param pvPhys The HC mapping of that address.
526 * @param pvBuf What the guest is reading/writing.
527 * @param cbBuf How much it's reading/writing.
528 * @param enmAccessType The access type.
529 * @param pvUser User argument.
530 */
531static DECLCALLBACK(int) pgmR3PoolAccessHandler(PVM pVM, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf, PGMACCESSTYPE enmAccessType, void *pvUser)
532{
533 STAM_PROFILE_START(&pVM->pgm.s.pPoolR3->StatMonitorR3, a);
534 PPGMPOOL pPool = pVM->pgm.s.pPoolR3;
535 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
536 LogFlow(("pgmR3PoolAccessHandler: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
537 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
538
539 PVMCPU pVCpu = VMMGetCpu(pVM);
540
541 /*
542 * We don't have to be very sophisticated about this since there are relatively few calls here.
543 * However, we must try our best to detect any non-cpu accesses (disk / networking).
544 *
545 * Just to make life more interesting, we'll have to deal with the async threads too.
546 * We cannot flush a page if we're in an async thread because of REM notifications.
547 */
548 pgmLock(pVM);
549 if (PHYS_PAGE_ADDRESS(GCPhys) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
550 {
551 /* Pool page changed while we were waiting for the lock; ignore. */
552 Log(("CPU%d: pgmR3PoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
553 pgmUnlock(pVM);
554 return VINF_PGM_HANDLER_DO_DEFAULT;
555 }
556
557 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
558
559 /* @todo this code doesn't make any sense. remove the if (!pVCpu) block */
560 if (!pVCpu) /** @todo This shouldn't happen any longer, all access handlers will be called on an EMT. All ring-3 handlers, except MMIO, already own the PGM lock. @bugref{3170} */
561 {
562 Log(("pgmR3PoolAccessHandler: async thread, requesting EMT to flush the page: %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
563 pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
564 STAM_COUNTER_INC(&pPool->StatMonitorR3Async);
565 if (!pPage->fReusedFlushPending)
566 {
567 pgmUnlock(pVM);
568 int rc = VMR3ReqCallVoidNoWait(pPool->pVMR3, VMCPUID_ANY, (PFNRT)pgmR3PoolFlushReusedPage, 2, pPool, pPage);
569 AssertRCReturn(rc, rc);
570 pgmLock(pVM);
571 pPage->fReusedFlushPending = true;
572 pPage->cModifications += 0x1000;
573 }
574
575 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvPhys, 0 /* unknown write size */);
576 /** @todo r=bird: making unsafe assumption about not crossing entries here! */
577 while (cbBuf > 4)
578 {
579 cbBuf -= 4;
580 pvPhys = (uint8_t *)pvPhys + 4;
581 GCPhys += 4;
582 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvPhys, 0 /* unknown write size */);
583 }
584 STAM_PROFILE_STOP(&pPool->StatMonitorR3, a);
585 }
586 else if ( ( pPage->cModifications < 96 /* it's cheaper here. */
587 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
588 )
589 && cbBuf <= 4)
590 {
591 /* Clear the shadow entry. */
592 if (!pPage->cModifications++)
593 pgmPoolMonitorModifiedInsert(pPool, pPage);
594 /** @todo r=bird: making unsafe assumption about not crossing entries here! */
595 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvPhys, 0 /* unknown write size */);
596 STAM_PROFILE_STOP(&pPool->StatMonitorR3, a);
597 }
598 else
599 {
600 pgmPoolMonitorChainFlush(pPool, pPage); /* ASSUME that VERR_PGM_POOL_CLEARED can be ignored here and that FFs will deal with it in due time. */
601 STAM_PROFILE_STOP_EX(&pPool->StatMonitorR3, &pPool->StatMonitorR3FlushPage, a);
602 }
603 pgmUnlock(pVM);
604 return VINF_PGM_HANDLER_DO_DEFAULT;
605}
606
607
608/**
609 * Rendezvous callback used by pgmR3PoolClearAll that clears all shadow pages
610 * and all modification counters.
611 *
612 * This is only called on one of the EMTs while the other ones are waiting for
613 * it to complete this function.
614 *
615 * @returns VINF_SUCCESS (VBox strict status code).
616 * @param pVM The VM handle.
617 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
618 * @param fpvFlushRemTbl When not NULL, we'll flush the REM TLB as well.
619 * (This is the pvUser, so it has to be void *.)
620 *
621 */
622DECLCALLBACK(VBOXSTRICTRC) pgmR3PoolClearAllRendezvous(PVM pVM, PVMCPU pVCpu, void *fpvFlushRemTbl)
623{
624 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
625 STAM_PROFILE_START(&pPool->StatClearAll, c);
626
627 pgmLock(pVM);
628 Log(("pgmR3PoolClearAllRendezvous: cUsedPages=%d fpvFlushRemTbl=%RTbool\n", pPool->cUsedPages, !!fpvFlushRemTbl));
629
630 /*
631 * Iterate all the pages until we've encountered all that are in use.
632 * This is a simple but not quite optimal solution.
633 */
634 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
635 unsigned cLeft = pPool->cUsedPages;
636 unsigned iPage = pPool->cCurPages;
637 while (--iPage >= PGMPOOL_IDX_FIRST)
638 {
639 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
640 if (pPage->GCPhys != NIL_RTGCPHYS)
641 {
642 switch (pPage->enmKind)
643 {
644 /*
645 * We only care about shadow page tables.
646 */
647 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
648 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
649 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
650 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
651 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
652 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
653 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
654 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
655 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
656 {
657 if (pPage->cPresent)
658 {
659 void *pvShw = PGMPOOL_PAGE_2_PTR_V2(pPool->CTX_SUFF(pVM), pVCpu, pPage);
660 STAM_PROFILE_START(&pPool->StatZeroPage, z);
661#if 0
662 /* Useful check for leaking references; *very* expensive though. */
663 switch (pPage->enmKind)
664 {
665 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
666 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
667 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
668 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
669 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
670 {
671 bool fFoundFirst = false;
672 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)pvShw;
673 for (unsigned ptIndex = 0; ptIndex < RT_ELEMENTS(pPT->a); ptIndex++)
674 {
675 if (pPT->a[ptIndex].u)
676 {
677 if (!fFoundFirst)
678 {
679 AssertFatalMsg(pPage->iFirstPresent <= ptIndex, ("ptIndex = %d first present = %d\n", ptIndex, pPage->iFirstPresent));
680 if (pPage->iFirstPresent != ptIndex)
681 Log(("ptIndex = %d first present = %d\n", ptIndex, pPage->iFirstPresent));
682 fFoundFirst = true;
683 }
684 if (PGMSHWPTEPAE_IS_P(pPT->a[ptIndex]))
685 {
686 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pPT->a[ptIndex]), NIL_RTGCPHYS);
687 if (pPage->iFirstPresent == ptIndex)
688 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
689 }
690 }
691 }
692 AssertFatalMsg(pPage->cPresent == 0, ("cPresent = %d pPage = %RGv\n", pPage->cPresent, pPage->GCPhys));
693 break;
694 }
695 default:
696 break;
697 }
698#endif
699 ASMMemZeroPage(pvShw);
700 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
701 pPage->cPresent = 0;
702 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
703 }
704 }
705 /* fall thru */
706
707 default:
708 Assert(!pPage->cModifications || ++cModifiedPages);
709 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
710 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
711 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
712 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
713 pPage->cModifications = 0;
714 break;
715
716 }
717 if (!--cLeft)
718 break;
719 }
720 }
721
722 /* sweep the special pages too. */
723 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
724 {
725 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
726 if (pPage->GCPhys != NIL_RTGCPHYS)
727 {
728 Assert(!pPage->cModifications || ++cModifiedPages);
729 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
730 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
731 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
732 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
733 pPage->cModifications = 0;
734 }
735 }
736
737#ifndef DEBUG_michael
738 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
739#endif
740 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
741 pPool->cModifiedPages = 0;
742
743 /*
744 * Clear all the GCPhys links and rebuild the phys ext free list.
745 */
746 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
747 pRam;
748 pRam = pRam->CTX_SUFF(pNext))
749 {
750 iPage = pRam->cb >> PAGE_SHIFT;
751 while (iPage-- > 0)
752 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
753 }
754
755 pPool->iPhysExtFreeHead = 0;
756 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
757 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
758 for (unsigned i = 0; i < cMaxPhysExts; i++)
759 {
760 paPhysExts[i].iNext = i + 1;
761 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
762 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
763 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
764 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
765 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
766 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
767 }
768 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
769
770
771#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
772 /* Reset all dirty pages to reactivate the page monitoring. */
773 /* Note: we must do this *after* clearing all page references and shadow page tables as there might be stale references to
774 * recently removed MMIO ranges around that might otherwise end up asserting in pgmPoolTracDerefGCPhysHint
775 */
776 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
777 {
778 PPGMPOOLPAGE pPage;
779 unsigned idxPage;
780
781 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
782 continue;
783
784 idxPage = pPool->aDirtyPages[i].uIdx;
785 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
786 pPage = &pPool->aPages[idxPage];
787 Assert(pPage->idx == idxPage);
788 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
789
790 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, i));
791
792 Log(("Reactivate dirty page %RGp\n", pPage->GCPhys));
793
794 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
795 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
796 Assert(rc == VINF_SUCCESS);
797 pPage->fDirty = false;
798
799 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
800 }
801
802 /* Clear all dirty pages. */
803 pPool->idxFreeDirtyPage = 0;
804 pPool->cDirtyPages = 0;
805#endif
806
807 /* Clear the PGM_SYNC_CLEAR_PGM_POOL flag on all VCPUs to prevent redundant flushes. */
808 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
809 pVM->aCpus[idCpu].pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
810
811 /* Flush job finished. */
812 VM_FF_CLEAR(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
813 pPool->cPresent = 0;
814 pgmUnlock(pVM);
815
816 PGM_INVL_ALL_VCPU_TLBS(pVM);
817
818 if (fpvFlushRemTbl)
819 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
820 CPUMSetChangedFlags(&pVM->aCpus[idCpu], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
821
822 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
823 return VINF_SUCCESS;
824}
825
826
827/**
828 * Clears the shadow page pool.
829 *
830 * @param pVM The VM handle.
831 * @param fFlushRemTlb When set, the REM TLB is scheduled for flushing as
832 * well.
833 */
834void pgmR3PoolClearAll(PVM pVM, bool fFlushRemTlb)
835{
836 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PoolClearAllRendezvous, &fFlushRemTlb);
837 AssertRC(rc);
838}
839
840/**
841 * Protect all pgm pool page table entries to monitor writes
842 *
843 * @param pVM The VM handle.
844 *
845 * Remark: assumes the caller will flush all TLBs (!!)
846 */
847void pgmR3PoolWriteProtectPages(PVM pVM)
848{
849 Assert(PGMIsLockOwner(pVM));
850 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
851 unsigned cLeft = pPool->cUsedPages;
852 unsigned iPage = pPool->cCurPages;
853 while (--iPage >= PGMPOOL_IDX_FIRST)
854 {
855 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
856 if ( pPage->GCPhys != NIL_RTGCPHYS
857 && pPage->cPresent)
858 {
859 union
860 {
861 void *pv;
862 PX86PT pPT;
863 PPGMSHWPTPAE pPTPae;
864 PEPTPT pPTEpt;
865 } uShw;
866 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
867
868 switch (pPage->enmKind)
869 {
870 /*
871 * We only care about shadow page tables.
872 */
873 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
874 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
875 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
876 for (unsigned iShw = 0; iShw < RT_ELEMENTS(uShw.pPT->a); iShw++)
877 {
878 if (uShw.pPT->a[iShw].n.u1Present)
879 uShw.pPT->a[iShw].n.u1Write = 0;
880 }
881 break;
882
883 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
884 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
885 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
886 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
887 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
888 for (unsigned iShw = 0; iShw < RT_ELEMENTS(uShw.pPTPae->a); iShw++)
889 {
890 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
891 PGMSHWPTEPAE_SET_RO(uShw.pPTPae->a[iShw]);
892 }
893 break;
894
895 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
896 for (unsigned iShw = 0; iShw < RT_ELEMENTS(uShw.pPTEpt->a); iShw++)
897 {
898 if (uShw.pPTEpt->a[iShw].n.u1Present)
899 uShw.pPTEpt->a[iShw].n.u1Write = 0;
900 }
901 break;
902
903 default:
904 break;
905 }
906 if (!--cLeft)
907 break;
908 }
909 }
910}
911
912#ifdef VBOX_WITH_DEBUGGER
913/**
914 * The '.pgmpoolcheck' command.
915 *
916 * @returns VBox status code.
917 * @param pCmd Pointer to the command descriptor (as registered).
918 * @param pCmdHlp Pointer to command helper functions.
919 * @param pVM Pointer to the current VM (if any).
920 * @param paArgs Pointer to (readonly) array of arguments.
921 * @param cArgs Number of arguments in the array.
922 */
923static DECLCALLBACK(int) pgmR3PoolCmdCheck(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PVM pVM, PCDBGCVAR paArgs, unsigned cArgs, PDBGCVAR pResult)
924{
925 /*
926 * Validate input.
927 */
928 if (!pVM)
929 return pCmdHlp->pfnPrintf(pCmdHlp, NULL, "error: The command requires a VM to be selected.\n");
930
931 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
932
933 for (unsigned i = 0; i < pPool->cCurPages; i++)
934 {
935 PPGMPOOLPAGE pPage = &pPool->aPages[i];
936 bool fFirstMsg = true;
937
938 /* Todo: cover other paging modes too. */
939 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
940 {
941 PPGMSHWPTPAE pShwPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
942 {
943 PX86PTPAE pGstPT;
944 PGMPAGEMAPLOCK LockPage;
945 int rc = PGMPhysGCPhys2CCPtrReadOnly(pVM, pPage->GCPhys, (const void **)&pGstPT, &LockPage); AssertReleaseRC(rc);
946
947 /* Check if any PTEs are out of sync. */
948 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
949 {
950 if (PGMSHWPTEPAE_IS_P(pShwPT->a[j]))
951 {
952 RTHCPHYS HCPhys = NIL_RTHCPHYS;
953 rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[j].u & X86_PTE_PAE_PG_MASK, &HCPhys);
954 if ( rc != VINF_SUCCESS
955 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[j]) != HCPhys)
956 {
957 if (fFirstMsg)
958 {
959 pCmdHlp->pfnPrintf(pCmdHlp, NULL, "Check pool page %RGp\n", pPage->GCPhys);
960 fFirstMsg = false;
961 }
962 pCmdHlp->pfnPrintf(pCmdHlp, NULL, "Mismatch HCPhys: rc=%Rrc idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, j, pGstPT->a[j].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), HCPhys);
963 }
964 else if ( PGMSHWPTEPAE_IS_RW(pShwPT->a[j])
965 && !pGstPT->a[j].n.u1Write)
966 {
967 if (fFirstMsg)
968 {
969 pCmdHlp->pfnPrintf(pCmdHlp, NULL, "Check pool page %RGp\n", pPage->GCPhys);
970 fFirstMsg = false;
971 }
972 pCmdHlp->pfnPrintf(pCmdHlp, NULL, "Mismatch r/w gst/shw: idx=%d guest %RX64 shw=%RX64 vs %RHp\n", j, pGstPT->a[j].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), HCPhys);
973 }
974 }
975 }
976 PGMPhysReleasePageMappingLock(pVM, &LockPage);
977 }
978
979 /* Make sure this page table can't be written to from any shadow mapping. */
980 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
981 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pPage->GCPhys, &HCPhysPT);
982 AssertMsgRC(rc, ("PGMPhysGCPhys2HCPhys failed with rc=%d for %RGp\n", rc, pPage->GCPhys));
983 if (rc == VINF_SUCCESS)
984 {
985 for (unsigned j = 0; j < pPool->cCurPages; j++)
986 {
987 PPGMPOOLPAGE pTempPage = &pPool->aPages[j];
988
989 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
990 {
991 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pTempPage);
992
993 for (unsigned k = 0; k < RT_ELEMENTS(pShwPT->a); k++)
994 {
995 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[k])
996# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
997 && !pPage->fDirty
998# endif
999 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[k]) == HCPhysPT)
1000 {
1001 if (fFirstMsg)
1002 {
1003 pCmdHlp->pfnPrintf(pCmdHlp, NULL, "Check pool page %RGp\n", pPage->GCPhys);
1004 fFirstMsg = false;
1005 }
1006 pCmdHlp->pfnPrintf(pCmdHlp, NULL, "Mismatch: r/w: GCPhys=%RGp idx=%d shw %RX64 %RX64\n", pTempPage->GCPhys, k, PGMSHWPTEPAE_GET_LOG(pShwPT->a[k]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[k]));
1007 }
1008 }
1009 }
1010 }
1011 }
1012 }
1013 }
1014 return VINF_SUCCESS;
1015}
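/* Typical interactive use (an assumed example of driving the command; the DBGC
 * console itself is outside this file): with the VM debugger console attached,
 * the command registered above is invoked as
 *      .pgmpoolcheck
 * and prints one line per shadow PTE that mismatches its guest PTE. */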
1016#endif /* VBOX_WITH_DEBUGGER */