VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 17059

Last change on this file since 17059 was 17052, checked in by vboxsync, 16 years ago

Missing case

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 172.7 KB
1/* $Id: PGMAllPool.cpp 17052 2009-02-24 10:15:33Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42#include <iprt/string.h>
43
44
45/*******************************************************************************
46* Internal Functions *
47*******************************************************************************/
48__BEGIN_DECLS
49static void pgmPoolFlushAllInt(PPGMPOOL pPool);
50#ifdef PGMPOOL_WITH_USER_TRACKING
51DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
52DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
53static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
54#endif
55#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
56static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
57#endif
58#ifdef PGMPOOL_WITH_CACHE
59static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
60#endif
61#ifdef PGMPOOL_WITH_MONITORING
62static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
63#endif
64#ifndef IN_RING3
65DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
66#endif
67#ifdef LOG_ENABLED
68static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
69#endif
70__END_DECLS
71
72
73/**
74 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
75 *
76 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
77 * @param enmKind The page kind.
78 */
79DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
80{
81 switch (enmKind)
82 {
83 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
84 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
85 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
86 return true;
87 default:
88 return false;
89 }
90}
91
92
93#if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
94/**
95 * Maps a pool page into the current context.
96 *
97 * @returns Pointer to the mapping.
98 * @param pPGM Pointer to the PGM instance data.
99 * @param pPage The page to map.
100 */
101void *pgmPoolMapPageFallback(PPGM pPGM, PPGMPOOLPAGE pPage)
102{
103 /* General pages are taken care of by the inlined part; it
104 only ends up here in case of failure. */
105 AssertReleaseReturn(pPage->idx < PGMPOOL_IDX_FIRST, NULL);
106
107/** @todo make sure HCPhys is valid for *all* indexes. */
108 /* special pages. */
109# ifdef IN_RC
110 switch (pPage->idx)
111 {
112# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
113 case PGMPOOL_IDX_PD:
114 case PGMPOOL_IDX_PDPT:
115 case PGMPOOL_IDX_AMD64_CR3:
116 return pPGM->pShwRootRC;
117# else
118 case PGMPOOL_IDX_PD:
119 return pPGM->pShw32BitPdRC;
120 case PGMPOOL_IDX_PAE_PD:
121 case PGMPOOL_IDX_PAE_PD_0:
122 return pPGM->apShwPaePDsRC[0];
123 case PGMPOOL_IDX_PAE_PD_1:
124 return pPGM->apShwPaePDsRC[1];
125 case PGMPOOL_IDX_PAE_PD_2:
126 return pPGM->apShwPaePDsRC[2];
127 case PGMPOOL_IDX_PAE_PD_3:
128 return pPGM->apShwPaePDsRC[3];
129 case PGMPOOL_IDX_PDPT:
130 return pPGM->pShwPaePdptRC;
131# endif
132 default:
133 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
134 return NULL;
135 }
136
137# else /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
138 RTHCPHYS HCPhys;
139 switch (pPage->idx)
140 {
141# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
142 case PGMPOOL_IDX_PD:
143 case PGMPOOL_IDX_PDPT:
144 case PGMPOOL_IDX_AMD64_CR3:
145 HCPhys = pPGM->HCPhysShwCR3;
146 break;
147
148 case PGMPOOL_IDX_NESTED_ROOT:
149 HCPhys = pPGM->HCPhysShwNestedRoot;
150 break;
151# else
152 case PGMPOOL_IDX_PD:
153 HCPhys = pPGM->HCPhysShw32BitPD;
154 break;
155 case PGMPOOL_IDX_PAE_PD_0:
156 HCPhys = pPGM->aHCPhysPaePDs[0];
157 break;
158 case PGMPOOL_IDX_PAE_PD_1:
159 HCPhys = pPGM->aHCPhysPaePDs[1];
160 break;
161 case PGMPOOL_IDX_PAE_PD_2:
162 HCPhys = pPGM->aHCPhysPaePDs[2];
163 break;
164 case PGMPOOL_IDX_PAE_PD_3:
165 HCPhys = pPGM->aHCPhysPaePDs[3];
166 break;
167 case PGMPOOL_IDX_PDPT:
168 HCPhys = pPGM->HCPhysShwPaePdpt;
169 break;
170 case PGMPOOL_IDX_NESTED_ROOT:
171 HCPhys = pPGM->HCPhysShwNestedRoot;
172 break;
173 case PGMPOOL_IDX_PAE_PD:
174 AssertReleaseMsgFailed(("PGMPOOL_IDX_PAE_PD is not usable in VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 context\n"));
175 return NULL;
176# endif
177 default:
178 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
179 return NULL;
180 }
181 AssertMsg(HCPhys && HCPhys != NIL_RTHCPHYS && !(PAGE_OFFSET_MASK & HCPhys), ("%RHp\n", HCPhys));
182
183 void *pv;
184 pgmR0DynMapHCPageInlined(pPGM, HCPhys, &pv);
185 return pv;
186# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
187}
188#endif /* IN_RC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
189
190
191#ifdef PGMPOOL_WITH_MONITORING
192/**
193 * Determine the size of a write instruction.
194 * @returns number of bytes written.
195 * @param pDis The disassembler state.
196 */
197static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
198{
199 /*
200 * This is very crude and possibly wrong for some opcodes,
201 * but since it's not really supposed to be called we can
202 * probably live with that.
203 */
204 return DISGetParamSize(pDis, &pDis->param1);
205}
206
207
208/**
209 * Flushes a chain of pages sharing the same access monitor.
210 *
211 * @returns VBox status code suitable for scheduling.
212 * @param pPool The pool.
213 * @param pPage A page in the chain.
214 */
215int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
216{
217 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
218
219 /*
220 * Find the list head.
221 */
222 uint16_t idx = pPage->idx;
223 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
224 {
225 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
226 {
227 idx = pPage->iMonitoredPrev;
228 Assert(idx != pPage->idx);
229 pPage = &pPool->aPages[idx];
230 }
231 }
232
233 /*
234 * Iterate the list flushing each shadow page.
235 */
236 int rc = VINF_SUCCESS;
237 for (;;)
238 {
239 idx = pPage->iMonitoredNext;
240 Assert(idx != pPage->idx);
241 if (pPage->idx >= PGMPOOL_IDX_FIRST)
242 {
243 int rc2 = pgmPoolFlushPage(pPool, pPage);
244 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
245 rc = VINF_PGM_SYNC_CR3;
246 }
247 /* next */
248 if (idx == NIL_PGMPOOL_IDX)
249 break;
250 pPage = &pPool->aPages[idx];
251 }
252 return rc;
253}
254
255
256/**
257 * Wrapper for getting the current context pointer to the entry being modified.
258 *
259 * @returns VBox status code suitable for scheduling.
260 * @param pVM VM Handle.
261 * @param pvDst Destination address
262 * @param pvSrc Source guest virtual address.
263 * @param GCPhysSrc The source guest physical address.
264 * @param cb Size of data to read
265 */
266DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
267{
268#ifdef IN_RC
269 int rc = MMGCRamRead(pVM, (RTRCPTR)((RTRCUINTPTR)pvDst & ~(cb - 1)), (RTRCPTR)pvSrc, cb);
270 if (RT_FAILURE(rc))
271 rc = PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
272 return rc;
273#elif defined(IN_RING3)
274 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
275 return VINF_SUCCESS;
276#else
277 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
278#endif
279}
280
281/**
282 * Process shadow entries before they are changed by the guest.
283 *
284 * For PT entries we will clear them. For PD entries, we'll simply check
285 * for mapping conflicts and set the SyncCR3 FF if found.
286 *
287 * @param pPool The pool.
288 * @param pPage The head page.
289 * @param GCPhysFault The guest physical fault address.
290 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
291 * In R3 this is the host context 'fault' address.
292 * @param pCpu The disassembler state for figuring out the write size.
293 * This need not be specified if the caller knows we won't do cross entry accesses.
294 */
295void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, PDISCPUSTATE pCpu)
296{
297 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
298 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
299 const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
300
301 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));
302
303 for (;;)
304 {
305 union
306 {
307 void *pv;
308 PX86PT pPT;
309 PX86PTPAE pPTPae;
310 PX86PD pPD;
311 PX86PDPAE pPDPae;
312 PX86PDPT pPDPT;
313 PX86PML4 pPML4;
314 } uShw;
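 /* uShw is a typed view of the mapped shadow page; which member is
    valid depends on pPage->enmKind, handled by the switch below. */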
315
316 switch (pPage->enmKind)
317 {
318 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
319 {
320 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
321 const unsigned iShw = off / sizeof(X86PTE);
322 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
323 if (uShw.pPT->a[iShw].n.u1Present)
324 {
325# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
326 X86PTE GstPte;
327
328 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
329 AssertRC(rc);
330 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
331 pgmPoolTracDerefGCPhysHint(pPool, pPage,
332 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
333 GstPte.u & X86_PTE_PG_MASK);
334# endif
335 uShw.pPT->a[iShw].u = 0;
336 }
337 break;
338 }
339
340 /* page/2 sized */
341 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
342 {
343 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
344 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
345 {
346 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
347 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
348 if (uShw.pPTPae->a[iShw].n.u1Present)
349 {
350# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
351 X86PTE GstPte;
352 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
353 AssertRC(rc);
354
355 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
356 pgmPoolTracDerefGCPhysHint(pPool, pPage,
357 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
358 GstPte.u & X86_PTE_PG_MASK);
359# endif
360 uShw.pPTPae->a[iShw].u = 0;
361 }
362 }
363 break;
364 }
365
366# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
367 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
368 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
369 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
370 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
371 {
372 unsigned iGst = off / sizeof(X86PDE);
373 unsigned iShwPdpt = iGst / 256;
374 unsigned iShw = (iGst % 256) * 2;
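 /* A 32-bit guest PD (1024 entries of 4MB) is shadowed by four PAE PDs
    (512 entries of 2MB each), so guest PDE iGst maps to shadow PD iGst / 256,
    entries (iGst % 256) * 2 and (iGst % 256) * 2 + 1. */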
375 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
376
377 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x idx = %d page idx=%d\n", iGst, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
378 if (iShwPdpt == pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
379 {
380 for (unsigned i = 0; i < 2; i++)
381 {
382 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
383 {
384 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
385 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
386 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
387 break;
388 }
389 else
390 if (uShw.pPDPae->a[iShw+i].n.u1Present)
391 {
392 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
393 pgmPoolFree(pPool->CTX_SUFF(pVM),
394 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
395 pPage->idx,
396 iShw + i);
397 uShw.pPDPae->a[iShw+i].u = 0;
398 }
399
400 /* paranoia / a bit assumptive. */
401 if ( pCpu
402 && (off & 3)
403 && (off & 3) + cbWrite > 4)
404 {
405 const unsigned iShw2 = iShw + 2 + i;
406 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
407 {
408 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
409 {
410 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
411 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
412 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
413 break;
414 }
415 else
416 if (uShw.pPDPae->a[iShw2].n.u1Present)
417 {
418 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
419 pgmPoolFree(pPool->CTX_SUFF(pVM),
420 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
421 pPage->idx,
422 iShw2);
423 uShw.pPDPae->a[iShw2].u = 0;
424 }
425 }
426 }
427 }
428 }
429 break;
430 }
431# endif
432
433
434 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
435 {
436 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
437 const unsigned iShw = off / sizeof(X86PTEPAE);
438 if (uShw.pPTPae->a[iShw].n.u1Present)
439 {
440# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
441 X86PTEPAE GstPte;
442 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
443 AssertRC(rc);
444
445 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
446 pgmPoolTracDerefGCPhysHint(pPool, pPage,
447 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
448 GstPte.u & X86_PTE_PAE_PG_MASK);
449# endif
450 uShw.pPTPae->a[iShw].u = 0;
451 }
452
453 /* paranoia / a bit assumptive. */
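 /* If the write is misaligned within the 8-byte PTE and spills into the
    next entry, process that second entry as well. */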
454 if ( pCpu
455 && (off & 7)
456 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
457 {
458 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
459 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
460
461 if (uShw.pPTPae->a[iShw2].n.u1Present)
462 {
463# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
464 X86PTEPAE GstPte;
465# ifdef IN_RING3
466 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
467# else
468 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
469# endif
470 AssertRC(rc);
471 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
472 pgmPoolTracDerefGCPhysHint(pPool, pPage,
473 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
474 GstPte.u & X86_PTE_PAE_PG_MASK);
475# endif
476 uShw.pPTPae->a[iShw2].u = 0;
477 }
478 }
479 break;
480 }
481
482# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
483 case PGMPOOLKIND_32BIT_PD:
484# else
485 case PGMPOOLKIND_ROOT_32BIT_PD:
486# endif
487 {
488 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
489 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
490
491# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
492 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
493# endif
494 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
495 {
496 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
497 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
498 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
499 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
500 break;
501 }
502# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
503 else
504 {
505 if (uShw.pPD->a[iShw].n.u1Present)
506 {
507 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
508 pgmPoolFree(pPool->CTX_SUFF(pVM),
509 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
510 pPage->idx,
511 iShw);
512 uShw.pPD->a[iShw].u = 0;
513 }
514 }
515# endif
516 /* paranoia / a bit assumptive. */
517 if ( pCpu
518 && (off & 3)
519 && (off & 3) + cbWrite > sizeof(X86PTE))
520 {
521 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
522 if ( iShw2 != iShw
523 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
524 {
525
526 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
527 {
528 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
529 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
530 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
531 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
532 }
533# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
534 else
535 {
536 if (uShw.pPD->a[iShw2].n.u1Present)
537 {
538 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
539 pgmPoolFree(pPool->CTX_SUFF(pVM),
540 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
541 pPage->idx,
542 iShw2);
543 uShw.pPD->a[iShw2].u = 0;
544 }
545 }
546# endif
547 }
548 }
549#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
550 if ( uShw.pPD->a[iShw].n.u1Present
551 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
552 {
553 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
554# ifdef IN_RC /* TLB load - we're pushing things a bit... */
555 ASMProbeReadByte(pvAddress);
556# endif
557 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
558 uShw.pPD->a[iShw].u = 0;
559 }
560#endif
561 break;
562 }
563
564# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
565 case PGMPOOLKIND_ROOT_PAE_PD:
566 {
567 unsigned iGst = off / sizeof(X86PDE); // ASSUMING 32-bit guest paging!
568 unsigned iShwPdpt = iGst / 256;
569 unsigned iShw = (iGst % 256) * 2;
570 Assert(pPage->idx == PGMPOOL_IDX_PAE_PD);
571 PPGMPOOLPAGE pPage2 = pPage + 1 + iShwPdpt;
572 Assert(pPage2->idx == PGMPOOL_IDX_PAE_PD_0 + iShwPdpt);
573 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage2);
574 for (unsigned i = 0; i < 2; i++, iShw++)
575 {
576 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
577 {
578 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
579 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
580 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw));
581 }
582 /* paranoia / a bit assumptive. */
583 else if ( pCpu
584 && (off & 3)
585 && (off & 3) + cbWrite > 4)
586 {
587 const unsigned iShw2 = iShw + 2;
588 if ( iShw2 < RT_ELEMENTS(uShw.pPDPae->a) /** @todo was completely wrong, it's better now after #1865 but still wrong from cross PD. */
589 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
590 {
591 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
592 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
593 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
594 }
595 }
596#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
597 if ( uShw.pPDPae->a[iShw].n.u1Present
598 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
599 {
600 LogFlow(("pgmPoolMonitorChainChanging: iShwPdpt=%#x iShw=%#x: %RX64 -> freeing it!\n", iShwPdpt, iShw, uShw.pPDPae->a[iShw].u));
601# ifdef IN_RC /* TLB load - we're pushing things a bit... */
602 ASMProbeReadByte(pvAddress);
603# endif
604 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + iShwPdpt * X86_PG_PAE_ENTRIES);
605 uShw.pPDPae->a[iShw].u = 0;
606 }
607#endif
608 }
609 break;
610 }
611# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */
612
613 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
614 {
615 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
616 const unsigned iShw = off / sizeof(X86PDEPAE);
617 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
618 {
619 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
620 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
621 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
622 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
623 break;
624 }
625#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
626 /*
627 * Causes trouble when the guest uses a PDE to refer to the whole page table level
628 * structure. (Invalidate here; faults later on when it tries to change the page
629 * table entries -> recheck; probably only applies to the RC case.)
630 */
631 else
632 {
633 if (uShw.pPDPae->a[iShw].n.u1Present)
634 {
635 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
636 pgmPoolFree(pPool->CTX_SUFF(pVM),
637 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
638# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
639 pPage->idx,
640 iShw);
641# else
642 /* Note: hardcoded PAE implementation dependency */
643 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
644 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
645# endif
646 uShw.pPDPae->a[iShw].u = 0;
647 }
648 }
649#endif
650 /* paranoia / a bit assumptive. */
651 if ( pCpu
652 && (off & 7)
653 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
654 {
655 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
656 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
657
658 if ( iShw2 != iShw
659 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
660 {
661 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
662 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
663 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
664 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
665 }
666#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
667 else if (uShw.pPDPae->a[iShw2].n.u1Present)
668 {
669 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
670 pgmPoolFree(pPool->CTX_SUFF(pVM),
671 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
672# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
673 pPage->idx,
674 iShw2);
675# else
676 /* Note: hardcoded PAE implementation dependency */
677 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
678 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
679# endif
680 uShw.pPDPae->a[iShw2].u = 0;
681 }
682#endif
683 }
684 break;
685 }
686
687# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
688 case PGMPOOLKIND_PAE_PDPT:
689# else
690 case PGMPOOLKIND_ROOT_PDPT:
691# endif
692 {
693 /*
694 * Hopefully this doesn't happen very often:
695 * - touching unused parts of the page
696 * - messing with the bits of pd pointers without changing the physical address
697 */
698# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
699 /* PDPT roots are not page aligned; 32 bytes only! */
700 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
701# else
702 const unsigned offPdpt = off;
703# endif
704 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
705 const unsigned iShw = offPdpt / sizeof(X86PDPE);
706 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
707 {
708 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
709 {
710 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
711 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
712 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
713 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
714 break;
715 }
716# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
717 else
718 if (uShw.pPDPT->a[iShw].n.u1Present)
719 {
720 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
721 pgmPoolFree(pPool->CTX_SUFF(pVM),
722 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
723 pPage->idx,
724 iShw);
725 uShw.pPDPT->a[iShw].u = 0;
726 }
727# endif
728
729 /* paranoia / a bit assumptive. */
730 if ( pCpu
731 && (offPdpt & 7)
732 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
733 {
734 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
735 if ( iShw2 != iShw
736 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
737 {
738 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
739 {
740 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
741 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
742 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
743 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
744 }
745# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
746 else
747 if (uShw.pPDPT->a[iShw2].n.u1Present)
748 {
749 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
750 pgmPoolFree(pPool->CTX_SUFF(pVM),
751 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
752 pPage->idx,
753 iShw2);
754 uShw.pPDPT->a[iShw2].u = 0;
755 }
756# endif
757 }
758 }
759 }
760 break;
761 }
762
763#ifndef IN_RC
764 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
765 {
766 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
767 const unsigned iShw = off / sizeof(X86PDEPAE);
768 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
769 {
770 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
771 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
772 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
773 }
774 else
775 {
776 if (uShw.pPDPae->a[iShw].n.u1Present)
777 {
778 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
779 pgmPoolFree(pPool->CTX_SUFF(pVM),
780 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
781 pPage->idx,
782 iShw);
783 uShw.pPDPae->a[iShw].u = 0;
784 }
785 }
786 /* paranoia / a bit assumptive. */
787 if ( pCpu
788 && (off & 7)
789 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
790 {
791 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
792 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
793
794 if ( iShw2 != iShw
795 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
796 {
797 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
798 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
799 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
800 }
801 else
802 if (uShw.pPDPae->a[iShw2].n.u1Present)
803 {
804 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
805 pgmPoolFree(pPool->CTX_SUFF(pVM),
806 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
807 pPage->idx,
808 iShw2);
809 uShw.pPDPae->a[iShw2].u = 0;
810 }
811 }
812 break;
813 }
814
815 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
816 {
817 /*
818 * Hopefully this doesn't happen very often:
819 * - messing with the bits of pd pointers without changing the physical address
820 */
821# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
822 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
823# endif
824 {
825 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
826 const unsigned iShw = off / sizeof(X86PDPE);
827 if (uShw.pPDPT->a[iShw].n.u1Present)
828 {
829 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
830 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
831 uShw.pPDPT->a[iShw].u = 0;
832 }
833 /* paranoia / a bit assumptive. */
834 if ( pCpu
835 && (off & 7)
836 && (off & 7) + cbWrite > sizeof(X86PDPE))
837 {
838 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
839 if (uShw.pPDPT->a[iShw2].n.u1Present)
840 {
841 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
842 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
843 uShw.pPDPT->a[iShw2].u = 0;
844 }
845 }
846 }
847 break;
848 }
849
850 case PGMPOOLKIND_64BIT_PML4:
851 {
852 /*
853 * Hopefully this doesn't happen very often:
854 * - messing with the bits of pd pointers without changing the physical address
855 */
856# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
857 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
858# endif
859 {
860 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
861 const unsigned iShw = off / sizeof(X86PDPE);
862 if (uShw.pPML4->a[iShw].n.u1Present)
863 {
864 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
865 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
866 uShw.pPML4->a[iShw].u = 0;
867 }
868 /* paranoia / a bit assumptive. */
869 if ( pCpu
870 && (off & 7)
871 && (off & 7) + cbWrite > sizeof(X86PDPE))
872 {
873 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
874 if (uShw.pPML4->a[iShw2].n.u1Present)
875 {
876 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
877 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
878 uShw.pPML4->a[iShw2].u = 0;
879 }
880 }
881 }
882 break;
883 }
884#endif /* !IN_RC */
885
886 default:
887 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
888 }
889
890 /* next */
891 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
892 return;
893 pPage = &pPool->aPages[pPage->iMonitoredNext];
894 }
895}
896
897#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
898/**
899 * Checks if the page is the active CR3 or one of the four PDs of a PAE PDPT.
900 *
901 * @returns true if the page is the active CR3 or one of the four PAE PDs, otherwise false.
902 * @param pVM VM Handle.
903 * @param pPage PGM pool page
904 */
905bool pgmPoolIsActiveRootPage(PVM pVM, PPGMPOOLPAGE pPage)
906{
907 /* First check the simple case. */
908 if (pPage == pVM->pgm.s.CTX_SUFF(pShwPageCR3))
909 {
910 LogFlow(("pgmPoolIsActiveRootPage found CR3 root\n"));
911 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
912 return true;
913 }
914
915 switch (PGMGetShadowMode(pVM))
916 {
917 case PGMMODE_PAE:
918 case PGMMODE_PAE_NX:
919 {
920 switch (pPage->enmKind)
921 {
922 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
923 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
924 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
925 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
926 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
927 {
928 PX86PDPT pPdpt = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
929 Assert(pPdpt);
930
931 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
932 {
933 if ( pPdpt->a[i].n.u1Present
934 && pPage->Core.Key == (pPdpt->a[i].u & X86_PDPE_PG_MASK))
935 {
936 LogFlow(("pgmPoolIsActiveRootPage found PAE PDPE root\n"));
937 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
938 return true;
939 }
940 }
941 break;
942 }
943 }
944
945 break;
946 }
947 }
948
949 return false;
950}
951#endif /* VBOX_WITH_PGMPOOL_PAGING_ONLY */
952
953
954# ifndef IN_RING3
955/**
956 * Checks if an access could be a fork operation in progress.
957 *
958 * Meaning that the guest is setting up the parent process for Copy-On-Write.
959 *
960 * @returns true if it's likely that we're forking, otherwise false.
961 * @param pPool The pool.
962 * @param pCpu The disassembled instruction.
963 * @param offFault The access offset.
964 */
965DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
966{
967 /*
968 * i386 linux is using btr to clear X86_PTE_RW.
969 * The functions involved are (2.6.16 source inspection):
970 * clear_bit
971 * ptep_set_wrprotect
972 * copy_one_pte
973 * copy_pte_range
974 * copy_pmd_range
975 * copy_pud_range
976 * copy_page_range
977 * dup_mmap
978 * dup_mm
979 * copy_mm
980 * copy_process
981 * do_fork
982 */
983 if ( pCpu->pCurInstr->opcode == OP_BTR
984 && !(offFault & 4)
985 /** @todo Validate that the bit index is X86_PTE_RW. */
986 )
987 {
988 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
989 return true;
990 }
991 return false;
992}
993
994
995/**
996 * Determine whether the page is likely to have been reused.
997 *
998 * @returns true if we consider the page as being reused for a different purpose.
999 * @returns false if we consider it to still be a paging page.
1000 * @param pVM VM Handle.
1001 * @param pPage The page in question.
1002 * @param pRegFrame Trap register frame.
1003 * @param pCpu The disassembly info for the faulting instruction.
1004 * @param pvFault The fault address.
1005 *
1006 * @remark The REP prefix check is left to the caller because of STOSD/W.
1007 */
1008DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
1009{
1010#ifndef IN_RC
1011 /** @todo could make this general, faulting close to rsp should be safe reuse heuristic. */
1012 if ( HWACCMHasPendingIrq(pVM)
1013 && (pRegFrame->rsp - pvFault) < 32)
1014 {
1015 /* Fault caused by stack writes while trying to inject an interrupt event. */
1016 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
1017 return true;
1018 }
1019#else
1020 NOREF(pVM); NOREF(pvFault);
1021#endif
1022
1023 switch (pCpu->pCurInstr->opcode)
1024 {
1025 /* call implies the actual push of the return address faulted */
1026 case OP_CALL:
1027 Log4(("pgmPoolMonitorIsReused: CALL\n"));
1028 return true;
1029 case OP_PUSH:
1030 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
1031 return true;
1032 case OP_PUSHF:
1033 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
1034 return true;
1035 case OP_PUSHA:
1036 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
1037 return true;
1038 case OP_FXSAVE:
1039 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
1040 return true;
1041 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
1042 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
1043 return true;
1044 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
1045 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
1046 return true;
1047 case OP_MOVSWD:
1048 case OP_STOSWD:
1049 if ( pCpu->prefix == (PREFIX_REP|PREFIX_REX)
1050 && pRegFrame->rcx >= 0x40
1051 )
1052 {
1053 Assert(pCpu->mode == CPUMODE_64BIT);
1054
1055 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
1056 return true;
1057 }
1058 return false;
1059 }
1060 if ( (pCpu->param1.flags & USE_REG_GEN32)
1061 && (pCpu->param1.base.reg_gen == USE_REG_ESP))
1062 {
1063 Log4(("pgmPoolMonitorIsReused: ESP\n"));
1064 return true;
1065 }
1066
1067 //if (pPage->fCR3Mix)
1068 // return false;
1069 return false;
1070}
1071
1072
1073/**
1074 * Flushes the page being accessed.
1075 *
1076 * @returns VBox status code suitable for scheduling.
1077 * @param pVM The VM handle.
1078 * @param pPool The pool.
1079 * @param pPage The pool page (head).
1080 * @param pCpu The disassembly of the write instruction.
1081 * @param pRegFrame The trap register frame.
1082 * @param GCPhysFault The fault address as guest physical address.
1083 * @param pvFault The fault address.
1084 */
1085static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1086 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1087{
1088 /*
1089 * First, do the flushing.
1090 */
1091 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
1092
1093 /*
1094 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
1095 */
1096 uint32_t cbWritten;
1097 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
1098 if (RT_SUCCESS(rc2))
1099 pRegFrame->rip += pCpu->opsize;
1100 else if (rc2 == VERR_EM_INTERPRETER)
1101 {
1102#ifdef IN_RC
1103 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
1104 {
1105 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
1106 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
1107 rc = VINF_SUCCESS;
1108 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
1109 }
1110 else
1111#endif
1112 {
1113 rc = VINF_EM_RAW_EMULATE_INSTR;
1114 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1115 }
1116 }
1117 else
1118 rc = rc2;
1119
1120 /* See use in pgmPoolAccessHandlerSimple(). */
1121 PGM_INVL_GUEST_TLBS();
1122
1123 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
1124 return rc;
1125
1126}
1127
1128
1129/**
1130 * Handles the STOSD write accesses.
1131 *
1132 * @returns VBox status code suitable for scheduling.
1133 * @param pVM The VM handle.
1134 * @param pPool The pool.
1135 * @param pPage The pool page (head).
1136 * @param pCpu The disassembly of the write instruction.
1137 * @param pRegFrame The trap register frame.
1138 * @param GCPhysFault The fault address as guest physical address.
1139 * @param pvFault The fault address.
1140 */
1141DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1142 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1143{
1144 Assert(pCpu->mode == CPUMODE_32BIT);
1145
1146 Log3(("pgmPoolAccessHandlerSTOSD\n"));
1147
1148 /*
1149 * Increment the modification counter and insert it into the list
1150 * of modified pages the first time.
1151 */
1152 if (!pPage->cModifications++)
1153 pgmPoolMonitorModifiedInsert(pPool, pPage);
1154
1155 /*
1156 * Execute REP STOSD.
1157 *
1158 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
1159 * write situation, meaning that it's safe to write here.
1160 */
1161#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1162 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1163#endif
1164 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
1165 while (pRegFrame->ecx)
1166 {
1167#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1168 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1169 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1170 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1171#else
1172 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1173#endif
1174#ifdef IN_RC
1175 *(uint32_t *)pu32 = pRegFrame->eax;
1176#else
1177 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
1178#endif
1179 pu32 += 4;
1180 GCPhysFault += 4;
1181 pRegFrame->edi += 4;
1182 pRegFrame->ecx--;
1183 }
1184 pRegFrame->rip += pCpu->opsize;
1185
1186 /* See use in pgmPoolAccessHandlerSimple(). */
1187 PGM_INVL_GUEST_TLBS();
1188
1189 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
1190 return VINF_SUCCESS;
1191}
1192
1193
1194/**
1195 * Handles the simple write accesses.
1196 *
1197 * @returns VBox status code suitable for scheduling.
1198 * @param pVM The VM handle.
1199 * @param pPool The pool.
1200 * @param pPage The pool page (head).
1201 * @param pCpu The disassembly of the write instruction.
1202 * @param pRegFrame The trap register frame.
1203 * @param GCPhysFault The fault address as guest physical address.
1204 * @param pvFault The fault address.
1205 */
1206DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1207 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1208{
1209 Log3(("pgmPoolAccessHandlerSimple\n"));
1210 /*
1211 * Increment the modification counter and insert it into the list
1212 * of modified pages the first time.
1213 */
1214 if (!pPage->cModifications++)
1215 pgmPoolMonitorModifiedInsert(pPool, pPage);
1216
1217 /*
1218 * Clear all the pages. ASSUMES that pvFault is readable.
1219 */
1220#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1221 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1222 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1223 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
1224 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1225#else
1226 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
1227#endif
1228
1229 /*
1230 * Interpret the instruction.
1231 */
1232 uint32_t cb;
1233 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
1234 if (RT_SUCCESS(rc))
1235 pRegFrame->rip += pCpu->opsize;
1236 else if (rc == VERR_EM_INTERPRETER)
1237 {
1238 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1239 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
1240 rc = VINF_EM_RAW_EMULATE_INSTR;
1241 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1242 }
1243
1244 /*
1245 * Quick hack, with logging enabled we're getting stale
1246 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
1247 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
1248 * have to be fixed to support this. But that'll have to wait till next week.
1249 *
1250 * An alternative is to keep track of the changed PTEs together with the
1251 * GCPhys from the guest PT. This may prove expensive though.
1252 *
1253 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
1254 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1255 */
1256 PGM_INVL_GUEST_TLBS();
1257
1258 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1259 return rc;
1260}
1261
1262/**
1263 * \#PF Handler callback for PT write accesses.
1264 *
1265 * @returns VBox status code (appropriate for GC return).
1266 * @param pVM VM Handle.
1267 * @param uErrorCode CPU Error code.
1268 * @param pRegFrame Trap register frame.
1269 * NULL on DMA and other non CPU access.
1270 * @param pvFault The fault address (cr2).
1271 * @param GCPhysFault The GC physical address corresponding to pvFault.
1272 * @param pvUser User argument.
1273 */
1274DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1275{
1276 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1277 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1278 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1279 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1280
1281 /*
1282 * We should ALWAYS have the list head as user parameter. This
1283 * is because we use that page to record the changes.
1284 */
1285 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1286
1287 /*
1288 * Disassemble the faulting instruction.
1289 */
1290 DISCPUSTATE Cpu;
1291 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
1292 AssertRCReturn(rc, rc);
1293
1294 /*
1295 * Check if it's worth dealing with.
1296 */
1297 bool fReused = false;
1298 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1299#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1300 || pgmPoolIsActiveRootPage(pVM, pPage)
1301#else
1302 || pPage->fCR3Mix
1303#endif
1304 )
1305 && !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
1306 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
1307 {
1308 /*
1309 * Simple instructions, no REP prefix.
1310 */
1311 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
1312 {
1313 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1314 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1315 return rc;
1316 }
1317
1318 /*
1319 * Windows is frequently doing small memset() operations (netio test 4k+).
1320 * We have to deal with these or we'll kill the cache and performance.
1321 */
1322 if ( Cpu.pCurInstr->opcode == OP_STOSWD
1323 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
1324 && pRegFrame->ecx <= 0x20
1325 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1326 && !((uintptr_t)pvFault & 3)
1327 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1328 && Cpu.mode == CPUMODE_32BIT
1329 && Cpu.opmode == CPUMODE_32BIT
1330 && Cpu.addrmode == CPUMODE_32BIT
1331 && Cpu.prefix == PREFIX_REP
1332 && !pRegFrame->eflags.Bits.u1DF
1333 )
1334 {
1335 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1336 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1337 return rc;
1338 }
1339
1340 /* REP prefix, don't bother. */
1341 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1342 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1343 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
1344 }
1345
1346 /*
1347 * Not worth it, so flush it.
1348 *
1349 * If we considered it to be reused, don't go back to ring-3
1350 * to emulate failed instructions since we usually cannot
1351 * interpret them. This may be a bit risky, in which case
1352 * the reuse detection must be fixed.
1353 */
1354 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1355 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1356 rc = VINF_SUCCESS;
1357 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1358 return rc;
1359}
1360
1361# endif /* !IN_RING3 */
1362#endif /* PGMPOOL_WITH_MONITORING */
1363
1364#ifdef PGMPOOL_WITH_CACHE
1365
1366/**
1367 * Inserts a page into the GCPhys hash table.
1368 *
1369 * @param pPool The pool.
1370 * @param pPage The page.
1371 */
1372DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1373{
1374 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1375 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1376 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1377 pPage->iNext = pPool->aiHash[iHash];
1378 pPool->aiHash[iHash] = pPage->idx;
1379}
1380
1381
1382/**
1383 * Removes a page from the GCPhys hash table.
1384 *
1385 * @param pPool The pool.
1386 * @param pPage The page.
1387 */
1388DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1389{
1390 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1391 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1392 if (pPool->aiHash[iHash] == pPage->idx)
1393 pPool->aiHash[iHash] = pPage->iNext;
1394 else
1395 {
1396 uint16_t iPrev = pPool->aiHash[iHash];
1397 for (;;)
1398 {
1399 const int16_t i = pPool->aPages[iPrev].iNext;
1400 if (i == pPage->idx)
1401 {
1402 pPool->aPages[iPrev].iNext = pPage->iNext;
1403 break;
1404 }
1405 if (i == NIL_PGMPOOL_IDX)
1406 {
1407 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1408 break;
1409 }
1410 iPrev = i;
1411 }
1412 }
1413 pPage->iNext = NIL_PGMPOOL_IDX;
1414}
1415
1416
1417/**
1418 * Frees up one cache page.
1419 *
1420 * @returns VBox status code.
1421 * @retval VINF_SUCCESS on success.
1422 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
1423 * @param pPool The pool.
1424 * @param iUser The user index.
1425 */
1426static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1427{
1428#ifndef IN_RC
1429 const PVM pVM = pPool->CTX_SUFF(pVM);
1430#endif
1431 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
1432 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1433
1434 /*
1435 * Select one page from the tail of the age list.
1436 */
1437 uint16_t iToFree = pPool->iAgeTail;
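 /* Never evict the caller's own user page (iUser); it is about to reference the page we are allocating. */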
1438 if (iToFree == iUser)
1439 iToFree = pPool->aPages[iToFree].iAgePrev;
1440/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1441 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1442 {
1443 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1444 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1445 {
1446 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1447 continue;
1448 iToFree = i;
1449 break;
1450 }
1451 }
1452*/
1453
1454 Assert(iToFree != iUser);
1455 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1456
1457 PPGMPOOLPAGE pPage = &pPool->aPages[iToFree];
1458
1459 /*
1460 * Reject any attempts at flushing the currently active shadow CR3 mapping
1461 */
1462 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
1463 {
1464 /* Refresh the cr3 mapping by putting it at the head of the age list. */
1465 LogFlow(("pgmPoolCacheFreeOne refuse CR3 mapping\n"));
1466 pgmPoolCacheUsed(pPool, pPage);
1467 return pgmPoolCacheFreeOne(pPool, iUser);
1468 }
1469
1470 int rc = pgmPoolFlushPage(pPool, pPage);
1471 if (rc == VINF_SUCCESS)
1472 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1473 return rc;
1474}
1475
1476
1477/**
1478 * Checks if a kind mismatch is really a page being reused
1479 * or if it's just a normal remapping.
1480 *
1481 * @returns true if reused and the cached page (enmKind1) should be flushed
1482 * @returns false if not reused.
1483 * @param enmKind1 The kind of the cached page.
1484 * @param enmKind2 The kind of the requested page.
1485 */
1486static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1487{
1488 switch (enmKind1)
1489 {
1490 /*
1491 * Never reuse them. There is no remapping in non-paging mode.
1492 */
1493 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1494 case PGMPOOLKIND_32BIT_PD_PHYS:
1495 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1496 case PGMPOOLKIND_PAE_PD_PHYS:
1497 case PGMPOOLKIND_PAE_PDPT_PHYS:
1498 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1499 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1500 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1501 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1502 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1503#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1504 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1505 return false;
1506#else
1507 return true;
1508#endif
1509
1510 /*
1511 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1512 */
1513 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1514 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1515 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1516 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1517 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1518 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1519 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1520 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1521 case PGMPOOLKIND_32BIT_PD:
1522 switch (enmKind2)
1523 {
1524 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1525 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1526 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1527 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1528 case PGMPOOLKIND_64BIT_PML4:
1529 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1530 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1531 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1532 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1533 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1534 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1535 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1536 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1537 return true;
1538 default:
1539 return false;
1540 }
1541
1542 /*
1543 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1544 */
1545 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1546 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1547 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1548 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1549 case PGMPOOLKIND_64BIT_PML4:
1550 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1551 switch (enmKind2)
1552 {
1553 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1554 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1555 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1556 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1557 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1558 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1559 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1560 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1561 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1562 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1563 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1564 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1565 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1566 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1567 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1568 return true;
1569 default:
1570 return false;
1571 }
1572
1573 /*
1574 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1575 */
1576#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1577 case PGMPOOLKIND_ROOT_32BIT_PD:
1578 case PGMPOOLKIND_ROOT_PAE_PD:
1579 case PGMPOOLKIND_ROOT_PDPT:
1580#endif
1581 case PGMPOOLKIND_ROOT_NESTED:
1582 return false;
1583
1584 default:
1585 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1586 }
1587}
1588
1589
1590/**
1591 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1592 *
1593 * @returns VBox status code.
1594 * @retval VINF_PGM_CACHED_PAGE on success.
1595 * @retval VERR_FILE_NOT_FOUND if not found.
1596 * @param pPool The pool.
1597 * @param GCPhys The GC physical address of the page we're gonna shadow.
1598 * @param enmKind The kind of mapping.
1599 * @param iUser The shadow page pool index of the user table.
1600 * @param iUserTable The index into the user table (shadowed).
1601 * @param ppPage Where to store the pointer to the page.
1602 */
1603static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1604{
1605#ifndef IN_RC
1606 const PVM pVM = pPool->CTX_SUFF(pVM);
1607#endif
1608 /*
1609 * Look up the GCPhys in the hash.
1610 */
1611 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1612 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
1613 if (i != NIL_PGMPOOL_IDX)
1614 {
1615 do
1616 {
1617 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1618 Log3(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1619 if (pPage->GCPhys == GCPhys)
1620 {
1621 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
1622 {
1623 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1624 if (RT_SUCCESS(rc))
1625 {
1626 *ppPage = pPage;
1627 STAM_COUNTER_INC(&pPool->StatCacheHits);
1628 return VINF_PGM_CACHED_PAGE;
1629 }
1630 return rc;
1631 }
1632
1633 /*
1634 * The kind is different. In some cases we should now flush the page
1635 * as it has been reused, but in most cases this is normal remapping
1636 * of PDs as PTs or big pages using the GCPhys field in a slightly
1637 * different way than the other kinds.
1638 */
1639 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1640 {
1641 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1642 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
1643 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1644 break;
1645 }
1646 }
1647
1648 /* next */
1649 i = pPage->iNext;
1650 } while (i != NIL_PGMPOOL_IDX);
1651 }
1652
1653 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
1654 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1655 return VERR_FILE_NOT_FOUND;
1656}
1657
1658
1659/**
1660 * Inserts a page into the cache.
1661 *
1662 * @param pPool The pool.
1663 * @param pPage The cached page.
1664 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1665 */
1666static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1667{
1668 /*
1669 * Insert into the GCPhys hash if the page is fit for that.
1670 */
1671 Assert(!pPage->fCached);
1672 if (fCanBeCached)
1673 {
1674 pPage->fCached = true;
1675 pgmPoolHashInsert(pPool, pPage);
1676 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1677 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1678 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1679 }
1680 else
1681 {
1682 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1683 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1684 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1685 }
1686
1687 /*
1688 * Insert at the head of the age list.
1689 */
1690 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1691 pPage->iAgeNext = pPool->iAgeHead;
1692 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1693 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1694 else
1695 pPool->iAgeTail = pPage->idx;
1696 pPool->iAgeHead = pPage->idx;
1697}
1698
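/*
 * Illustrative sketch (documentation only, compiled out): the age list maintained above is
 * an LRU chained through page indices rather than pointers - iAgeHead is the most recently
 * inserted page and iAgeTail the oldest. Reclaiming a victim therefore means walking
 * backwards from the tail towards the head.
 */
#if 0
static PPGMPOOLPAGE pgmPoolCacheFindLruVictimSketch(PPGMPOOL pPool)
{
    uint16_t idx = pPool->iAgeTail;                 /* oldest cached page */
    while (idx != NIL_PGMPOOL_IDX)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[idx];
        if (!pPage->fMonitored)                     /* sketch criterion only; the real code checks more */
            return pPage;
        idx = pPage->iAgePrev;                      /* step towards younger entries */
    }
    return NULL;
}
#endif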
1699
1700/**
1701 * Flushes a cached page.
1702 *
1703 * @param pPool The pool.
1704 * @param pPage The cached page.
1705 */
1706static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1707{
1708 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1709
1710 /*
1711 * Remove the page from the hash.
1712 */
1713 if (pPage->fCached)
1714 {
1715 pPage->fCached = false;
1716 pgmPoolHashRemove(pPool, pPage);
1717 }
1718 else
1719 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1720
1721 /*
1722 * Remove it from the age list.
1723 */
1724 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1725 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1726 else
1727 pPool->iAgeTail = pPage->iAgePrev;
1728 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1729 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1730 else
1731 pPool->iAgeHead = pPage->iAgeNext;
1732 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1733 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1734}
1735
1736#endif /* PGMPOOL_WITH_CACHE */
1737#ifdef PGMPOOL_WITH_MONITORING
1738
1739/**
1740 * Looks for pages sharing the monitor.
1741 *
1742 * @returns Pointer to the head page.
1743 * @returns NULL if not found.
1744 * @param pPool The Pool
1745 * @param pNewPage The page which is going to be monitored.
1746 */
1747static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1748{
1749#ifdef PGMPOOL_WITH_CACHE
1750 /*
1751 * Look up the GCPhys in the hash.
1752 */
1753 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1754 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1755 if (i == NIL_PGMPOOL_IDX)
1756 return NULL;
1757 do
1758 {
1759 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1760 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1761 && pPage != pNewPage)
1762 {
1763 switch (pPage->enmKind)
1764 {
1765 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1766 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1767 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1768 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1769 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1770 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1771 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1772 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1773 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1774 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1775 case PGMPOOLKIND_64BIT_PML4:
1776#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1777 case PGMPOOLKIND_32BIT_PD:
1778 case PGMPOOLKIND_PAE_PDPT:
1779#else
1780 case PGMPOOLKIND_ROOT_32BIT_PD:
1781 case PGMPOOLKIND_ROOT_PAE_PD:
1782 case PGMPOOLKIND_ROOT_PDPT:
1783#endif
1784 {
1785 /* find the head */
1786 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1787 {
1788 Assert(pPage->iMonitoredPrev != pPage->idx);
1789 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1790 }
1791 return pPage;
1792 }
1793
1794 /* ignore, no monitoring. */
1795 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1796 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1797 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1798 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1799 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1800 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1801 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1802 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1803 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1804 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1805 case PGMPOOLKIND_ROOT_NESTED:
1806 case PGMPOOLKIND_PAE_PD_PHYS:
1807 case PGMPOOLKIND_PAE_PDPT_PHYS:
1808 case PGMPOOLKIND_32BIT_PD_PHYS:
1809#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1810 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1811#endif
1812 break;
1813 default:
1814 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1815 }
1816 }
1817
1818 /* next */
1819 i = pPage->iNext;
1820 } while (i != NIL_PGMPOOL_IDX);
1821#endif
1822 return NULL;
1823}
1824
1825
1826/**
1827 * Enables write monitoring of a guest page.
1828 *
1829 * @returns VBox status code.
1830 * @retval VINF_SUCCESS on success.
1831 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1832 * @param pPool The pool.
1833 * @param pPage The cached page.
1834 */
1835static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1836{
1837 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1838
1839 /*
1840 * Filter out the relevant kinds.
1841 */
1842 switch (pPage->enmKind)
1843 {
1844 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1845 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1846 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1847 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1848 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1849 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1850 case PGMPOOLKIND_64BIT_PML4:
1851#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1852 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1853 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1854 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1855 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1856 case PGMPOOLKIND_32BIT_PD:
1857 case PGMPOOLKIND_PAE_PDPT:
1858#else
1859 case PGMPOOLKIND_ROOT_PDPT:
1860#endif
1861 break;
1862
1863 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1864 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1865 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1866 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1867 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1868 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1869 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1870 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1871 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1872 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1873 case PGMPOOLKIND_ROOT_NESTED:
1874 /* Nothing to monitor here. */
1875 return VINF_SUCCESS;
1876
1877#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1878 case PGMPOOLKIND_32BIT_PD_PHYS:
1879 case PGMPOOLKIND_PAE_PDPT_PHYS:
1880 case PGMPOOLKIND_PAE_PD_PHYS:
1881 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1882 /* Nothing to monitor here. */
1883 return VINF_SUCCESS;
1884#else
1885 case PGMPOOLKIND_ROOT_32BIT_PD:
1886 case PGMPOOLKIND_ROOT_PAE_PD:
1887#endif
1888#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1889 break;
1890#else
1891 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1892#endif
1893 default:
1894 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1895 }
1896
1897 /*
1898 * Install handler.
1899 */
1900 int rc;
1901 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1902 if (pPageHead)
1903 {
1904 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1905 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1906 pPage->iMonitoredPrev = pPageHead->idx;
1907 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1908 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1909 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1910 pPageHead->iMonitoredNext = pPage->idx;
1911 rc = VINF_SUCCESS;
1912 }
1913 else
1914 {
1915 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1916 PVM pVM = pPool->CTX_SUFF(pVM);
1917 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1918 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1919 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1920 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1921 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1922 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1923 pPool->pszAccessHandler);
1924 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1925 * the heap size should suffice. */
1926 AssertFatalRC(rc);
1927 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1928 rc = VERR_PGM_POOL_CLEARED;
1929 }
1930 pPage->fMonitored = true;
1931 return rc;
1932}
1933
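/*
 * Illustrative sketch (documentation only, compiled out): shadow pages monitoring the same
 * guest page share one physical access handler and are chained through iMonitoredPrev /
 * iMonitoredNext; walking the chain from the head visits every sharer set up above.
 */
#if 0
static unsigned pgmPoolMonitorCountChainSketch(PPGMPOOL pPool, PPGMPOOLPAGE pPageHead)
{
    unsigned cPages = 0;
    for (uint16_t idx = pPageHead->idx; idx != NIL_PGMPOOL_IDX; idx = pPool->aPages[idx].iMonitoredNext)
        cPages++;                                   /* count the head and every chained page */
    return cPages;
}
#endif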
1934
1935/**
1936 * Disables write monitoring of a guest page.
1937 *
1938 * @returns VBox status code.
1939 * @retval VINF_SUCCESS on success.
1940 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1941 * @param pPool The pool.
1942 * @param pPage The cached page.
1943 */
1944static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1945{
1946 /*
1947 * Filter out the relevant kinds.
1948 */
1949 switch (pPage->enmKind)
1950 {
1951 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1952 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1953 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1954 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1955 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1956 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1957 case PGMPOOLKIND_64BIT_PML4:
1958#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1959 case PGMPOOLKIND_32BIT_PD:
1960 case PGMPOOLKIND_PAE_PDPT:
1961 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1962 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1963 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1964 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1965#else
1966 case PGMPOOLKIND_ROOT_PDPT:
1967#endif
1968 break;
1969
1970 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1971 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1972 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1973 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1974 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1975 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1976 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1977 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1978 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1979 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1980 case PGMPOOLKIND_ROOT_NESTED:
1981 case PGMPOOLKIND_PAE_PD_PHYS:
1982 case PGMPOOLKIND_PAE_PDPT_PHYS:
1983 case PGMPOOLKIND_32BIT_PD_PHYS:
1984 /* Nothing to monitor here. */
1985 return VINF_SUCCESS;
1986
1987#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1988 case PGMPOOLKIND_ROOT_32BIT_PD:
1989 case PGMPOOLKIND_ROOT_PAE_PD:
1990#endif
1991#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1992 break;
1993#endif
1994#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1995 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1996#endif
1997 default:
1998 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1999 }
2000
2001 /*
2002 * Remove the page from the monitored list or uninstall it if last.
2003 */
2004 const PVM pVM = pPool->CTX_SUFF(pVM);
2005 int rc;
2006 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2007 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2008 {
2009 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2010 {
2011 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2012 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2013#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2014 pNewHead->fCR3Mix = pPage->fCR3Mix;
2015#endif
2016 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2017 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2018 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2019 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2020 pPool->pszAccessHandler);
2021 AssertFatalRCSuccess(rc);
2022 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2023 }
2024 else
2025 {
2026 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2027 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2028 {
2029 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2030 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2031 }
2032 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2033 rc = VINF_SUCCESS;
2034 }
2035 }
2036 else
2037 {
2038 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2039 AssertFatalRC(rc);
2040 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2041 rc = VERR_PGM_POOL_CLEARED;
2042 }
2043 pPage->fMonitored = false;
2044
2045 /*
2046 * Remove it from the list of modified pages (if in it).
2047 */
2048 pgmPoolMonitorModifiedRemove(pPool, pPage);
2049
2050 return rc;
2051}
2052
2053# if defined(PGMPOOL_WITH_MIXED_PT_CR3) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2054
2055/**
2056 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
2057 *
2058 * @param pPool The Pool.
2059 * @param pPage A page in the chain.
2060 * @param fCR3Mix The new fCR3Mix value.
2061 */
2062static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
2063{
2064 /* current */
2065 pPage->fCR3Mix = fCR3Mix;
2066
2067 /* before */
2068 int16_t idx = pPage->iMonitoredPrev;
2069 while (idx != NIL_PGMPOOL_IDX)
2070 {
2071 pPool->aPages[idx].fCR3Mix = fCR3Mix;
2072 idx = pPool->aPages[idx].iMonitoredPrev;
2073 }
2074
2075 /* after */
2076 idx = pPage->iMonitoredNext;
2077 while (idx != NIL_PGMPOOL_IDX)
2078 {
2079 pPool->aPages[idx].fCR3Mix = fCR3Mix;
2080 idx = pPool->aPages[idx].iMonitoredNext;
2081 }
2082}
2083
2084
2085/**
2086 * Installs or modifies monitoring of a CR3 page (special).
2087 *
2088 * We're pretending the CR3 page is shadowed by the pool so we can use the
2089 * generic mechanisms in detecting chained monitoring. (This also gives us a
2090 * taste of what code changes are required to really pool CR3 shadow pages.)
2091 *
2092 * @returns VBox status code.
2093 * @param pPool The pool.
2094 * @param idxRoot The CR3 (root) page index.
2095 * @param GCPhysCR3 The (new) CR3 value.
2096 */
2097int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
2098{
2099 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
2100 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
2101 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d} GCPhysCR3=%RGp\n",
2102 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
2103
2104 /*
2105 * The unlikely case where it already matches.
2106 */
2107 if (pPage->GCPhys == GCPhysCR3)
2108 {
2109 Assert(pPage->fMonitored);
2110 return VINF_SUCCESS;
2111 }
2112
2113 /*
2114 * Flush the current monitoring and remove it from the hash.
2115 */
2116 int rc = VINF_SUCCESS;
2117 if (pPage->fMonitored)
2118 {
2119 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
2120 rc = pgmPoolMonitorFlush(pPool, pPage);
2121 if (rc == VERR_PGM_POOL_CLEARED)
2122 rc = VINF_SUCCESS;
2123 else
2124 AssertFatalRC(rc);
2125 pgmPoolHashRemove(pPool, pPage);
2126 }
2127
2128 /*
2129 * Monitor the page at the new location and insert it into the hash.
2130 */
2131 pPage->GCPhys = GCPhysCR3;
2132 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
2133 if (rc2 != VERR_PGM_POOL_CLEARED)
2134 {
2135 AssertFatalRC(rc2);
2136 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
2137 rc = rc2;
2138 }
2139 pgmPoolHashInsert(pPool, pPage);
2140 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
2141 return rc;
2142}
2143
2144
2145/**
2146 * Removes the monitoring of a CR3 page (special).
2147 *
2148 * @returns VBox status code.
2149 * @param pPool The pool.
2150 * @param idxRoot The CR3 (root) page index.
2151 */
2152int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
2153{
2154 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
2155 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
2156 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d}\n",
2157 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
2158
2159 if (!pPage->fMonitored)
2160 return VINF_SUCCESS;
2161
2162 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
2163 int rc = pgmPoolMonitorFlush(pPool, pPage);
2164 if (rc != VERR_PGM_POOL_CLEARED)
2165 AssertFatalRC(rc);
2166 else
2167 rc = VINF_SUCCESS;
2168 pgmPoolHashRemove(pPool, pPage);
2169 Assert(!pPage->fMonitored);
2170 pPage->GCPhys = NIL_RTGCPHYS;
2171 return rc;
2172}
2173
2174# endif /* PGMPOOL_WITH_MIXED_PT_CR3 && !VBOX_WITH_PGMPOOL_PAGING_ONLY*/
2175
2176/**
2177 * Inserts the page into the list of modified pages.
2178 *
2179 * @param pPool The pool.
2180 * @param pPage The page.
2181 */
2182void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2183{
2184 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2185 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2186 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2187 && pPool->iModifiedHead != pPage->idx,
2188 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2189 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2190 pPool->iModifiedHead, pPool->cModifiedPages));
2191
2192 pPage->iModifiedNext = pPool->iModifiedHead;
2193 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2194 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2195 pPool->iModifiedHead = pPage->idx;
2196 pPool->cModifiedPages++;
2197#ifdef VBOX_WITH_STATISTICS
2198 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2199 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2200#endif
2201}
2202
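/*
 * Illustrative sketch (documentation only, compiled out): since the modified list is doubly
 * linked through page indices, membership can be tested in O(1) - a page is on the list iff
 * it is the head or has a predecessor.
 */
#if 0
DECLINLINE(bool) pgmPoolIsOnModifiedListSketch(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    return pPool->iModifiedHead == pPage->idx
        || pPage->iModifiedPrev != NIL_PGMPOOL_IDX;
}
#endif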
2203
2204/**
2205 * Removes the page from the list of modified pages and resets the
2206 * modification counter.
2207 *
2208 * @param pPool The pool.
2209 * @param pPage The page which is believed to be in the list of modified pages.
2210 */
2211static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2212{
2213 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2214 if (pPool->iModifiedHead == pPage->idx)
2215 {
2216 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2217 pPool->iModifiedHead = pPage->iModifiedNext;
2218 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2219 {
2220 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2221 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2222 }
2223 pPool->cModifiedPages--;
2224 }
2225 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2226 {
2227 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2228 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2229 {
2230 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2231 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2232 }
2233 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2234 pPool->cModifiedPages--;
2235 }
2236 else
2237 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2238 pPage->cModifications = 0;
2239}
2240
2241
2242/**
2243 * Zaps the list of modified pages, resetting their modification counters in the process.
2244 *
2245 * @param pVM The VM handle.
2246 */
2247void pgmPoolMonitorModifiedClearAll(PVM pVM)
2248{
2249 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2250 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2251
2252 unsigned cPages = 0; NOREF(cPages);
2253 uint16_t idx = pPool->iModifiedHead;
2254 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2255 while (idx != NIL_PGMPOOL_IDX)
2256 {
2257 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2258 idx = pPage->iModifiedNext;
2259 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2260 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2261 pPage->cModifications = 0;
2262 Assert(++cPages);
2263 }
2264 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2265 pPool->cModifiedPages = 0;
2266}
2267
2268
2269#ifdef IN_RING3
2270/**
2271 * Clear all shadow pages and clear all modification counters.
2272 *
2273 * @param pVM The VM handle.
2274 * @remark Should only be used when monitoring is available, thus placed in
2275 * the PGMPOOL_WITH_MONITORING #ifdef.
2276 */
2277void pgmPoolClearAll(PVM pVM)
2278{
2279 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2280 STAM_PROFILE_START(&pPool->StatClearAll, c);
2281 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2282
2283 /*
2284 * Iterate all the pages until we've encountered all those in use.
2285 * This is a simple but not quite optimal solution.
2286 */
2287 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2288 unsigned cLeft = pPool->cUsedPages;
2289 unsigned iPage = pPool->cCurPages;
2290 while (--iPage >= PGMPOOL_IDX_FIRST)
2291 {
2292 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2293 if (pPage->GCPhys != NIL_RTGCPHYS)
2294 {
2295 switch (pPage->enmKind)
2296 {
2297 /*
2298 * We only care about shadow page tables.
2299 */
2300 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2301 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2302 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2303 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2304 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2305 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2306 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2307 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2308 {
2309#ifdef PGMPOOL_WITH_USER_TRACKING
2310 if (pPage->cPresent)
2311#endif
2312 {
2313 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2314 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2315 ASMMemZeroPage(pvShw);
2316 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2317#ifdef PGMPOOL_WITH_USER_TRACKING
2318 pPage->cPresent = 0;
2319 pPage->iFirstPresent = ~0;
2320#endif
2321 }
2322 }
2323 /* fall thru */
2324
2325 default:
2326 Assert(!pPage->cModifications || ++cModifiedPages);
2327 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2328 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2329 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2330 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2331 pPage->cModifications = 0;
2332 break;
2333
2334 }
2335 if (!--cLeft)
2336 break;
2337 }
2338 }
2339
2340 /* sweep the special pages too. */
2341 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2342 {
2343 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2344 if (pPage->GCPhys != NIL_RTGCPHYS)
2345 {
2346 Assert(!pPage->cModifications || ++cModifiedPages);
2347 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2348 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2349 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2350 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2351 pPage->cModifications = 0;
2352 }
2353 }
2354
2355#ifndef DEBUG_michael
2356 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2357#endif
2358 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2359 pPool->cModifiedPages = 0;
2360
2361#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2362 /*
2363 * Clear all the GCPhys links and rebuild the phys ext free list.
2364 */
2365 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2366 pRam;
2367 pRam = pRam->CTX_SUFF(pNext))
2368 {
2369 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2370 while (iPage-- > 0)
2371 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2372 }
2373
2374 pPool->iPhysExtFreeHead = 0;
2375 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2376 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2377 for (unsigned i = 0; i < cMaxPhysExts; i++)
2378 {
2379 paPhysExts[i].iNext = i + 1;
2380 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2381 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2382 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2383 }
2384 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2385#endif
2386
2387
2388 pPool->cPresent = 0;
2389 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2390}
2391#endif /* IN_RING3 */
2392
2393
2394/**
2395 * Handle SyncCR3 pool tasks
2396 *
2397 * @returns VBox status code.
2398 * @retval VINF_SUCCESS on success.
2399 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2400 * @param pVM The VM handle.
2401 * @remark Should only be used when monitoring is available, thus placed in
2402 * the PGMPOOL_WITH_MONITORING #ifdef.
2403 */
2404int pgmPoolSyncCR3(PVM pVM)
2405{
2406 LogFlow(("pgmPoolSyncCR3\n"));
2407 /*
2408 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2409 * Occasionally we will have to clear all the shadow page tables because we wanted
2410 * to monitor a page which was mapped by too many shadowed page tables. This operation
2411 * is sometimes referred to as a 'lightweight flush'.
2412 */
2413 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2414 pgmPoolMonitorModifiedClearAll(pVM);
2415 else
2416 {
2417# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2418 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2419 pgmPoolClearAll(pVM);
2420# else /* !IN_RING3 */
2421 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2422 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2423 return VINF_PGM_SYNC_CR3;
2424# endif /* !IN_RING3 */
2425 }
2426 return VINF_SUCCESS;
2427}
2428
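/*
 * Illustrative sketch (documentation only, compiled out): the 'lightweight flush' handled
 * above is requested by setting PGM_SYNC_CLEAR_PGM_POOL and raising the CR3 sync force
 * action; the next pgmPoolSyncCR3 call in ring-3 then performs pgmPoolClearAll.
 */
#if 0
static void pgmPoolRequestLightweightFlushSketch(PVM pVM)
{
    pVM->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
    VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
}
#endif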
2429#endif /* PGMPOOL_WITH_MONITORING */
2430#ifdef PGMPOOL_WITH_USER_TRACKING
2431
2432/**
2433 * Frees up at least one user entry.
2434 *
2435 * @returns VBox status code.
2436 * @retval VINF_SUCCESS on success.
2437 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2438 * @param pPool The pool.
2439 * @param iUser The user index.
2440 */
2441static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2442{
2443 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2444#ifdef PGMPOOL_WITH_CACHE
2445 /*
2446 * Just free cached pages in a braindead fashion.
2447 */
2448 /** @todo walk the age list backwards and free the first with usage. */
2449 int rc = VINF_SUCCESS;
2450 do
2451 {
2452 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2453 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2454 rc = rc2;
2455 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2456 return rc;
2457#else
2458 /*
2459 * Lazy approach.
2460 */
2461 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
2462 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
2463 pgmPoolFlushAllInt(pPool);
2464 return VERR_PGM_POOL_FLUSHED;
2465#endif
2466}
2467
2468
2469/**
2470 * Inserts a page into the cache.
2471 *
2472 * This will create a user node for the page, insert it into the GCPhys
2473 * hash, and insert it into the age list.
2474 *
2475 * @returns VBox status code.
2476 * @retval VINF_SUCCESS if successfully added.
2477 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2478 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
2479 * @param pPool The pool.
2480 * @param pPage The cached page.
2481 * @param GCPhys The GC physical address of the page we're gonna shadow.
2482 * @param iUser The user index.
2483 * @param iUserTable The user table index.
2484 */
2485DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2486{
2487 int rc = VINF_SUCCESS;
2488 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2489
2490 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2491
2492#ifdef VBOX_STRICT
2493 /*
2494 * Check that the entry doesn't already exist.
2495 */
2496 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2497 {
2498 uint16_t i = pPage->iUserHead;
2499 do
2500 {
2501 Assert(i < pPool->cMaxUsers);
2502 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2503 i = paUsers[i].iNext;
2504 } while (i != NIL_PGMPOOL_USER_INDEX);
2505 }
2506#endif
2507
2508 /*
2509 * Find a free user node.
2510 */
2511 uint16_t i = pPool->iUserFreeHead;
2512 if (i == NIL_PGMPOOL_USER_INDEX)
2513 {
2514 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2515 if (RT_FAILURE(rc))
2516 return rc;
2517 i = pPool->iUserFreeHead;
2518 }
2519
2520 /*
2521 * Unlink the user node from the free list,
2522 * initialize and insert it into the user list.
2523 */
2524 pPool->iUserFreeHead = paUsers[i].iNext;
2525 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2526 paUsers[i].iUser = iUser;
2527 paUsers[i].iUserTable = iUserTable;
2528 pPage->iUserHead = i;
2529
2530 /*
2531 * Insert into cache and enable monitoring of the guest page if enabled.
2532 *
2533 * Until we implement caching of all levels, including the CR3 one, we'll
2534 * have to make sure we don't try to monitor & cache any recursive reuse of
2535 * a monitored CR3 page. Because all Windows versions do this, we'll
2536 * have to be able to do combined access monitoring: CR3 + PT and
2537 * PD + PT (guest PAE).
2538 *
2539 * Update:
2540 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2541 */
2542#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2543# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2544 const bool fCanBeMonitored = true;
2545# else
2546 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2547 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2548 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2549# endif
2550# ifdef PGMPOOL_WITH_CACHE
2551 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2552# endif
2553 if (fCanBeMonitored)
2554 {
2555# ifdef PGMPOOL_WITH_MONITORING
2556 rc = pgmPoolMonitorInsert(pPool, pPage);
2557 if (rc == VERR_PGM_POOL_CLEARED)
2558 {
2559 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2560# ifndef PGMPOOL_WITH_CACHE
2561 pgmPoolMonitorFlush(pPool, pPage);
2562 rc = VERR_PGM_POOL_FLUSHED;
2563# endif
2564 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2565 paUsers[i].iNext = pPool->iUserFreeHead;
2566 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2567 pPool->iUserFreeHead = i;
2568 }
2569 }
2570# endif
2571#endif /* PGMPOOL_WITH_MONITORING */
2572 return rc;
2573}
2574
2575
2576# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2577/**
2578 * Adds a user reference to a page.
2579 *
2580 * This will move the page to the head of the cache age list.
2581 *
2582 * @returns VBox status code.
2583 * @retval VINF_SUCCESS if successfully added.
2584 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2585 * @param pPool The pool.
2586 * @param pPage The cached page.
2587 * @param iUser The user index.
2588 * @param iUserTable The user table.
2589 */
2590static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2591{
2592 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2593
2594 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2595# ifdef VBOX_STRICT
2596 /*
2597 * Check that the entry doesn't already exist.
2598 */
2599 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2600 {
2601 uint16_t i = pPage->iUserHead;
2602 do
2603 {
2604 Assert(i < pPool->cMaxUsers);
2605 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2606 i = paUsers[i].iNext;
2607 } while (i != NIL_PGMPOOL_USER_INDEX);
2608 }
2609# endif
2610
2611 /*
2612 * Allocate a user node.
2613 */
2614 uint16_t i = pPool->iUserFreeHead;
2615 if (i == NIL_PGMPOOL_USER_INDEX)
2616 {
2617 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2618 if (RT_FAILURE(rc))
2619 return rc;
2620 i = pPool->iUserFreeHead;
2621 }
2622 pPool->iUserFreeHead = paUsers[i].iNext;
2623
2624 /*
2625 * Initialize the user node and insert it.
2626 */
2627 paUsers[i].iNext = pPage->iUserHead;
2628 paUsers[i].iUser = iUser;
2629 paUsers[i].iUserTable = iUserTable;
2630 pPage->iUserHead = i;
2631
2632# ifdef PGMPOOL_WITH_CACHE
2633 /*
2634 * Tell the cache to update its replacement stats for this page.
2635 */
2636 pgmPoolCacheUsed(pPool, pPage);
2637# endif
2638 return VINF_SUCCESS;
2639}
2640# endif /* PGMPOOL_WITH_CACHE */
2641
2642
2643/**
2644 * Frees a user record associated with a page.
2645 *
2646 * This does not clear the entry in the user table, it simply returns the
2647 * user record to the chain of free records.
2648 *
2649 * @param pPool The pool.
2650 * @param pPage The shadow page whose user record is being freed.
2651 * @param iUser The shadow page pool index of the user table.
2652 * @param iUserTable The index into the user table (shadowed).
2653 */
2654static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2655{
2656 /*
2657 * Unlink and free the specified user entry.
2658 */
2659 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2660
2661 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2662 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2663 uint16_t i = pPage->iUserHead;
2664 if ( i != NIL_PGMPOOL_USER_INDEX
2665 && paUsers[i].iUser == iUser
2666 && paUsers[i].iUserTable == iUserTable)
2667 {
2668 pPage->iUserHead = paUsers[i].iNext;
2669
2670 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2671 paUsers[i].iNext = pPool->iUserFreeHead;
2672 pPool->iUserFreeHead = i;
2673 return;
2674 }
2675
2676 /* General: Linear search. */
2677 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2678 while (i != NIL_PGMPOOL_USER_INDEX)
2679 {
2680 if ( paUsers[i].iUser == iUser
2681 && paUsers[i].iUserTable == iUserTable)
2682 {
2683 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2684 paUsers[iPrev].iNext = paUsers[i].iNext;
2685 else
2686 pPage->iUserHead = paUsers[i].iNext;
2687
2688 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2689 paUsers[i].iNext = pPool->iUserFreeHead;
2690 pPool->iUserFreeHead = i;
2691 return;
2692 }
2693 iPrev = i;
2694 i = paUsers[i].iNext;
2695 }
2696
2697 /* Fatal: didn't find it */
2698 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2699 iUser, iUserTable, pPage->GCPhys));
2700}
2701
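/*
 * Illustrative sketch (documentation only, compiled out): the user records form a singly
 * linked chain hanging off pPage->iUserHead; counting them shows how many shadow table
 * entries currently reference the pool page whose record was just freed.
 */
#if 0
static unsigned pgmPoolTrackCountUsersSketch(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
    unsigned     cUsers  = 0;
    for (uint16_t i = pPage->iUserHead; i != NIL_PGMPOOL_USER_INDEX; i = paUsers[i].iNext)
        cUsers++;
    return cUsers;
}
#endif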
2702
2703/**
2704 * Gets the entry size of a shadow table.
2705 *
2706 * @param enmKind The kind of page.
2707 *
2708 * @returns The size of the entry in bytes. That is, 4 or 8.
2709 * @returns If the kind is not for a table, an assertion is raised and 0 is
2710 * returned.
2711 */
2712DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2713{
2714 switch (enmKind)
2715 {
2716 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2717 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2718 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2719#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2720 case PGMPOOLKIND_32BIT_PD:
2721 case PGMPOOLKIND_32BIT_PD_PHYS:
2722#else
2723 case PGMPOOLKIND_ROOT_32BIT_PD:
2724#endif
2725 return 4;
2726
2727 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2728 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2729 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2730 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2731 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2732 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2733 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2734 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2735 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2736 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2737 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2738 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2739 case PGMPOOLKIND_64BIT_PML4:
2740#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2741 case PGMPOOLKIND_ROOT_PAE_PD:
2742 case PGMPOOLKIND_ROOT_PDPT:
2743#endif
2744 case PGMPOOLKIND_PAE_PDPT:
2745 case PGMPOOLKIND_ROOT_NESTED:
2746 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2747 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2748 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2749 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2750 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2751 case PGMPOOLKIND_PAE_PD_PHYS:
2752 case PGMPOOLKIND_PAE_PDPT_PHYS:
2753 return 8;
2754
2755 default:
2756 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2757 }
2758}
2759
2760
2761/**
2762 * Gets the entry size of a guest table.
2763 *
2764 * @param enmKind The kind of page.
2765 *
2766 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2767 * @returns If the kind is not for a table, an assertion is raised and 0 is
2768 * returned.
2769 */
2770DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2771{
2772 switch (enmKind)
2773 {
2774 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2775 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2776#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2777 case PGMPOOLKIND_32BIT_PD:
2778#else
2779 case PGMPOOLKIND_ROOT_32BIT_PD:
2780#endif
2781 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2782 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2783 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2784 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2785 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2786 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2787 return 4;
2788
2789 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2790 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2791 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2792 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2793 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2794 case PGMPOOLKIND_64BIT_PML4:
2795#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2796 case PGMPOOLKIND_PAE_PDPT:
2797#else
2798 case PGMPOOLKIND_ROOT_PAE_PD:
2799 case PGMPOOLKIND_ROOT_PDPT:
2800#endif
2801 return 8;
2802
2803 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2804 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2805 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2806 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2807 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2808 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2809 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2810 case PGMPOOLKIND_ROOT_NESTED:
2811 case PGMPOOLKIND_PAE_PD_PHYS:
2812 case PGMPOOLKIND_PAE_PDPT_PHYS:
2813 case PGMPOOLKIND_32BIT_PD_PHYS:
2814 /** @todo can we return 0? (nobody is calling this...) */
2815 AssertFailed();
2816 return 0;
2817
2818 default:
2819 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2820 }
2821}
2822
2823#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2824
2825/**
2826 * Scans one shadow page table for mappings of a physical page.
2827 *
2828 * @param pVM The VM handle.
2829 * @param pPhysPage The guest page in question.
2830 * @param iShw The shadow page table.
2831 * @param cRefs The number of references made in that PT.
2832 */
2833static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2834{
2835 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2836 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2837
2838 /*
2839 * Assert sanity.
2840 */
2841 Assert(cRefs == 1);
2842 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2843 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2844
2845 /*
2846 * Then, clear the actual mappings to the page in the shadow PT.
2847 */
2848 switch (pPage->enmKind)
2849 {
2850 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2851 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2852 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2853 {
2854 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2855 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2856 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2857 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2858 {
2859 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2860 pPT->a[i].u = 0;
2861 cRefs--;
2862 if (!cRefs)
2863 return;
2864 }
2865#ifdef LOG_ENABLED
2866 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2867 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2868 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2869 {
2870 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2871 pPT->a[i].u = 0;
2872 }
2873#endif
2874 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2875 break;
2876 }
2877
2878 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2879 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2880 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2881 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2882 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2883 {
2884 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2885 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2886 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2887 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2888 {
2889 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2890 pPT->a[i].u = 0;
2891 cRefs--;
2892 if (!cRefs)
2893 return;
2894 }
2895#ifdef LOG_ENABLED
2896 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2897 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2898 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2899 {
2900 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2901 pPT->a[i].u = 0;
2902 }
2903#endif
2904 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
2905 break;
2906 }
2907
2908 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2909 {
2910 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2911 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2912 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2913 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2914 {
2915 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2916 pPT->a[i].u = 0;
2917 cRefs--;
2918 if (!cRefs)
2919 return;
2920 }
2921#ifdef LOG_ENABLED
2922 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2923 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2924 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2925 {
2926 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2927 pPT->a[i].u = 0;
2928 }
2929#endif
2930 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2931 break;
2932 }
2933
2934 default:
2935 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2936 }
2937}
2938
2939
2940/**
2941 * Scans one shadow page table for mappings of a physical page.
2942 *
2943 * @param pVM The VM handle.
2944 * @param pPhysPage The guest page in question.
2945 * @param iShw The shadow page table.
2946 * @param cRefs The number of references made in that PT.
2947 */
2948void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2949{
2950 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2951 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2952 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2953 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2954 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2955 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2956}
2957
2958
2959/**
2960 * Flushes a list of shadow page tables mapping the same physical page.
2961 *
2962 * @param pVM The VM handle.
2963 * @param pPhysPage The guest page in question.
2964 * @param iPhysExt The physical cross reference extent list to flush.
2965 */
2966void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2967{
2968 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2969 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2970 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%d\n", pPhysPage->HCPhys, iPhysExt));
2971
2972 const uint16_t iPhysExtStart = iPhysExt;
2973 PPGMPOOLPHYSEXT pPhysExt;
2974 do
2975 {
2976 Assert(iPhysExt < pPool->cMaxPhysExts);
2977 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2978 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2979 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2980 {
2981 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2982 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2983 }
2984
2985 /* next */
2986 iPhysExt = pPhysExt->iNext;
2987 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2988
2989 /* insert the list into the free list and clear the ram range entry. */
2990 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2991 pPool->iPhysExtFreeHead = iPhysExtStart;
2992 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2993
2994 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2995}
2996
2997#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2998
2999/**
3000 * Scans all shadow page tables for mappings of a physical page.
3001 *
3002 * This may be slow, but it's most likely more efficient than cleaning
3003 * out the entire page pool / cache.
3004 *
3005 * @returns VBox status code.
3006 * @retval VINF_SUCCESS if all references have been successfully cleared.
3007 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3008 * a page pool cleaning.
3009 *
3010 * @param pVM The VM handle.
3011 * @param pPhysPage The guest page in question.
3012 */
3013int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3014{
3015 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3016 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3017 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
3018 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
3019
3020#if 1
3021 /*
3022 * There is a limit to what makes sense.
3023 */
3024 if (pPool->cPresent > 1024)
3025 {
3026 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3027 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3028 return VINF_PGM_GCPHYS_ALIASED;
3029 }
3030#endif
3031
3032 /*
3033 * Iterate all the pages until we've encountered all those in use.
3034 * This is a simple but not quite optimal solution.
3035 */
3036 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3037 const uint32_t u32 = u64;
3038 unsigned cLeft = pPool->cUsedPages;
3039 unsigned iPage = pPool->cCurPages;
3040 while (--iPage >= PGMPOOL_IDX_FIRST)
3041 {
3042 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3043 if (pPage->GCPhys != NIL_RTGCPHYS)
3044 {
3045 switch (pPage->enmKind)
3046 {
3047 /*
3048 * We only care about shadow page tables.
3049 */
3050 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3051 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3052 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3053 {
3054 unsigned cPresent = pPage->cPresent;
3055 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3056 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3057 if (pPT->a[i].n.u1Present)
3058 {
3059 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3060 {
3061 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3062 pPT->a[i].u = 0;
3063 }
3064 if (!--cPresent)
3065 break;
3066 }
3067 break;
3068 }
3069
3070 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3071 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3072 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3073 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3074 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3075 {
3076 unsigned cPresent = pPage->cPresent;
3077 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3078 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3079 if (pPT->a[i].n.u1Present)
3080 {
3081 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3082 {
3083 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3084 pPT->a[i].u = 0;
3085 }
3086 if (!--cPresent)
3087 break;
3088 }
3089 break;
3090 }
3091 }
3092 if (!--cLeft)
3093 break;
3094 }
3095 }
3096
3097 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3098 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3099 return VINF_SUCCESS;
3100}
3101
3102
3103/**
3104 * Clears the user entry in a user table.
3105 *
3106 * This is used to remove all references to a page when flushing it.
3107 */
3108static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3109{
3110 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3111 Assert(pUser->iUser < pPool->cCurPages);
3112 uint32_t iUserTable = pUser->iUserTable;
3113
3114 /*
3115 * Map the user page.
3116 */
3117 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3118#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3119 if (pUserPage->enmKind == PGMPOOLKIND_ROOT_PAE_PD)
3120 {
3121 /* Must translate the fake 2048-entry PD to a 512-entry one since the R0 mapping is not linear. */
3122 Assert(pUser->iUser == PGMPOOL_IDX_PAE_PD);
3123 uint32_t iPdpt = iUserTable / X86_PG_PAE_ENTRIES;
3124 iUserTable %= X86_PG_PAE_ENTRIES;
3125 pUserPage = &pPool->aPages[PGMPOOL_IDX_PAE_PD_0 + iPdpt];
3126 Assert(pUserPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD);
3127 }
3128#endif
3129 union
3130 {
3131 uint64_t *pau64;
3132 uint32_t *pau32;
3133 } u;
3134 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3135
3136 /* Safety precaution in case we change the paging for other modes too in the future. */
3137 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
3138
3139#ifdef VBOX_STRICT
3140 /*
3141 * Some sanity checks.
3142 */
3143 switch (pUserPage->enmKind)
3144 {
3145# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3146 case PGMPOOLKIND_32BIT_PD:
3147 case PGMPOOLKIND_32BIT_PD_PHYS:
3148 Assert(iUserTable < X86_PG_ENTRIES);
3149 break;
3150# else
3151 case PGMPOOLKIND_ROOT_32BIT_PD:
3152 Assert(iUserTable < X86_PG_ENTRIES);
3153 Assert(!(u.pau32[iUserTable] & PGM_PDFLAGS_MAPPING));
3154 break;
3155# endif
3156# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3157 case PGMPOOLKIND_ROOT_PAE_PD:
3158 Assert(iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
3159 AssertMsg(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING), ("%llx %d\n", u.pau64[iUserTable], iUserTable));
3160 break;
3161# endif
3162# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3163 case PGMPOOLKIND_PAE_PDPT:
3164 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3165 case PGMPOOLKIND_PAE_PDPT_PHYS:
3166# else
3167 case PGMPOOLKIND_ROOT_PDPT:
3168# endif
3169 Assert(iUserTable < 4);
3170 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3171 break;
3172 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3173 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3174 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3175 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3176 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3177 case PGMPOOLKIND_PAE_PD_PHYS:
3178 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3179 break;
3180 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3181 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3182 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3183 break;
3184 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3185 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3186 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3187 break;
3188 case PGMPOOLKIND_64BIT_PML4:
3189 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3190 /* GCPhys >> PAGE_SHIFT is the index here */
3191 break;
3192 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3193 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3194 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3195 break;
3196
3197 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3198 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3199 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3200 break;
3201
3202 case PGMPOOLKIND_ROOT_NESTED:
3203 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3204 break;
3205
3206 default:
3207 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3208 break;
3209 }
3210#endif /* VBOX_STRICT */
3211
3212 /*
3213 * Clear the entry in the user page.
3214 */
3215 switch (pUserPage->enmKind)
3216 {
3217 /* 32-bit entries */
3218#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3219 case PGMPOOLKIND_32BIT_PD:
3220 case PGMPOOLKIND_32BIT_PD_PHYS:
3221#else
3222 case PGMPOOLKIND_ROOT_32BIT_PD:
3223#endif
3224 u.pau32[iUserTable] = 0;
3225 break;
3226
3227 /* 64-bit entries */
3228 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3229 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3230 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3231 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3232 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3233 case PGMPOOLKIND_PAE_PD_PHYS:
3234 case PGMPOOLKIND_PAE_PDPT_PHYS:
3235 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3236 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3237 case PGMPOOLKIND_64BIT_PML4:
3238 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3239 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3240# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3241 case PGMPOOLKIND_ROOT_PAE_PD:
3242#endif
3243#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3244 case PGMPOOLKIND_PAE_PDPT:
3245 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3246#else
3247 case PGMPOOLKIND_ROOT_PDPT:
3248#endif
3249 case PGMPOOLKIND_ROOT_NESTED:
3250 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3251 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3252 u.pau64[iUserTable] = 0;
3253 break;
3254
3255 default:
3256 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3257 }
3258}
3259
3260
3261/**
3262 * Clears all users of a page.
3263 */
3264static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3265{
3266 /*
3267 * Free all the user records.
3268 */
3269 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3270
3271 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3272 uint16_t i = pPage->iUserHead;
3273 while (i != NIL_PGMPOOL_USER_INDEX)
3274 {
3275 /* Clear the entry in the user table. */
3276 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3277
3278 /* Free it. */
3279 const uint16_t iNext = paUsers[i].iNext;
3280 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3281 paUsers[i].iNext = pPool->iUserFreeHead;
3282 pPool->iUserFreeHead = i;
3283
3284 /* Next. */
3285 i = iNext;
3286 }
3287 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3288}
3289
3290#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3291
3292/**
3293 * Allocates a new physical cross reference extent.
3294 *
3295 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3296 * @param pVM The VM handle.
3297 * @param piPhysExt Where to store the phys ext index.
3298 */
3299PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3300{
3301 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3302 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3303 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3304 {
3305 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3306 return NULL;
3307 }
3308 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3309 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3310 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3311 *piPhysExt = iPhysExt;
3312 return pPhysExt;
3313}
3314
3315
3316/**
3317 * Frees a physical cross reference extent.
3318 *
3319 * @param pVM The VM handle.
3320 * @param iPhysExt The extent to free.
3321 */
3322void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3323{
3324 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3325 Assert(iPhysExt < pPool->cMaxPhysExts);
3326 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3327 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3328 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3329 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3330 pPool->iPhysExtFreeHead = iPhysExt;
3331}
3332
3333
3334/**
3335 * Frees a list of physical cross reference extents.
3336 *
3337 * @param pVM The VM handle.
3338 * @param iPhysExt The index of the first extent in the list to free.
3339 */
3340void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3341{
3342 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3343
3344 const uint16_t iPhysExtStart = iPhysExt;
3345 PPGMPOOLPHYSEXT pPhysExt;
3346 do
3347 {
3348 Assert(iPhysExt < pPool->cMaxPhysExts);
3349 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3350 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3351 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3352
3353 /* next */
3354 iPhysExt = pPhysExt->iNext;
3355 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3356
3357 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3358 pPool->iPhysExtFreeHead = iPhysExtStart;
3359}
3360
3361
3362/**
3363 * Insert a reference into a list of physical cross reference extents.
3364 *
3365 * @returns The new ram range flags (top 16-bits).
3366 *
3367 * @param pVM The VM handle.
3368 * @param iPhysExt The physical extent index of the list head.
3369 * @param iShwPT The shadow page table index.
3370 *
3371 */
3372static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3373{
3374 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3375 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3376
3377 /* special common case. */
3378 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3379 {
3380 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3381 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3382 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
3383 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3384 }
3385
3386 /* general treatment. */
3387 const uint16_t iPhysExtStart = iPhysExt;
3388 unsigned cMax = 15;
3389 for (;;)
3390 {
3391 Assert(iPhysExt < pPool->cMaxPhysExts);
3392 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3393 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3394 {
3395 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3396 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3397 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3398 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3399 }
3400 if (!--cMax)
3401 {
3402 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3403 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3404 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
3405 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3406 }

        /* Advance to the next extent in the chain; drop out of the loop when
           the end is reached so that a new extent can be appended below. */
        iPhysExt = paPhysExts[iPhysExt].iNext;
        if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
            break;
3407     }
3408
3409 /* add another extent to the list. */
3410 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3411 if (!pNew)
3412 {
3413 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3414 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3415 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3416 }
3417 pNew->iNext = iPhysExtStart;
3418 pNew->aidx[0] = iShwPT;
3419 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3420 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3421}
3422
3423
3424/**
3425 * Adds a reference to a guest physical page where extents are in use.
3426 *
3427 * @returns The new ram range flags (top 16-bits).
3428 *
3429 * @param pVM The VM handle.
3430 * @param u16 The ram range flags (top 16-bits).
3431 * @param iShwPT The shadow page table index.
3432 */
3433uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3434{
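    /* Three cases: a page with a single tracked reference is converted to a new
       extent list; a page already using an extent list (and not overflowed) gets
       the reference inserted; an overflowed page only bumps the statistics. */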
3435 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
3436 {
3437 /*
3438 * Convert to extent list.
3439 */
3440 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
3441 uint16_t iPhysExt;
3442 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3443 if (pPhysExt)
3444 {
3445 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
3446 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3447 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
3448 pPhysExt->aidx[1] = iShwPT;
3449 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3450 }
3451 else
3452 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3453 }
3454 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
3455 {
3456 /*
3457 * Insert into the extent list.
3458 */
3459 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
3460 }
3461 else
3462 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3463 return u16;
3464}
3465
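/*
 * Illustrative example (not part of the original sources): a guest page already
 * tracked by a single shadow page A (cRefs=1, idx=A) that gains a reference from
 * shadow page B would typically go through
 *
 *     u16 = pgmPoolTrackPhysExtAddref(pVM, u16, idxB);
 *
 * (idxB being a hypothetical name for B's pool index), which allocates an extent
 * {A, B, NIL}, re-encodes u16 as (extent index | CREFS_PHYSEXT), and on later
 * calls fills slot 2 and then chains further extents until the overflow marker
 * has to be used.
 */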
3466
3467/**
3468 * Clear references to guest physical memory.
3469 *
3470 * @param pPool The pool.
3471 * @param pPage The page.
3472 * @param pPhysPage Pointer to the aPages entry in the ram range.
3473 */
3474void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3475{
3476 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
3477 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3478
3479 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
3480 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
3481 {
3482 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3483 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3484 do
3485 {
3486 Assert(iPhysExt < pPool->cMaxPhysExts);
3487
3488 /*
3489 * Look for the shadow page and check if it's all freed.
3490 */
3491 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3492 {
3493 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3494 {
3495 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3496
3497 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3498 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3499 {
3500 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3501 return;
3502 }
3503
3504 /* we can free the node. */
3505 PVM pVM = pPool->CTX_SUFF(pVM);
3506 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3507 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3508 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3509 {
3510 /* lonely node */
3511 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3512 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
3513 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3514 }
3515 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3516 {
3517 /* head */
3518 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
3519 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
3520 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
3521 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
3522 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3523 }
3524 else
3525 {
3526 /* in list */
3527 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3528 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3529 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3530 }
3531 iPhysExt = iPhysExtNext;
3532 return;
3533 }
3534 }
3535
3536 /* next */
3537 iPhysExtPrev = iPhysExt;
3538 iPhysExt = paPhysExts[iPhysExt].iNext;
3539 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3540
3541 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3542 }
3543 else /* nothing to do */
3544 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
3545}
3546
3547
3548/**
3549 * Clear references to guest physical memory.
3550 *
3551 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3552 * is assumed to be correct, so the linear search can be skipped and we can assert
3553 * at an earlier point.
3554 *
3555 * @param pPool The pool.
3556 * @param pPage The page.
3557 * @param HCPhys The host physical address corresponding to the guest page.
3558 * @param GCPhys The guest physical address corresponding to HCPhys.
3559 */
3560static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3561{
3562 /*
3563 * Walk range list.
3564 */
3565 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3566 while (pRam)
3567 {
3568 RTGCPHYS off = GCPhys - pRam->GCPhys;
3569 if (off < pRam->cb)
3570 {
3571 /* does it match? */
3572 const unsigned iPage = off >> PAGE_SHIFT;
3573 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3574#ifdef LOG_ENABLED
3575            RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3576            Log(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3577#endif
3578 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3579 {
3580 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3581 return;
3582 }
3583 break;
3584 }
3585 pRam = pRam->CTX_SUFF(pNext);
3586 }
3587 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3588}
3589
3590
3591/**
3592 * Clear references to guest physical memory.
3593 *
3594 * @param pPool The pool.
3595 * @param pPage The page.
3596 * @param HCPhys The host physical address corresponding to the guest page.
3597 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3598 */
3599static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3600{
3601 /*
3602 * Walk range list.
3603 */
3604 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3605 while (pRam)
3606 {
3607 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3608 if (off < pRam->cb)
3609 {
3610 /* does it match? */
3611 const unsigned iPage = off >> PAGE_SHIFT;
3612 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3613 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3614 {
3615 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3616 return;
3617 }
3618 break;
3619 }
3620 pRam = pRam->CTX_SUFF(pNext);
3621 }
3622
3623 /*
3624 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3625 */
3626 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3627 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3628 while (pRam)
3629 {
3630 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3631 while (iPage-- > 0)
3632 {
3633 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3634 {
3635 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3636 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3637 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3638 return;
3639 }
3640 }
3641 pRam = pRam->CTX_SUFF(pNext);
3642 }
3643
3644 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3645}
3646
3647
3648/**
3649 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3650 *
3651 * @param pPool The pool.
3652 * @param pPage The page.
3653 * @param pShwPT The shadow page table (mapping of the page).
3654 * @param pGstPT The guest page table.
3655 */
3656DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3657{
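    /* Start at the first present entry and stop as soon as the page's present
       counter reaches zero; no present entries can remain beyond that point. */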
3658 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3659 if (pShwPT->a[i].n.u1Present)
3660 {
3661 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3662 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3663 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3664 if (!--pPage->cPresent)
3665 break;
3666 }
3667}
3668
3669
3670/**
3671 * Clear references to guest physical memory in a PAE / 32-bit page table.
3672 *
3673 * @param pPool The pool.
3674 * @param pPage The page.
3675 * @param pShwPT The shadow page table (mapping of the page).
3676 * @param pGstPT The guest page table (just a half one).
3677 */
3678DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3679{
3680 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3681 if (pShwPT->a[i].n.u1Present)
3682 {
3683             Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3684 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3685 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3686 }
3687}
3688
3689
3690/**
3691 * Clear references to guest physical memory in a PAE / PAE page table.
3692 *
3693 * @param pPool The pool.
3694 * @param pPage The page.
3695 * @param pShwPT The shadow page table (mapping of the page).
3696 * @param pGstPT The guest page table.
3697 */
3698DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3699{
3700 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3701 if (pShwPT->a[i].n.u1Present)
3702 {
3703             Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3704 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3705 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3706 }
3707}
3708
3709
3710/**
3711 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3712 *
3713 * @param pPool The pool.
3714 * @param pPage The page.
3715 * @param pShwPT The shadow page table (mapping of the page).
3716 */
3717DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3718{
3719 RTGCPHYS GCPhys = pPage->GCPhys;
3720 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3721 if (pShwPT->a[i].n.u1Present)
3722 {
3723 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3724 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3725 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3726 }
3727}
3728
3729
3730/**
3731 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3732 *
3733 * @param pPool The pool.
3734 * @param pPage The page.
3735 * @param pShwPT The shadow page table (mapping of the page).
3736 */
3737DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3738{
3739 RTGCPHYS GCPhys = pPage->GCPhys;
3740 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3741 if (pShwPT->a[i].n.u1Present)
3742 {
3743 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3744 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3745 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3746 }
3747}
3748
3749#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3750
3751
3752#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3753/**
3754 * Clear references to shadowed pages in a 32-bit page directory.
3755 *
3756 * @param pPool The pool.
3757 * @param pPage The page.
3758 * @param pShwPD The shadow page directory (mapping of the page).
3759 */
3760DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
3761{
3762 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3763 {
3764 if ( pShwPD->a[i].n.u1Present
3765 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3766 )
3767 {
3768 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
3769 if (pSubPage)
3770 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3771 else
3772 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
3773 }
3774 }
3775}
3776#endif
3777
3778/**
3779 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
3780 *
3781 * @param pPool The pool.
3782 * @param pPage The page.
3783 * @param pShwPD The shadow page directory (mapping of the page).
3784 */
3785DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3786{
3787 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3788 {
3789 if ( pShwPD->a[i].n.u1Present
3790#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3791 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3792#endif
3793 )
3794 {
3795 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3796 if (pSubPage)
3797 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3798 else
3799 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3800 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3801 }
3802 }
3803}
3804
3805
3806/**
3807 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3808 *
3809 * @param pPool The pool.
3810 * @param pPage The page.
3811 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3812 */
3813DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3814{
3815 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3816 {
3817 if ( pShwPDPT->a[i].n.u1Present
3818#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3819 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
3820#endif
3821 )
3822 {
3823 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3824 if (pSubPage)
3825 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3826 else
3827 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3828 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3829 }
3830 }
3831}
3832
3833
3834/**
3835 * Clear references to shadowed pages in a 64-bit level 4 page table.
3836 *
3837 * @param pPool The pool.
3838 * @param pPage The page.
3839 * @param pShwPML4 The shadow PML4 (level 4) table (mapping of the page).
3840 */
3841DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3842{
3843 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3844 {
3845 if (pShwPML4->a[i].n.u1Present)
3846 {
3847 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3848 if (pSubPage)
3849 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3850 else
3851 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3852 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3853 }
3854 }
3855}
3856
3857
3858/**
3859 * Clear references to shadowed pages in an EPT page table.
3860 *
3861 * @param pPool The pool.
3862 * @param pPage The page.
3863 * @param pShwPT The shadow EPT page table (mapping of the page).
3864 */
3865DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3866{
3867 RTGCPHYS GCPhys = pPage->GCPhys;
3868 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3869 if (pShwPT->a[i].n.u1Present)
3870 {
3871 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
3872 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
3873 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3874 }
3875}
3876
3877
3878/**
3879 * Clear references to shadowed pages in an EPT page directory.
3880 *
3881 * @param pPool The pool.
3882 * @param pPage The page.
3883 * @param pShwPD The shadow page directory (mapping of the page).
3884 */
3885DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3886{
3887 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3888 {
3889 if (pShwPD->a[i].n.u1Present)
3890 {
3891 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3892 if (pSubPage)
3893 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3894 else
3895 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3896 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3897 }
3898 }
3899}
3900
3901
3902/**
3903 * Clear references to shadowed pages in an EPT page directory pointer table.
3904 *
3905 * @param pPool The pool.
3906 * @param pPage The page.
3907 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3908 */
3909DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3910{
3911 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3912 {
3913 if (pShwPDPT->a[i].n.u1Present)
3914 {
3915 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3916 if (pSubPage)
3917 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3918 else
3919 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3920 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3921 }
3922 }
3923}
3924
3925
3926/**
3927 * Clears all references made by this page.
3928 *
3929 * This includes other shadow pages and GC physical addresses.
3930 *
3931 * @param pPool The pool.
3932 * @param pPage The page.
3933 */
3934static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3935{
3936 /*
3937 * Map the shadow page and take action according to the page kind.
3938 */
3939 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3940 switch (pPage->enmKind)
3941 {
3942#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3943 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3944 {
3945 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3946 void *pvGst;
3947 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3948 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3949 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3950 break;
3951 }
3952
3953 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3954 {
3955 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3956 void *pvGst;
3957 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3958 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3959 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3960 break;
3961 }
3962
3963 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3964 {
3965 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3966 void *pvGst;
3967 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3968 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3969 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3970 break;
3971 }
3972
3973 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3974 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3975 {
3976 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3977 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3978 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3979 break;
3980 }
3981
3982 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3983 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3984 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3985 {
3986 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3987 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3988 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3989 break;
3990 }
3991
3992#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3993 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3994 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3995 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3996 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3997 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3998 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3999 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4000 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4001 break;
4002#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4003
4004 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4005 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4006 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4007 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4008 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4009 case PGMPOOLKIND_PAE_PD_PHYS:
4010 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4011 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4012 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4013 break;
4014
4015#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4016 case PGMPOOLKIND_32BIT_PD_PHYS:
4017 case PGMPOOLKIND_32BIT_PD:
4018 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4019 break;
4020
4021 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4022 case PGMPOOLKIND_PAE_PDPT:
4023 case PGMPOOLKIND_PAE_PDPT_PHYS:
4024#endif
4025 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4026 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4027 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4028 break;
4029
4030 case PGMPOOLKIND_64BIT_PML4:
4031 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4032 break;
4033
4034 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4035 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4036 break;
4037
4038 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4039 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4040 break;
4041
4042 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4043 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4044 break;
4045
4046 default:
4047 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4048 }
4049
4050     /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4051 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4052 ASMMemZeroPage(pvShw);
4053 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4054 pPage->fZeroed = true;
4055}
4056
4057#endif /* PGMPOOL_WITH_USER_TRACKING */
4058
4059/**
4060 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
4061 *
4062 * @param pPool The pool.
4063 */
4064static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
4065{
4066#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4067 /* Start a subset so we won't run out of mapping space. */
4068 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4069 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4070#endif
4071
4072 /*
4073 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
4074 */
4075 Assert(NIL_PGMPOOL_IDX == 0);
4076 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
4077 {
4078 /*
4079 * Get the page address.
4080 */
4081 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4082 union
4083 {
4084 uint64_t *pau64;
4085 uint32_t *pau32;
4086 } u;
4087
4088 /*
4089 * Mark stuff not present.
4090 */
4091 switch (pPage->enmKind)
4092 {
4093#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4094 case PGMPOOLKIND_ROOT_32BIT_PD:
4095 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4096 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
4097 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
4098 u.pau32[iPage] = 0;
4099 break;
4100
4101 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4102 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4103 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
4104 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
4105 u.pau64[iPage] = 0;
4106 break;
4107
4108 case PGMPOOLKIND_ROOT_PDPT:
4109 /* Not root of shadowed pages currently, ignore it. */
4110 break;
4111#endif
4112
4113 case PGMPOOLKIND_ROOT_NESTED:
4114 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4115 ASMMemZero32(u.pau64, PAGE_SIZE);
4116 break;
4117 }
4118 }
4119
4120 /*
4121 * Paranoia (to be removed), flag a global CR3 sync.
4122 */
4123 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
4124
4125#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4126 /* Pop the subset. */
4127 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4128#endif
4129}
4130
4131
4132/**
4133 * Flushes the entire cache.
4134 *
4135 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4136 * and will execute the CR3 flush.
4137 *
4138 * @param pPool The pool.
4139 */
4140static void pgmPoolFlushAllInt(PPGMPOOL pPool)
4141{
4142 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
4143 LogFlow(("pgmPoolFlushAllInt:\n"));
4144
4145 /*
4146 * If there are no pages in the pool, there is nothing to do.
4147 */
4148 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4149 {
4150 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4151 return;
4152 }
4153
4154 /*
4155 * Nuke the free list and reinsert all pages into it.
4156 */
4157 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4158 {
4159 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4160
4161#ifdef IN_RING3
4162 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMR3, pPage->pvPageR3));
4163#endif
4164#ifdef PGMPOOL_WITH_MONITORING
4165 if (pPage->fMonitored)
4166 pgmPoolMonitorFlush(pPool, pPage);
4167 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4168 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4169 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4170 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4171 pPage->cModifications = 0;
4172#endif
4173 pPage->GCPhys = NIL_RTGCPHYS;
4174 pPage->enmKind = PGMPOOLKIND_FREE;
4175 Assert(pPage->idx == i);
4176 pPage->iNext = i + 1;
4177 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4178 pPage->fSeenNonGlobal = false;
4179     pPage->fMonitored = false;
4180 pPage->fCached = false;
4181 pPage->fReusedFlushPending = false;
4182#ifdef PGMPOOL_WITH_USER_TRACKING
4183 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4184#else
4185 pPage->fCR3Mix = false;
4186#endif
4187#ifdef PGMPOOL_WITH_CACHE
4188 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4189 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4190#endif
4191 }
4192 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4193 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4194 pPool->cUsedPages = 0;
4195
4196#ifdef PGMPOOL_WITH_USER_TRACKING
4197 /*
4198 * Zap and reinitialize the user records.
4199 */
4200 pPool->cPresent = 0;
4201 pPool->iUserFreeHead = 0;
4202 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4203 const unsigned cMaxUsers = pPool->cMaxUsers;
4204 for (unsigned i = 0; i < cMaxUsers; i++)
4205 {
4206 paUsers[i].iNext = i + 1;
4207 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4208 paUsers[i].iUserTable = 0xfffffffe;
4209 }
4210 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4211#endif
4212
4213#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4214 /*
4215 * Clear all the GCPhys links and rebuild the phys ext free list.
4216 */
4217 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4218 pRam;
4219 pRam = pRam->CTX_SUFF(pNext))
4220 {
4221 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4222 while (iPage-- > 0)
4223 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
4224 }
4225
4226 pPool->iPhysExtFreeHead = 0;
4227 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4228 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4229 for (unsigned i = 0; i < cMaxPhysExts; i++)
4230 {
4231 paPhysExts[i].iNext = i + 1;
4232 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4233 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4234 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4235 }
4236 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4237#endif
4238
4239#ifdef PGMPOOL_WITH_MONITORING
4240 /*
4241 * Just zap the modified list.
4242 */
4243 pPool->cModifiedPages = 0;
4244 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4245#endif
4246
4247#ifdef PGMPOOL_WITH_CACHE
4248 /*
4249 * Clear the GCPhys hash and the age list.
4250 */
4251 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4252 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4253 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4254 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4255#endif
4256
4257 /*
4258 * Flush all the special root pages.
4259 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4260 */
4261 pgmPoolFlushAllSpecialRoots(pPool);
4262 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4263 {
4264 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4265 pPage->iNext = NIL_PGMPOOL_IDX;
4266#ifdef PGMPOOL_WITH_MONITORING
4267 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4268 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4269 pPage->cModifications = 0;
4270 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4271 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4272 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4273 if (pPage->fMonitored)
4274 {
4275 PVM pVM = pPool->CTX_SUFF(pVM);
4276 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4277 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4278 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4279 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4280 pPool->pszAccessHandler);
4281 AssertFatalRCSuccess(rc);
4282# ifdef PGMPOOL_WITH_CACHE
4283 pgmPoolHashInsert(pPool, pPage);
4284# endif
4285 }
4286#endif
4287#ifdef PGMPOOL_WITH_USER_TRACKING
4288 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4289#endif
4290#ifdef PGMPOOL_WITH_CACHE
4291 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4292 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4293#endif
4294 }
4295
4296 /*
4297 * Finally, assert the FF.
4298 */
4299 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
4300
4301 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4302}
4303
4304
4305/**
4306 * Flushes a pool page.
4307 *
4308 * This moves the page to the free list after removing all user references to it.
4309 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
4310 *
4311 * @returns VBox status code.
4312 * @retval VINF_SUCCESS on success.
4313 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
4314 * @param pPool The pool.
4315 * @param pPage The shadow page to flush.
4316 */
4317int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4318{
4319 int rc = VINF_SUCCESS;
4320 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4321 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4322 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4323
4324 /*
4325 * Quietly reject any attempts at flushing any of the special root pages.
4326 */
4327 if (pPage->idx < PGMPOOL_IDX_FIRST)
4328 {
4329 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4330 return VINF_SUCCESS;
4331 }
4332
4333 /*
4334 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4335 */
4336 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
4337 {
4338#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4339 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4340 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4341 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4342 || pPage->enmKind == PGMPOOLKIND_32BIT_PD,
4343 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4344#else
4345 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4,
4346 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4347#endif
4348 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4349 return VINF_SUCCESS;
4350 }
4351
4352#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4353 /* Start a subset so we won't run out of mapping space. */
4354 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4355 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4356#endif
4357
4358 /*
4359     * Mark the page as being in need of an ASMMemZeroPage().
4360 */
4361 pPage->fZeroed = false;
4362
4363#ifdef PGMPOOL_WITH_USER_TRACKING
4364 /*
4365 * Clear the page.
4366 */
4367 pgmPoolTrackClearPageUsers(pPool, pPage);
4368 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4369 pgmPoolTrackDeref(pPool, pPage);
4370 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4371#endif
4372
4373#ifdef PGMPOOL_WITH_CACHE
4374 /*
4375 * Flush it from the cache.
4376 */
4377 pgmPoolCacheFlushPage(pPool, pPage);
4378#endif /* PGMPOOL_WITH_CACHE */
4379
4380#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4381 /* Heavy stuff done. */
4382 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4383#endif
4384
4385#ifdef PGMPOOL_WITH_MONITORING
4386 /*
4387 * Deregistering the monitoring.
4388 */
4389 if (pPage->fMonitored)
4390 rc = pgmPoolMonitorFlush(pPool, pPage);
4391#endif
4392
4393 /*
4394 * Free the page.
4395 */
4396 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4397 pPage->iNext = pPool->iFreeHead;
4398 pPool->iFreeHead = pPage->idx;
4399 pPage->enmKind = PGMPOOLKIND_FREE;
4400 pPage->GCPhys = NIL_RTGCPHYS;
4401 pPage->fReusedFlushPending = false;
4402
4403 pPool->cUsedPages--;
4404 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4405 return rc;
4406}
4407
4408
4409/**
4410 * Frees a usage of a pool page.
4411 *
4412 * The caller is responsible for updating the user table so that it no longer
4413 * references the shadow page.
4414 *
4415 * @param pPool The pool.
4416 * @param pPage The shadow page.
4417 * @param iUser The shadow page pool index of the user table.
4418 * @param iUserTable The index into the user table (shadowed).
4419 */
4420void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4421{
4422 STAM_PROFILE_START(&pPool->StatFree, a);
4423 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4424 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4425 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4426#ifdef PGMPOOL_WITH_USER_TRACKING
4427 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4428#endif
4429#ifdef PGMPOOL_WITH_CACHE
4430 if (!pPage->fCached)
4431#endif
4432 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
4433 STAM_PROFILE_STOP(&pPool->StatFree, a);
4434}
4435
4436
4437/**
4438 * Makes more free pages available, either by growing the pool or by freeing a cached page.
4439 *
4440 * @returns VBox status code.
4441 * @retval VINF_SUCCESS on success.
4442 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4443 *
4444 * @param pPool The pool.
4445 * @param iUser The user of the page.
4446 */
4447static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
4448{
4449 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4450
4451 /*
4452 * If the pool isn't full grown yet, expand it.
4453 */
4454 if (pPool->cCurPages < pPool->cMaxPages)
4455 {
4456 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4457#ifdef IN_RING3
4458 int rc = PGMR3PoolGrow(pPool->pVMR3);
4459#else
4460 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
4461#endif
4462 if (RT_FAILURE(rc))
4463 return rc;
4464 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4465 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4466 return VINF_SUCCESS;
4467 }
4468
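    /* Still no free page: the pool is either fully grown or growing did not
       yield one, so fall back to reclaiming a page below. */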
4469#ifdef PGMPOOL_WITH_CACHE
4470 /*
4471 * Free one cached page.
4472 */
4473 return pgmPoolCacheFreeOne(pPool, iUser);
4474#else
4475 /*
4476 * Flush the pool.
4477 *
4478 * If we have tracking enabled, it should be possible to come up with
4479 * a cheap replacement strategy...
4480 */
4481 /* @todo incompatible with long mode paging (cr3 root will be flushed) */
4482     Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
4483 pgmPoolFlushAllInt(pPool);
4484 return VERR_PGM_POOL_FLUSHED;
4485#endif
4486}
4487
4488
4489/**
4490 * Allocates a page from the pool.
4491 *
4492 * This page may actually be a cached page and not in need of any processing
4493 * on the caller's part.
4494 *
4495 * @returns VBox status code.
4496 * @retval VINF_SUCCESS if a NEW page was allocated.
4497 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4498 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4499 * @param pVM The VM handle.
4500 * @param GCPhys The GC physical address of the page we're going to shadow.
4501 * For 4MB and 2MB PD entries, it's the first address the
4502 * shadow PT is covering.
4503 * @param enmKind The kind of mapping.
4504 * @param iUser The shadow page pool index of the user table.
4505 * @param iUserTable The index into the user table (shadowed).
4506 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4507 */
4508int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
4509{
4510 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4511 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4512 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4513 *ppPage = NULL;
4514 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4515 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4516 * Assert(!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4517
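    /* Try the cache first; a hit returns VINF_PGM_CACHED_PAGE together with the
       existing shadow page, skipping the allocation below. */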
4518#ifdef PGMPOOL_WITH_CACHE
4519 if (pPool->fCacheEnabled)
4520 {
4521 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
4522 if (RT_SUCCESS(rc2))
4523 {
4524 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4525 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4526 return rc2;
4527 }
4528 }
4529#endif
4530
4531 /*
4532 * Allocate a new one.
4533 */
4534 int rc = VINF_SUCCESS;
4535 uint16_t iNew = pPool->iFreeHead;
4536 if (iNew == NIL_PGMPOOL_IDX)
4537 {
4538 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
4539 if (RT_FAILURE(rc))
4540 {
4541 if (rc != VERR_PGM_POOL_CLEARED)
4542 {
4543 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4544 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4545 return rc;
4546 }
4547 Log(("pgmPoolMakeMoreFreePages failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
4548 rc = VERR_PGM_POOL_FLUSHED;
4549 }
4550 iNew = pPool->iFreeHead;
4551 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4552 }
4553
4554 /* unlink the free head */
4555 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4556 pPool->iFreeHead = pPage->iNext;
4557 pPage->iNext = NIL_PGMPOOL_IDX;
4558
4559 /*
4560 * Initialize it.
4561 */
4562 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4563 pPage->enmKind = enmKind;
4564 pPage->GCPhys = GCPhys;
4565 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4566 pPage->fMonitored = false;
4567 pPage->fCached = false;
4568 pPage->fReusedFlushPending = false;
4569#ifdef PGMPOOL_WITH_MONITORING
4570 pPage->cModifications = 0;
4571 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4572 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4573#else
4574 pPage->fCR3Mix = false;
4575#endif
4576#ifdef PGMPOOL_WITH_USER_TRACKING
4577 pPage->cPresent = 0;
4578 pPage->iFirstPresent = ~0;
4579
4580 /*
4581 * Insert into the tracking and cache. If this fails, free the page.
4582 */
4583 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4584 if (RT_FAILURE(rc3))
4585 {
4586 if (rc3 != VERR_PGM_POOL_CLEARED)
4587 {
4588 pPool->cUsedPages--;
4589 pPage->enmKind = PGMPOOLKIND_FREE;
4590 pPage->GCPhys = NIL_RTGCPHYS;
4591 pPage->iNext = pPool->iFreeHead;
4592 pPool->iFreeHead = pPage->idx;
4593 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4594 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4595 return rc3;
4596 }
4597 Log(("pgmPoolTrackInsert failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4598 rc = VERR_PGM_POOL_FLUSHED;
4599 }
4600#endif /* PGMPOOL_WITH_USER_TRACKING */
4601
4602 /*
4603 * Commit the allocation, clear the page and return.
4604 */
4605#ifdef VBOX_WITH_STATISTICS
4606 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4607 pPool->cUsedPagesHigh = pPool->cUsedPages;
4608#endif
4609
4610 if (!pPage->fZeroed)
4611 {
4612 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4613 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4614 ASMMemZeroPage(pv);
4615 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4616 }
4617
4618 *ppPage = pPage;
4619 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4620 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4621 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4622 return rc;
4623}
4624
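/*
 * Illustrative usage sketch (not part of the original sources): a caller needing
 * a shadow page table for a guest PDE might do something along these lines:
 *
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAlloc(pVM, GCPhysPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                           pPdPage->idx, iPdeSlot, &pShwPage);
 *     // VINF_SUCCESS         -> brand new, zeroed page; the caller fills it in.
 *     // VINF_PGM_CACHED_PAGE -> an existing shadow page was reused; contents valid.
 *
 * where GCPhysPT, pPdPage and iPdeSlot are hypothetical names for the guest page
 * table address, the shadow page containing the page directory, and the PDE slot
 * that will reference the new page.
 */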
4625
4626/**
4627 * Frees a usage of a pool page.
4628 *
4629 * @param pVM The VM handle.
4630 * @param HCPhys The HC physical address of the shadow page.
4631 * @param iUser The shadow page pool index of the user table.
4632 * @param iUserTable The index into the user table (shadowed).
4633 */
4634void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4635{
4636 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4637 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4638 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4639}
4640
4641
4642/**
4643 * Gets an in-use page in the pool by its physical address.
4644 *
4645 * @returns Pointer to the page.
4646 * @param pVM The VM handle.
4647 * @param HCPhys The HC physical address of the shadow page.
4648 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4649 */
4650PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4651{
4652 /** @todo profile this! */
4653 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4654 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4655 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%RHp -> %p:{.idx=%d .GCPhys=%RGp .enmKind=%s}\n",
4656 HCPhys, pPage, pPage->idx, pPage->GCPhys, pgmPoolPoolKindToStr(pPage->enmKind)));
4657 return pPage;
4658}
4659
4660
4661/**
4662 * Flushes the entire cache.
4663 *
4664 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4665 * and will execute the CR3 flush.
4666 *
4667 * @param pPool The pool.
4668 */
4669void pgmPoolFlushAll(PVM pVM)
4670{
4671 LogFlow(("pgmPoolFlushAll:\n"));
4672 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4673}
4674
4675#ifdef LOG_ENABLED
4676static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
4677{
4678 switch(enmKind)
4679 {
4680 case PGMPOOLKIND_INVALID:
4681 return "PGMPOOLKIND_INVALID";
4682 case PGMPOOLKIND_FREE:
4683 return "PGMPOOLKIND_FREE";
4684 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4685 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4686 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4687 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4688 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4689 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4690 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4691 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4692 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4693 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4694 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4695 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
4696 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4697 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
4698 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4699 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
4700 case PGMPOOLKIND_32BIT_PD:
4701 return "PGMPOOLKIND_32BIT_PD";
4702 case PGMPOOLKIND_32BIT_PD_PHYS:
4703 return "PGMPOOLKIND_32BIT_PD_PHYS";
4704 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4705 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
4706 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4707 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
4708 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4709 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
4710 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4711 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
4712 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4713 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
4714 case PGMPOOLKIND_PAE_PD_PHYS:
4715 return "PGMPOOLKIND_PAE_PD_PHYS";
4716 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4717 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
4718 case PGMPOOLKIND_PAE_PDPT:
4719 return "PGMPOOLKIND_PAE_PDPT";
4720 case PGMPOOLKIND_PAE_PDPT_PHYS:
4721 return "PGMPOOLKIND_PAE_PDPT_PHYS";
4722 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4723 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
4724 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4725 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
4726 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4727 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
4728 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4729 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
4730 case PGMPOOLKIND_64BIT_PML4:
4731 return "PGMPOOLKIND_64BIT_PML4";
4732 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4733 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
4734 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4735 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
4736 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4737 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
4738#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4739 case PGMPOOLKIND_ROOT_32BIT_PD:
4740 return "PGMPOOLKIND_ROOT_32BIT_PD";
4741 case PGMPOOLKIND_ROOT_PAE_PD:
4742 return "PGMPOOLKIND_ROOT_PAE_PD";
4743 case PGMPOOLKIND_ROOT_PDPT:
4744 return "PGMPOOLKIND_ROOT_PDPT";
4745#endif
4746 case PGMPOOLKIND_ROOT_NESTED:
4747 return "PGMPOOLKIND_ROOT_NESTED";
4748 }
4749 return "Unknown kind!";
4750}
4751#endif