source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 16890

Last change on this file since 16890 was 16782, checked in by vboxsync, 16 years ago

VBOX_WITH_PGMPOOL_PAGING_ONLY: missing case

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 170.0 KB
1/* $Id: PGMAllPool.cpp 16782 2009-02-16 10:56:21Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42
43
44/*******************************************************************************
45* Internal Functions *
46*******************************************************************************/
47__BEGIN_DECLS
48static void pgmPoolFlushAllInt(PPGMPOOL pPool);
49#ifdef PGMPOOL_WITH_USER_TRACKING
50DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
51DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#endif
54#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
55static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
56#endif
57#ifdef PGMPOOL_WITH_CACHE
58static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
59#endif
60#ifdef PGMPOOL_WITH_MONITORING
61static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
62#endif
63#ifndef IN_RING3
64DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
65#endif
66#ifdef LOG_ENABLED
67static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
68#endif
69__END_DECLS
70
71
72/**
73 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
74 *
75 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
76 * @param enmKind The page kind.
77 */
78DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
79{
80 switch (enmKind)
81 {
82 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
83 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
84 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
85 return true;
86 default:
87 return false;
88 }
89}
90
91
92#if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
93/**
94 * Maps a pool page into the current context.
95 *
96 * @returns Pointer to the mapping.
97 * @param pPGM Pointer to the PGM instance data.
98 * @param pPage The page to map.
99 */
100void *pgmPoolMapPageFallback(PPGM pPGM, PPGMPOOLPAGE pPage)
101{
102 /* general pages are taken care of by the inlined part; it
103 only ends up here in case of failure. */
104 AssertReleaseReturn(pPage->idx < PGMPOOL_IDX_FIRST, NULL);
105
106/** @todo make sure HCPhys is valid for *all* indexes. */
107 /* special pages. */
108# ifdef IN_RC
109 switch (pPage->idx)
110 {
111# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
112 case PGMPOOL_IDX_PD:
113 case PGMPOOL_IDX_PDPT:
114 case PGMPOOL_IDX_AMD64_CR3:
115 return pPGM->pShwRootRC;
116# else
117 case PGMPOOL_IDX_PD:
118 return pPGM->pShw32BitPdRC;
119 case PGMPOOL_IDX_PAE_PD:
120 case PGMPOOL_IDX_PAE_PD_0:
121 return pPGM->apShwPaePDsRC[0];
122 case PGMPOOL_IDX_PAE_PD_1:
123 return pPGM->apShwPaePDsRC[1];
124 case PGMPOOL_IDX_PAE_PD_2:
125 return pPGM->apShwPaePDsRC[2];
126 case PGMPOOL_IDX_PAE_PD_3:
127 return pPGM->apShwPaePDsRC[3];
128 case PGMPOOL_IDX_PDPT:
129 return pPGM->pShwPaePdptRC;
130# endif
131 default:
132 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
133 return NULL;
134 }
135
136# else /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
137 RTHCPHYS HCPhys;
138 switch (pPage->idx)
139 {
140# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
141 case PGMPOOL_IDX_PD:
142 case PGMPOOL_IDX_PDPT:
143 case PGMPOOL_IDX_AMD64_CR3:
144 HCPhys = pPGM->HCPhysShwCR3;
145 break;
146
147 case PGMPOOL_IDX_NESTED_ROOT:
148 HCPhys = pPGM->HCPhysShwNestedRoot;
149 break;
150# else
151 case PGMPOOL_IDX_PD:
152 HCPhys = pPGM->HCPhysShw32BitPD;
153 break;
154 case PGMPOOL_IDX_PAE_PD_0:
155 HCPhys = pPGM->aHCPhysPaePDs[0];
156 break;
157 case PGMPOOL_IDX_PAE_PD_1:
158 HCPhys = pPGM->aHCPhysPaePDs[1];
159 break;
160 case PGMPOOL_IDX_PAE_PD_2:
161 HCPhys = pPGM->aHCPhysPaePDs[2];
162 break;
163 case PGMPOOL_IDX_PAE_PD_3:
164 HCPhys = pPGM->aHCPhysPaePDs[3];
165 break;
166 case PGMPOOL_IDX_PDPT:
167 HCPhys = pPGM->HCPhysShwPaePdpt;
168 break;
169 case PGMPOOL_IDX_NESTED_ROOT:
170 HCPhys = pPGM->HCPhysShwNestedRoot;
171 break;
172 case PGMPOOL_IDX_PAE_PD:
173 AssertReleaseMsgFailed(("PGMPOOL_IDX_PAE_PD is not usable in VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 context\n"));
174 return NULL;
175# endif
176 default:
177 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
178 return NULL;
179 }
180 AssertMsg(HCPhys && HCPhys != NIL_RTHCPHYS && !(PAGE_OFFSET_MASK & HCPhys), ("%RHp\n", HCPhys));
181
182 void *pv;
183 pgmR0DynMapHCPageInlined(pPGM, HCPhys, &pv);
184 return pv;
185# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
186}
187#endif /* IN_RC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
188
189
190#ifdef PGMPOOL_WITH_MONITORING
191/**
192 * Determine the size of a write instruction.
193 * @returns number of bytes written.
194 * @param pDis The disassembler state.
195 */
196static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
197{
198 /*
199 * This is very crude and possibly wrong for some opcodes,
200 * but since it's not really supposed to be called we can
201 * probably live with that.
202 */
203 return DISGetParamSize(pDis, &pDis->param1);
204}
205
206
207/**
208 * Flushes a chain of pages sharing the same access monitor.
209 *
210 * @returns VBox status code suitable for scheduling.
211 * @param pPool The pool.
212 * @param pPage A page in the chain.
213 */
214int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
215{
216 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
217
218 /*
219 * Find the list head.
220 */
221 uint16_t idx = pPage->idx;
222 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
223 {
224 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
225 {
226 idx = pPage->iMonitoredPrev;
227 Assert(idx != pPage->idx);
228 pPage = &pPool->aPages[idx];
229 }
230 }
231
232 /*
233 * Iterate the list flushing each shadow page.
234 */
235 int rc = VINF_SUCCESS;
236 for (;;)
237 {
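 /* Fetch the next link before flushing; pgmPoolFlushPage unlinks the page from the
 monitor chain. Only indexes >= PGMPOOL_IDX_FIRST are regular pool pages that can be
 flushed; lower indexes are the fixed special pages. */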
238 idx = pPage->iMonitoredNext;
239 Assert(idx != pPage->idx);
240 if (pPage->idx >= PGMPOOL_IDX_FIRST)
241 {
242 int rc2 = pgmPoolFlushPage(pPool, pPage);
243 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
244 rc = VINF_PGM_SYNC_CR3;
245 }
246 /* next */
247 if (idx == NIL_PGMPOOL_IDX)
248 break;
249 pPage = &pPool->aPages[idx];
250 }
251 return rc;
252}
253
254
255/**
256 * Wrapper for getting the current context pointer to the entry being modified.
257 *
258 * @returns Pointer to the current context mapping of the entry.
259 * @param pPool The pool.
260 * @param pvFault The fault virtual address.
261 * @param GCPhysFault The fault physical address.
262 * @param cbEntry The entry size.
263 */
264#ifdef IN_RING3
265DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
266#else
267DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
268#endif
269{
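 /* Mask the address down to the start of the entry being written; cbEntry is assumed to be
 a power of two (4 or 8 bytes). In RC and R3 the supplied address is usable directly, while
 in R0 the guest physical address is mapped into the current context first. */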
270#ifdef IN_RC
271 return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
272
273#elif defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
274 void *pvRet;
275 int rc = PGMDynMapGCPageOff(pPool->pVMR0, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
276 AssertFatalRCSuccess(rc);
277 return pvRet;
278
279#elif defined(IN_RING0)
280 void *pvRet;
281 int rc = pgmRamGCPhys2HCPtr(&pPool->pVMR0->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
282 AssertFatalRCSuccess(rc);
283 return pvRet;
284
285#elif defined(IN_RING3)
286 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
287#else
288# error "huh?"
289#endif
290}
291
292
293/**
294 * Process shadow entries before they are changed by the guest.
295 *
296 * For PT entries we will clear them. For PD entries, we'll simply check
297 * for mapping conflicts and set the SyncCR3 FF if found.
298 *
299 * @param pPool The pool.
300 * @param pPage The head page.
301 * @param GCPhysFault The guest physical fault address.
302 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
303 * In R3 this is the host context 'fault' address.
304 * @param pCpu The disassembler state for figuring out the write size.
305 * This need not be specified if the caller knows we won't do cross entry accesses.
306 */
307#ifdef IN_RING3
308void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
309#else
310void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
311#endif
312{
313 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
314 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
315 const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
316
317 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));
318
319 for (;;)
320 {
321 union
322 {
323 void *pv;
324 PX86PT pPT;
325 PX86PTPAE pPTPae;
326 PX86PD pPD;
327 PX86PDPAE pPDPae;
328 PX86PDPT pPDPT;
329 PX86PML4 pPML4;
330 } uShw;
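 /* uShw gives a typed view of the shadow page matching pPage->enmKind in the switch below. */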
331
332 switch (pPage->enmKind)
333 {
334 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
335 {
336 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
337 const unsigned iShw = off / sizeof(X86PTE);
338 if (uShw.pPT->a[iShw].n.u1Present)
339 {
340# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
341 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
342 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
343 pgmPoolTracDerefGCPhysHint(pPool, pPage,
344 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
345 pGstPte->u & X86_PTE_PG_MASK);
346# endif
347 uShw.pPT->a[iShw].u = 0;
348 }
349 break;
350 }
351
352 /* page/2 sized */
353 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
354 {
355 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
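 /* A 32-bit guest PT (1024 4-byte entries) is shadowed by two PAE PTs of 512 entries each;
 pPage->GCPhys records which 2 KB half this page shadows, so only act if the write falls
 into that half. */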
356 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
357 {
358 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
359 if (uShw.pPTPae->a[iShw].n.u1Present)
360 {
361# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
362 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
363 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
364 pgmPoolTracDerefGCPhysHint(pPool, pPage,
365 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
366 pGstPte->u & X86_PTE_PG_MASK);
367# endif
368 uShw.pPTPae->a[iShw].u = 0;
369 }
370 }
371 break;
372 }
373
374# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
375 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
376 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
377 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
378 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
379 {
380 unsigned iGst = off / sizeof(X86PDE);
381 unsigned iShwPdpt = iGst / 256;
382 unsigned iShw = (iGst % 256) * 2;
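 /* A 32-bit guest PD has 1024 entries but a PAE PD only 512, so the guest PD is shadowed by
 four PAE PDs: entry iGst lives in shadow PD iGst / 256. Each 4 MB guest PDE is split into
 two 2 MB PAE PDEs, hence the factor of two for the shadow index. */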
383 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
384
385 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x idx = %d page idx=%d\n", iGst, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
386 if (iShwPdpt == pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
387 {
388 for (unsigned i=0;i<2;i++)
389 {
390 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
391 {
392 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
393 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
394 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
395 break;
396 }
397 else
398 if (uShw.pPDPae->a[iShw+i].n.u1Present)
399 {
400 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
401 pgmPoolFree(pPool->CTX_SUFF(pVM),
402 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
403 pPage->idx,
404 iShw + i);
405 uShw.pPDPae->a[iShw+i].u = 0;
406 }
407
408 /* paranoia / a bit assumptive. */
409 if ( pCpu
410 && (off & 3)
411 && (off & 3) + cbWrite > 4)
412 {
413 const unsigned iShw2 = iShw + 2 + i;
414 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
415 {
416 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
417 {
418 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
419 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
420 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
421 break;
422 }
423 else
424 if (uShw.pPDPae->a[iShw2].n.u1Present)
425 {
426 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
427 pgmPoolFree(pPool->CTX_SUFF(pVM),
428 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
429 pPage->idx,
430 iShw2);
431 uShw.pPDPae->a[iShw2].u = 0;
432 }
433 }
434 }
435 }
436 }
437 break;
438 }
439# endif
440
441
442 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
443 {
444 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
445 const unsigned iShw = off / sizeof(X86PTEPAE);
446 if (uShw.pPTPae->a[iShw].n.u1Present)
447 {
448# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
449 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
450 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
451 pgmPoolTracDerefGCPhysHint(pPool, pPage,
452 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
453 pGstPte->u & X86_PTE_PAE_PG_MASK);
454# endif
455 uShw.pPTPae->a[iShw].u = 0;
456 }
457
458 /* paranoia / a bit assumptive. */
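 /* An unaligned write may straddle two entries; if so, also process the entry that the
 tail of the write spills into. */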
459 if ( pCpu
460 && (off & 7)
461 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
462 {
463 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
464 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
465
466 if (uShw.pPTPae->a[iShw2].n.u1Present)
467 {
468# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
469 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
470 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
471 pgmPoolTracDerefGCPhysHint(pPool, pPage,
472 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
473 pGstPte->u & X86_PTE_PAE_PG_MASK);
474# endif
475 uShw.pPTPae->a[iShw2].u = 0;
476 }
477 }
478
479 break;
480 }
481
482# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
483 case PGMPOOLKIND_32BIT_PD:
484# else
485 case PGMPOOLKIND_ROOT_32BIT_PD:
486# endif
487 {
488 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
489 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
490 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
491 {
492 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
493 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
494 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
495 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
496 break;
497 }
498# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
499 else
500 {
501 if (uShw.pPD->a[iShw].n.u1Present)
502 {
503 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
504 pgmPoolFree(pPool->CTX_SUFF(pVM),
505 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
506 pPage->idx,
507 iShw);
508 uShw.pPD->a[iShw].u = 0;
509 }
510 }
511# endif
512 /* paranoia / a bit assumptive. */
513 if ( pCpu
514 && (off & 3)
515 && (off & 3) + cbWrite > sizeof(X86PTE))
516 {
517 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
518 if ( iShw2 != iShw
519 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
520 {
521
522 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
523 {
524 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
525 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
526 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
527 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
528 }
529# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
530 else
531 {
532 if (uShw.pPD->a[iShw2].n.u1Present)
533 {
534 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
535 pgmPoolFree(pPool->CTX_SUFF(pVM),
536 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
537 pPage->idx,
538 iShw2);
539 uShw.pPD->a[iShw2].u = 0;
540 }
541 }
542# endif
543 }
544 }
545#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
546 if ( uShw.pPD->a[iShw].n.u1Present
547 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
548 {
549 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
550# ifdef IN_RC /* TLB load - we're pushing things a bit... */
551 ASMProbeReadByte(pvAddress);
552# endif
553 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
554 uShw.pPD->a[iShw].u = 0;
555 }
556#endif
557 break;
558 }
559
560# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
561 case PGMPOOLKIND_ROOT_PAE_PD:
562 {
563 unsigned iGst = off / sizeof(X86PDE); // ASSUMING 32-bit guest paging!
564 unsigned iShwPdpt = iGst / 256;
565 unsigned iShw = (iGst % 256) * 2;
566 Assert(pPage->idx == PGMPOOL_IDX_PAE_PD);
567 PPGMPOOLPAGE pPage2 = pPage + 1 + iShwPdpt;
568 Assert(pPage2->idx == PGMPOOL_IDX_PAE_PD_0 + iShwPdpt);
569 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage2);
570 for (unsigned i = 0; i < 2; i++, iShw++)
571 {
572 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
573 {
574 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
575 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
576 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw));
577 }
578 /* paranoia / a bit assumptive. */
579 else if ( pCpu
580 && (off & 3)
581 && (off & 3) + cbWrite > 4)
582 {
583 const unsigned iShw2 = iShw + 2;
584 if ( iShw2 < RT_ELEMENTS(uShw.pPDPae->a) /** @todo was completely wrong, it's better now after #1865 but still wrong from cross PD. */
585 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
586 {
587 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
588 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
589 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
590 }
591 }
592#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
593 if ( uShw.pPDPae->a[iShw].n.u1Present
594 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
595 {
596 LogFlow(("pgmPoolMonitorChainChanging: iShwPdpt=%#x iShw=%#x: %RX64 -> freeing it!\n", iShwPdpt, iShw, uShw.pPDPae->a[iShw].u));
597# ifdef IN_RC /* TLB load - we're pushing things a bit... */
598 ASMProbeReadByte(pvAddress);
599# endif
600 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + iShwPdpt * X86_PG_PAE_ENTRIES);
601 uShw.pPDPae->a[iShw].u = 0;
602 }
603#endif
604 }
605 break;
606 }
607# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */
608
609 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
610 {
611 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
612 const unsigned iShw = off / sizeof(X86PDEPAE);
613 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
614 {
615 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
616 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
617 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
618 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
619 break;
620 }
621#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
622 /*
623 * Causes trouble when the guest uses a PDE to refer to the whole page table level
624 * structure. (Invalidate here; faults later on when it tries to change the page
625 * table entries -> recheck; probably only applies to the RC case.)
626 */
627 else
628 {
629 if (uShw.pPDPae->a[iShw].n.u1Present)
630 {
631 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
632 pgmPoolFree(pPool->CTX_SUFF(pVM),
633 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
634# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
635 pPage->idx,
636 iShw);
637# else
638 /* Note: hardcoded PAE implementation dependency */
639 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
640 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
641# endif
642 uShw.pPDPae->a[iShw].u = 0;
643 }
644 }
645#endif
646 /* paranoia / a bit assumptive. */
647 if ( pCpu
648 && (off & 7)
649 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
650 {
651 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
652 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
653
654 if ( iShw2 != iShw
655 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
656 {
657 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
658 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
659 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
660 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
661 }
662#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
663 else if (uShw.pPDPae->a[iShw2].n.u1Present)
664 {
665 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
666 pgmPoolFree(pPool->CTX_SUFF(pVM),
667 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
668# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
669 pPage->idx,
670 iShw2);
671# else
672 /* Note: hardcoded PAE implementation dependency */
673 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
674 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
675# endif
676 uShw.pPDPae->a[iShw2].u = 0;
677 }
678#endif
679 }
680 break;
681 }
682
683# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
684 case PGMPOOLKIND_PAE_PDPT:
685# else
686 case PGMPOOLKIND_ROOT_PDPT:
687# endif
688 {
689 /*
690 * Hopefully this doesn't happen very often:
691 * - touching unused parts of the page
692 * - messing with the bits of pd pointers without changing the physical address
693 */
694# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
695 /* PDPT roots are not page aligned; 32 byte only! */
696 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
697# else
698 const unsigned offPdpt = off;
699# endif
700 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
701 const unsigned iShw = offPdpt / sizeof(X86PDPE);
702 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
703 {
704 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
705 {
706 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
707 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
708 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
709 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
710 break;
711 }
712# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
713 else
714 if (uShw.pPDPT->a[iShw].n.u1Present)
715 {
716 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
717 pgmPoolFree(pPool->CTX_SUFF(pVM),
718 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
719 pPage->idx,
720 iShw);
721 uShw.pPDPT->a[iShw].u = 0;
722 }
723# endif
724
725 /* paranoia / a bit assumptive. */
726 if ( pCpu
727 && (offPdpt & 7)
728 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
729 {
730 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
731 if ( iShw2 != iShw
732 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
733 {
734 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
735 {
736 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
737 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
738 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
739 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
740 }
741# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
742 else
743 if (uShw.pPDPT->a[iShw2].n.u1Present)
744 {
745 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
746 pgmPoolFree(pPool->CTX_SUFF(pVM),
747 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
748 pPage->idx,
749 iShw2);
750 uShw.pPDPT->a[iShw2].u = 0;
751 }
752# endif
753 }
754 }
755 }
756 break;
757 }
758
759#ifndef IN_RC
760 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
761 {
762 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
763 const unsigned iShw = off / sizeof(X86PDEPAE);
764 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
765 {
766 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
767 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
768 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
769 }
770 else
771 {
772 if (uShw.pPDPae->a[iShw].n.u1Present)
773 {
774 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
775 pgmPoolFree(pPool->CTX_SUFF(pVM),
776 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
777 pPage->idx,
778 iShw);
779 uShw.pPDPae->a[iShw].u = 0;
780 }
781 }
782 /* paranoia / a bit assumptive. */
783 if ( pCpu
784 && (off & 7)
785 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
786 {
787 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
788 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
789
790 if ( iShw2 != iShw
791 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
792 {
793 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
794 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
795 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
796 }
797 else
798 if (uShw.pPDPae->a[iShw2].n.u1Present)
799 {
800 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
801 pgmPoolFree(pPool->CTX_SUFF(pVM),
802 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
803 pPage->idx,
804 iShw2);
805 uShw.pPDPae->a[iShw2].u = 0;
806 }
807 }
808 break;
809 }
810
811 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
812 {
813 /*
814 * Hopefully this doesn't happen very often:
815 * - messing with the bits of pd pointers without changing the physical address
816 */
817# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
818 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
819# endif
820 {
821 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
822 const unsigned iShw = off / sizeof(X86PDPE);
823 if (uShw.pPDPT->a[iShw].n.u1Present)
824 {
825 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
826 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
827 uShw.pPDPT->a[iShw].u = 0;
828 }
829 /* paranoia / a bit assumptive. */
830 if ( pCpu
831 && (off & 7)
832 && (off & 7) + cbWrite > sizeof(X86PDPE))
833 {
834 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
835 if (uShw.pPDPT->a[iShw2].n.u1Present)
836 {
837 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
838 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
839 uShw.pPDPT->a[iShw2].u = 0;
840 }
841 }
842 }
843 break;
844 }
845
846 case PGMPOOLKIND_64BIT_PML4:
847 {
848 /*
849 * Hopefully this doesn't happen very often:
850 * - messing with the bits of pd pointers without changing the physical address
851 */
852# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
853 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
854# endif
855 {
856 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
857 const unsigned iShw = off / sizeof(X86PDPE);
858 if (uShw.pPML4->a[iShw].n.u1Present)
859 {
860 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
861 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
862 uShw.pPML4->a[iShw].u = 0;
863 }
864 /* paranoia / a bit assumptive. */
865 if ( pCpu
866 && (off & 7)
867 && (off & 7) + cbWrite > sizeof(X86PDPE))
868 {
869 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
870 if (uShw.pPML4->a[iShw2].n.u1Present)
871 {
872 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
873 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
874 uShw.pPML4->a[iShw2].u = 0;
875 }
876 }
877 }
878 break;
879 }
880#endif /* !IN_RC */
881
882 default:
883 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
884 }
885
886 /* next */
887 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
888 return;
889 pPage = &pPool->aPages[pPage->iMonitoredNext];
890 }
891}
892
893
894# ifndef IN_RING3
895/**
896 * Checks if an access could be a fork operation in progress.
897 *
898 * Meaning that the guest is setting up the parent process for Copy-On-Write.
899 *
900 * @returns true if it's likely that we're forking, otherwise false.
901 * @param pPool The pool.
902 * @param pCpu The disassembled instruction.
903 * @param offFault The access offset.
904 */
905DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
906{
907 /*
908 * i386 linux is using btr to clear X86_PTE_RW.
909 * The functions involved are (2.6.16 source inspection):
910 * clear_bit
911 * ptep_set_wrprotect
912 * copy_one_pte
913 * copy_pte_range
914 * copy_pmd_range
915 * copy_pud_range
916 * copy_page_range
917 * dup_mmap
918 * dup_mm
919 * copy_mm
920 * copy_process
921 * do_fork
922 */
923 if ( pCpu->pCurInstr->opcode == OP_BTR
924 && !(offFault & 4)
925 /** @todo Validate that the bit index is X86_PTE_RW. */
926 )
927 {
928 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
929 return true;
930 }
931 return false;
932}
933
934
935/**
936 * Determine whether the page is likely to have been reused.
937 *
938 * @returns true if we consider the page as being reused for a different purpose.
939 * @returns false if we consider it to still be a paging page.
940 * @param pVM VM Handle.
941 * @param pPage The page in question.
942 * @param pRegFrame Trap register frame.
943 * @param pCpu The disassembly info for the faulting instruction.
944 * @param pvFault The fault address.
945 *
946 * @remark The REP prefix check is left to the caller because of STOSD/W.
947 */
948DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
949{
950#ifndef IN_RC
951 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
952 if ( HWACCMHasPendingIrq(pVM)
953 && (pRegFrame->rsp - pvFault) < 32)
954 {
955 /* Fault caused by stack writes while trying to inject an interrupt event. */
956 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
957 return true;
958 }
959#else
960 NOREF(pVM); NOREF(pvFault);
961#endif
962
963 switch (pCpu->pCurInstr->opcode)
964 {
965 /* call implies the actual push of the return address faulted */
966 case OP_CALL:
967 Log4(("pgmPoolMonitorIsReused: CALL\n"));
968 return true;
969 case OP_PUSH:
970 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
971 return true;
972 case OP_PUSHF:
973 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
974 return true;
975 case OP_PUSHA:
976 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
977 return true;
978 case OP_FXSAVE:
979 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
980 return true;
981 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
982 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
983 return true;
984 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
985 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
986 return true;
987 case OP_MOVSWD:
988 case OP_STOSWD:
989 if ( pCpu->prefix == (PREFIX_REP|PREFIX_REX)
990 && pRegFrame->rcx >= 0x40
991 )
992 {
993 Assert(pCpu->mode == CPUMODE_64BIT);
994
995 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
996 return true;
997 }
998 return false;
999 }
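 /* A write that uses ESP as the base register suggests the page now backs a stack
 rather than a page table. */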
1000 if ( (pCpu->param1.flags & USE_REG_GEN32)
1001 && (pCpu->param1.base.reg_gen == USE_REG_ESP))
1002 {
1003 Log4(("pgmPoolMonitorIsReused: ESP\n"));
1004 return true;
1005 }
1006
1007 //if (pPage->fCR3Mix)
1008 // return false;
1009 return false;
1010}
1011
1012
1013/**
1014 * Flushes the page being accessed.
1015 *
1016 * @returns VBox status code suitable for scheduling.
1017 * @param pVM The VM handle.
1018 * @param pPool The pool.
1019 * @param pPage The pool page (head).
1020 * @param pCpu The disassembly of the write instruction.
1021 * @param pRegFrame The trap register frame.
1022 * @param GCPhysFault The fault address as guest physical address.
1023 * @param pvFault The fault address.
1024 */
1025static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1026 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1027{
1028 /*
1029 * First, do the flushing.
1030 */
1031 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
1032
1033 /*
1034 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
1035 */
1036 uint32_t cbWritten;
1037 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
1038 if (RT_SUCCESS(rc2))
1039 pRegFrame->rip += pCpu->opsize;
1040 else if (rc2 == VERR_EM_INTERPRETER)
1041 {
1042#ifdef IN_RC
1043 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
1044 {
1045 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
1046 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
1047 rc = VINF_SUCCESS;
1048 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
1049 }
1050 else
1051#endif
1052 {
1053 rc = VINF_EM_RAW_EMULATE_INSTR;
1054 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1055 }
1056 }
1057 else
1058 rc = rc2;
1059
1060 /* See use in pgmPoolAccessHandlerSimple(). */
1061 PGM_INVL_GUEST_TLBS();
1062
1063 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
1064 return rc;
1065
1066}
1067
1068
1069/**
1070 * Handles the STOSD write accesses.
1071 *
1072 * @returns VBox status code suitable for scheduling.
1073 * @param pVM The VM handle.
1074 * @param pPool The pool.
1075 * @param pPage The pool page (head).
1076 * @param pCpu The disassembly of the write instruction.
1077 * @param pRegFrame The trap register frame.
1078 * @param GCPhysFault The fault address as guest physical address.
1079 * @param pvFault The fault address.
1080 */
1081DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1082 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1083{
1084 Assert(pCpu->mode == CPUMODE_32BIT);
1085
1086 /*
1087 * Increment the modification counter and insert it into the list
1088 * of modified pages the first time.
1089 */
1090 if (!pPage->cModifications++)
1091 pgmPoolMonitorModifiedInsert(pPool, pPage);
1092
1093 /*
1094 * Execute REP STOSD.
1095 *
1096 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
1097 * write situation, meaning that it's safe to write here.
1098 */
1099#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1100 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1101#endif
1102 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
1103 while (pRegFrame->ecx)
1104 {
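 /* One dword per iteration: let the monitor clear/update the affected shadow entries first,
 then perform the guest write and advance EDI/ECX the way REP STOSD would. */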
1105#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1106 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1107 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1108 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1109#else
1110 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1111#endif
1112#ifdef IN_RC
1113 *(uint32_t *)pu32 = pRegFrame->eax;
1114#else
1115 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
1116#endif
1117 pu32 += 4;
1118 GCPhysFault += 4;
1119 pRegFrame->edi += 4;
1120 pRegFrame->ecx--;
1121 }
1122 pRegFrame->rip += pCpu->opsize;
1123
1124 /* See use in pgmPoolAccessHandlerSimple(). */
1125 PGM_INVL_GUEST_TLBS();
1126
1127 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
1128 return VINF_SUCCESS;
1129}
1130
1131
1132/**
1133 * Handles the simple write accesses.
1134 *
1135 * @returns VBox status code suitable for scheduling.
1136 * @param pVM The VM handle.
1137 * @param pPool The pool.
1138 * @param pPage The pool page (head).
1139 * @param pCpu The disassembly of the write instruction.
1140 * @param pRegFrame The trap register frame.
1141 * @param GCPhysFault The fault address as guest physical address.
1142 * @param pvFault The fault address.
1143 */
1144DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1145 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1146{
1147 /*
1148 * Increment the modification counter and insert it into the list
1149 * of modified pages the first time.
1150 */
1151 if (!pPage->cModifications++)
1152 pgmPoolMonitorModifiedInsert(pPool, pPage);
1153
1154 /*
1155 * Clear all the pages. ASSUMES that pvFault is readable.
1156 */
1157#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1158 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1159 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1160 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
1161 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1162#else
1163 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
1164#endif
1165
1166 /*
1167 * Interpret the instruction.
1168 */
1169 uint32_t cb;
1170 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
1171 if (RT_SUCCESS(rc))
1172 pRegFrame->rip += pCpu->opsize;
1173 else if (rc == VERR_EM_INTERPRETER)
1174 {
1175 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1176 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
1177 rc = VINF_EM_RAW_EMULATE_INSTR;
1178 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1179 }
1180
1181 /*
1182 * Quick hack, with logging enabled we're getting stale
1183 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
1184 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
1185 * have to be fixed to support this. But that'll have to wait till next week.
1186 *
1187 * An alternative is to keep track of the changed PTEs together with the
1188 * GCPhys from the guest PT. This may prove expensive though.
1189 *
1190 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
1191 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1192 */
1193 PGM_INVL_GUEST_TLBS();
1194
1195 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1196 return rc;
1197}
1198
1199
1200/**
1201 * \#PF Handler callback for PT write accesses.
1202 *
1203 * @returns VBox status code (appropriate for GC return).
1204 * @param pVM VM Handle.
1205 * @param uErrorCode CPU Error code.
1206 * @param pRegFrame Trap register frame.
1207 * NULL on DMA and other non CPU access.
1208 * @param pvFault The fault address (cr2).
1209 * @param GCPhysFault The GC physical address corresponding to pvFault.
1210 * @param pvUser User argument.
1211 */
1212DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1213{
1214 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1215 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1216 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1217 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1218
1219 /*
1220 * We should ALWAYS have the list head as user parameter. This
1221 * is because we use that page to record the changes.
1222 */
1223 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1224
1225 /*
1226 * Disassemble the faulting instruction.
1227 */
1228 DISCPUSTATE Cpu;
1229 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
1230 AssertRCReturn(rc, rc);
1231
1232 /*
1233 * Check if it's worth dealing with.
1234 */
1235 bool fReused = false;
1236 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1237 || pPage->fCR3Mix)
1238 && !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
1239 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
1240 {
1241 /*
1242 * Simple instructions, no REP prefix.
1243 */
1244 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
1245 {
1246 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1247 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1248 return rc;
1249 }
1250
1251 /*
1252 * Windows is frequently doing small memset() operations (netio test 4k+).
1253 * We have to deal with these or we'll kill the cache and performance.
1254 */
1255 if ( Cpu.pCurInstr->opcode == OP_STOSWD
1256 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
1257 && pRegFrame->ecx <= 0x20
1258 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1259 && !((uintptr_t)pvFault & 3)
1260 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1261 && Cpu.mode == CPUMODE_32BIT
1262 && Cpu.opmode == CPUMODE_32BIT
1263 && Cpu.addrmode == CPUMODE_32BIT
1264 && Cpu.prefix == PREFIX_REP
1265 && !pRegFrame->eflags.Bits.u1DF
1266 )
1267 {
1268 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1269 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1270 return rc;
1271 }
1272
1273 /* REP prefix, don't bother. */
1274 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1275 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1276 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
1277 }
1278
1279 /*
1280 * Not worth it, so flush it.
1281 *
1282 * If we considered it to be reused, don't go back to ring-3
1283 * to emulate failed instructions since we usually cannot
1284 * interpret them. This may be a bit risky, in which case
1285 * the reuse detection must be fixed.
1286 */
1287 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1288 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1289 rc = VINF_SUCCESS;
1290 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1291 return rc;
1292}
1293
1294# endif /* !IN_RING3 */
1295#endif /* PGMPOOL_WITH_MONITORING */
1296
1297#ifdef PGMPOOL_WITH_CACHE
1298
1299/**
1300 * Inserts a page into the GCPhys hash table.
1301 *
1302 * @param pPool The pool.
1303 * @param pPage The page.
1304 */
1305DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1306{
1307 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1308 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
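 /* Each hash bucket is a singly linked list of page indexes chained through iNext;
 new pages are inserted at the head of their bucket. */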
1309 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1310 pPage->iNext = pPool->aiHash[iHash];
1311 pPool->aiHash[iHash] = pPage->idx;
1312}
1313
1314
1315/**
1316 * Removes a page from the GCPhys hash table.
1317 *
1318 * @param pPool The pool.
1319 * @param pPage The page.
1320 */
1321DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1322{
1323 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1324 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1325 if (pPool->aiHash[iHash] == pPage->idx)
1326 pPool->aiHash[iHash] = pPage->iNext;
1327 else
1328 {
1329 uint16_t iPrev = pPool->aiHash[iHash];
1330 for (;;)
1331 {
1332 const int16_t i = pPool->aPages[iPrev].iNext;
1333 if (i == pPage->idx)
1334 {
1335 pPool->aPages[iPrev].iNext = pPage->iNext;
1336 break;
1337 }
1338 if (i == NIL_PGMPOOL_IDX)
1339 {
1340 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1341 break;
1342 }
1343 iPrev = i;
1344 }
1345 }
1346 pPage->iNext = NIL_PGMPOOL_IDX;
1347}
1348
1349
1350/**
1351 * Frees up one cache page.
1352 *
1353 * @returns VBox status code.
1354 * @retval VINF_SUCCESS on success.
1355 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
1356 * @param pPool The pool.
1357 * @param iUser The user index.
1358 */
1359static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1360{
1361#ifndef IN_RC
1362 const PVM pVM = pPool->CTX_SUFF(pVM);
1363#endif
1364 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
1365 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1366
1367 /*
1368 * Select one page from the tail of the age list.
1369 */
1370 uint16_t iToFree = pPool->iAgeTail;
1371 if (iToFree == iUser)
1372 iToFree = pPool->aPages[iToFree].iAgePrev;
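 /* The age list is kept most-recently-used first, so the tail is the LRU eviction candidate;
 never pick the page that is about to reference the new allocation. */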
1373/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1374 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1375 {
1376 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1377 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1378 {
1379 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1380 continue;
1381 iToFree = i;
1382 break;
1383 }
1384 }
1385*/
1386
1387 Assert(iToFree != iUser);
1388 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1389
1390 PPGMPOOLPAGE pPage = &pPool->aPages[iToFree];
1391
1392 /*
1393 * Reject any attempts at flushing the currently active shadow CR3 mapping
1394 */
1395 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
1396 {
1397 /* Refresh the cr3 mapping by putting it at the head of the age list. */
1398 LogFlow(("pgmPoolCacheFreeOne refuse CR3 mapping\n"));
1399 pgmPoolCacheUsed(pPool, pPage);
1400 return pgmPoolCacheFreeOne(pPool, iUser);
1401 }
1402
1403 int rc = pgmPoolFlushPage(pPool, pPage);
1404 if (rc == VINF_SUCCESS)
1405 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1406 return rc;
1407}
1408
1409
1410/**
1411 * Checks if a kind mismatch is really a page being reused
1412 * or if it's just normal remappings.
1413 *
1414 * @returns true if reused and the cached page (enmKind1) should be flushed
1415 * @returns false if not reused.
1416 * @param enmKind1 The kind of the cached page.
1417 * @param enmKind2 The kind of the requested page.
1418 */
1419static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1420{
1421 switch (enmKind1)
1422 {
1423 /*
1424 * Never reuse them. There is no remapping in non-paging mode.
1425 */
1426 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1427 case PGMPOOLKIND_32BIT_PD_PHYS:
1428 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1429 case PGMPOOLKIND_PAE_PD_PHYS:
1430 case PGMPOOLKIND_PAE_PDPT_PHYS:
1431 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1432 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1433 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1434 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1435 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1436#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1437 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1438 return false;
1439#else
1440 return true;
1441#endif
1442
1443 /*
1444 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1445 */
1446 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1447 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1448 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1449 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1450 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1451 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1452 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1453 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1454 case PGMPOOLKIND_32BIT_PD:
1455 switch (enmKind2)
1456 {
1457 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1458 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1459 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1460 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1461 case PGMPOOLKIND_64BIT_PML4:
1462 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1463 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1464 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1465 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1466 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1467 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1468 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1469 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1470 return true;
1471 default:
1472 return false;
1473 }
1474
1475 /*
1476 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1477 */
1478 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1479 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1480 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1481 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1482 case PGMPOOLKIND_64BIT_PML4:
1483 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1484 switch (enmKind2)
1485 {
1486 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1487 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1488 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1489 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1490 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1491 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1492 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1493 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1494 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1495 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1496 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1497 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1498 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1499 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1500 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1501 return true;
1502 default:
1503 return false;
1504 }
1505
1506 /*
1507 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1508 */
1509#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1510 case PGMPOOLKIND_ROOT_32BIT_PD:
1511 case PGMPOOLKIND_ROOT_PAE_PD:
1512 case PGMPOOLKIND_ROOT_PDPT:
1513#endif
1514 case PGMPOOLKIND_ROOT_NESTED:
1515 return false;
1516
1517 default:
1518 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1519 }
1520}
1521
1522
1523/**
1524 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1525 *
1526 * @returns VBox status code.
1527 * @retval VINF_PGM_CACHED_PAGE on success.
1528 * @retval VERR_FILE_NOT_FOUND if not found.
1529 * @param pPool The pool.
1530 * @param GCPhys The GC physical address of the page we're gonna shadow.
1531 * @param enmKind The kind of mapping.
1532 * @param iUser The shadow page pool index of the user table.
1533 * @param iUserTable The index into the user table (shadowed).
1534 * @param ppPage Where to store the pointer to the page.
1535 */
1536static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1537{
1538#ifndef IN_RC
1539 const PVM pVM = pPool->CTX_SUFF(pVM);
1540#endif
1541 /*
1542 * Look up the GCPhys in the hash.
1543 */
1544 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1545 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
1546 if (i != NIL_PGMPOOL_IDX)
1547 {
1548 do
1549 {
1550 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1551 Log3(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1552 if (pPage->GCPhys == GCPhys)
1553 {
1554 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
1555 {
1556 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1557 if (RT_SUCCESS(rc))
1558 {
1559 *ppPage = pPage;
1560 STAM_COUNTER_INC(&pPool->StatCacheHits);
1561 return VINF_PGM_CACHED_PAGE;
1562 }
1563 return rc;
1564 }
1565
1566 /*
1567 * The kind is different. In some cases we should now flush the page
1568 * as it has been reused, but in most cases this is normal remapping
1569 * of PDs as PT or big pages using the GCPhys field in a slightly
1570 * different way than the other kinds.
1571 */
1572 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1573 {
1574 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1575 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
1576 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1577 break;
1578 }
1579 }
1580
1581 /* next */
1582 i = pPage->iNext;
1583 } while (i != NIL_PGMPOOL_IDX);
1584 }
1585
1586 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
1587 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1588 return VERR_FILE_NOT_FOUND;
1589}
1590
1591
1592/**
1593 * Inserts a page into the cache.
1594 *
1595 * @param pPool The pool.
1596 * @param pPage The cached page.
1597 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1598 */
1599static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1600{
1601 /*
1602 * Insert into the GCPhys hash if the page is fit for that.
1603 */
1604 Assert(!pPage->fCached);
1605 if (fCanBeCached)
1606 {
1607 pPage->fCached = true;
1608 pgmPoolHashInsert(pPool, pPage);
1609 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1610 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1611 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1612 }
1613 else
1614 {
1615 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1616 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1617 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1618 }
1619
1620 /*
1621 * Insert at the head of the age list.
1622 */
1623 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1624 pPage->iAgeNext = pPool->iAgeHead;
1625 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1626 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1627 else
1628 pPool->iAgeTail = pPage->idx;
1629 pPool->iAgeHead = pPage->idx;
1630}
1631
1632
1633/**
1634 * Flushes a cached page.
1635 *
1636 * @param pPool The pool.
1637 * @param pPage The cached page.
1638 */
1639static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1640{
1641 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1642
1643 /*
1644 * Remove the page from the hash.
1645 */
1646 if (pPage->fCached)
1647 {
1648 pPage->fCached = false;
1649 pgmPoolHashRemove(pPool, pPage);
1650 }
1651 else
1652 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1653
1654 /*
1655 * Remove it from the age list.
1656 */
1657 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1658 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1659 else
1660 pPool->iAgeTail = pPage->iAgePrev;
1661 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1662 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1663 else
1664 pPool->iAgeHead = pPage->iAgeNext;
1665 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1666 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1667}
1668
1669#endif /* PGMPOOL_WITH_CACHE */
1670#ifdef PGMPOOL_WITH_MONITORING
1671
1672/**
1673 * Looks for pages sharing the monitor.
1674 *
1675 * @returns Pointer to the head page.
1676 * @returns NULL if not found.
1677 * @param pPool The Pool
1678 * @param pNewPage The page which is going to be monitored.
1679 */
1680static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1681{
1682#ifdef PGMPOOL_WITH_CACHE
1683 /*
1684 * Look up the GCPhys in the hash.
1685 */
1686 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
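    /* Some kinds keep a sub-page offset in GCPhys (e.g. the PAE PDs that each shadow a part of a
       32-bit PD); compare on the page-aligned address and let the range check below tolerate the offset. */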
1687 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1688 if (i == NIL_PGMPOOL_IDX)
1689 return NULL;
1690 do
1691 {
1692 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1693 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1694 && pPage != pNewPage)
1695 {
1696 switch (pPage->enmKind)
1697 {
1698 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1699 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1700 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1701 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1702 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1703 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1704 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1705 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1706 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1707 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1708 case PGMPOOLKIND_64BIT_PML4:
1709#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1710 case PGMPOOLKIND_32BIT_PD:
1711 case PGMPOOLKIND_PAE_PDPT:
1712#else
1713 case PGMPOOLKIND_ROOT_32BIT_PD:
1714 case PGMPOOLKIND_ROOT_PAE_PD:
1715 case PGMPOOLKIND_ROOT_PDPT:
1716#endif
1717 {
1718 /* find the head */
1719 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1720 {
1721 Assert(pPage->iMonitoredPrev != pPage->idx);
1722 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1723 }
1724 return pPage;
1725 }
1726
1727 /* ignore, no monitoring. */
1728 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1729 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1730 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1731 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1732 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1733 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1734 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1735 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1736 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1737 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1738 case PGMPOOLKIND_ROOT_NESTED:
1739 case PGMPOOLKIND_PAE_PD_PHYS:
1740 case PGMPOOLKIND_PAE_PDPT_PHYS:
1741 case PGMPOOLKIND_32BIT_PD_PHYS:
1742#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1743 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1744#endif
1745 break;
1746 default:
1747 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1748 }
1749 }
1750
1751 /* next */
1752 i = pPage->iNext;
1753 } while (i != NIL_PGMPOOL_IDX);
1754#endif
1755 return NULL;
1756}
1757
1758
1759/**
1760 * Enables write monitoring of a guest page.
1761 *
1762 * @returns VBox status code.
1763 * @retval VINF_SUCCESS on success.
1764 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1765 * @param pPool The pool.
1766 * @param pPage The cached page.
1767 */
1768static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1769{
1770 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1771
1772 /*
1773 * Filter out the relevant kinds.
1774 */
1775 switch (pPage->enmKind)
1776 {
1777 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1778 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1779 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1780 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1781 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1782 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1783 case PGMPOOLKIND_64BIT_PML4:
1784#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1785 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1786 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1787 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1788 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1789 case PGMPOOLKIND_32BIT_PD:
1790 case PGMPOOLKIND_PAE_PDPT:
1791#else
1792 case PGMPOOLKIND_ROOT_PDPT:
1793#endif
1794 break;
1795
1796 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1797 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1798 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1799 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1800 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1801 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1802 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1803 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1804 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1805 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1806 case PGMPOOLKIND_ROOT_NESTED:
1807 /* Nothing to monitor here. */
1808 return VINF_SUCCESS;
1809
1810#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1811 case PGMPOOLKIND_32BIT_PD_PHYS:
1812 case PGMPOOLKIND_PAE_PDPT_PHYS:
1813 case PGMPOOLKIND_PAE_PD_PHYS:
1814 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1815 /* Nothing to monitor here. */
1816 return VINF_SUCCESS;
1817#else
1818 case PGMPOOLKIND_ROOT_32BIT_PD:
1819 case PGMPOOLKIND_ROOT_PAE_PD:
1820#endif
1821#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1822 break;
1823#else
1824 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1825#endif
1826 default:
1827 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1828 }
1829
1830 /*
1831 * Install handler.
1832 */
1833 int rc;
1834 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1835 if (pPageHead)
1836 {
1837 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1838 Assert(pPageHead->iMonitoredPrev != pPage->idx);
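        /* Insert ourselves right after the existing chain head; the physical access handler stays
           registered on the head page only, so no new registration is needed here. */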
1839 pPage->iMonitoredPrev = pPageHead->idx;
1840 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1841 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1842 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1843 pPageHead->iMonitoredNext = pPage->idx;
1844 rc = VINF_SUCCESS;
1845 }
1846 else
1847 {
1848 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1849 PVM pVM = pPool->CTX_SUFF(pVM);
1850 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1851 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1852 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1853 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1854 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1855 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1856 pPool->pszAccessHandler);
1857 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1858 * the heap size should suffice. */
1859 AssertFatalRC(rc);
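        /* If a pool clear is pending (PGM_SYNC_CLEAR_PGM_POOL), report VERR_PGM_POOL_CLEARED so the
           caller can deal with the light weight flush. */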
1860 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1861 rc = VERR_PGM_POOL_CLEARED;
1862 }
1863 pPage->fMonitored = true;
1864 return rc;
1865}
1866
1867
1868/**
1869 * Disables write monitoring of a guest page.
1870 *
1871 * @returns VBox status code.
1872 * @retval VINF_SUCCESS on success.
1873 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1874 * @param pPool The pool.
1875 * @param pPage The cached page.
1876 */
1877static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1878{
1879 /*
1880 * Filter out the relevant kinds.
1881 */
1882 switch (pPage->enmKind)
1883 {
1884 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1885 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1886 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1887 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1888 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1889 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1890 case PGMPOOLKIND_64BIT_PML4:
1891#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1892 case PGMPOOLKIND_32BIT_PD:
1893 case PGMPOOLKIND_PAE_PDPT:
1894 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1895 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1896 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1897 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1898#else
1899 case PGMPOOLKIND_ROOT_PDPT:
1900#endif
1901 break;
1902
1903 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1904 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1905 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1906 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1907 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1908 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1909 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1910 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1911 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1912 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1913 case PGMPOOLKIND_ROOT_NESTED:
1914 case PGMPOOLKIND_PAE_PD_PHYS:
1915 case PGMPOOLKIND_PAE_PDPT_PHYS:
1916 case PGMPOOLKIND_32BIT_PD_PHYS:
1917 /* Nothing to monitor here. */
1918 return VINF_SUCCESS;
1919
1920#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1921 case PGMPOOLKIND_ROOT_32BIT_PD:
1922 case PGMPOOLKIND_ROOT_PAE_PD:
1923#endif
1924#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1925 break;
1926#endif
1927#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1928 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1929#endif
1930 default:
1931 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1932 }
1933
1934 /*
1935 * Remove the page from the monitored list or uninstall it if last.
1936 */
1937 const PVM pVM = pPool->CTX_SUFF(pVM);
1938 int rc;
1939 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1940 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1941 {
1942 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1943 {
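            /* This page is the chain head: promote the next page to head and re-point the already
               registered physical handler at it. */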
1944 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1945 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1946 pNewHead->fCR3Mix = pPage->fCR3Mix;
1947 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1948 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1949 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1950 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
1951 pPool->pszAccessHandler);
1952 AssertFatalRCSuccess(rc);
1953 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1954 }
1955 else
1956 {
1957 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1958 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1959 {
1960 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1961 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1962 }
1963 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1964 rc = VINF_SUCCESS;
1965 }
1966 }
1967 else
1968 {
1969 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1970 AssertFatalRC(rc);
1971 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1972 rc = VERR_PGM_POOL_CLEARED;
1973 }
1974 pPage->fMonitored = false;
1975
1976 /*
1977 * Remove it from the list of modified pages (if in it).
1978 */
1979 pgmPoolMonitorModifiedRemove(pPool, pPage);
1980
1981 return rc;
1982}
1983
1984# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1985
1986/**
1987 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1988 *
1989 * @param pPool The Pool.
1990 * @param pPage A page in the chain.
1991 * @param fCR3Mix The new fCR3Mix value.
1992 */
1993static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1994{
1995 /* current */
1996 pPage->fCR3Mix = fCR3Mix;
1997
1998 /* before */
1999 int16_t idx = pPage->iMonitoredPrev;
2000 while (idx != NIL_PGMPOOL_IDX)
2001 {
2002 pPool->aPages[idx].fCR3Mix = fCR3Mix;
2003 idx = pPool->aPages[idx].iMonitoredPrev;
2004 }
2005
2006 /* after */
2007 idx = pPage->iMonitoredNext;
2008 while (idx != NIL_PGMPOOL_IDX)
2009 {
2010 pPool->aPages[idx].fCR3Mix = fCR3Mix;
2011 idx = pPool->aPages[idx].iMonitoredNext;
2012 }
2013}
2014
2015
2016/**
2017 * Installs or modifies monitoring of a CR3 page (special).
2018 *
2019 * We're pretending the CR3 page is shadowed by the pool so we can use the
2020 * generic mechanisms for detecting chained monitoring. (This also gives us a
2021 * taste of what code changes are required to really pool CR3 shadow pages.)
2022 *
2023 * @returns VBox status code.
2024 * @param pPool The pool.
2025 * @param idxRoot The CR3 (root) page index.
2026 * @param GCPhysCR3 The (new) CR3 value.
2027 */
2028int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
2029{
2030 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
2031 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
2032 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d} GCPhysCR3=%RGp\n",
2033 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
2034
2035 /*
2036 * The unlikely case where it already matches.
2037 */
2038 if (pPage->GCPhys == GCPhysCR3)
2039 {
2040 Assert(pPage->fMonitored);
2041 return VINF_SUCCESS;
2042 }
2043
2044 /*
2045 * Flush the current monitoring and remove it from the hash.
2046 */
2047 int rc = VINF_SUCCESS;
2048 if (pPage->fMonitored)
2049 {
2050 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
2051 rc = pgmPoolMonitorFlush(pPool, pPage);
2052 if (rc == VERR_PGM_POOL_CLEARED)
2053 rc = VINF_SUCCESS;
2054 else
2055 AssertFatalRC(rc);
2056 pgmPoolHashRemove(pPool, pPage);
2057 }
2058
2059 /*
2060 * Monitor the page at the new location and insert it into the hash.
2061 */
2062 pPage->GCPhys = GCPhysCR3;
2063 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
2064 if (rc2 != VERR_PGM_POOL_CLEARED)
2065 {
2066 AssertFatalRC(rc2);
2067 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
2068 rc = rc2;
2069 }
2070 pgmPoolHashInsert(pPool, pPage);
2071 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
2072 return rc;
2073}
2074
2075
2076/**
2077 * Removes the monitoring of a CR3 page (special).
2078 *
2079 * @returns VBox status code.
2080 * @param pPool The pool.
2081 * @param idxRoot The CR3 (root) page index.
2082 */
2083int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
2084{
2085 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
2086 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
2087 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d}\n",
2088 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
2089
2090 if (!pPage->fMonitored)
2091 return VINF_SUCCESS;
2092
2093 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
2094 int rc = pgmPoolMonitorFlush(pPool, pPage);
2095 if (rc != VERR_PGM_POOL_CLEARED)
2096 AssertFatalRC(rc);
2097 else
2098 rc = VINF_SUCCESS;
2099 pgmPoolHashRemove(pPool, pPage);
2100 Assert(!pPage->fMonitored);
2101 pPage->GCPhys = NIL_RTGCPHYS;
2102 return rc;
2103}
2104
2105# endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
2106
2107/**
2108 * Inserts the page into the list of modified pages.
2109 *
2110 * @param pPool The pool.
2111 * @param pPage The page.
2112 */
2113void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2114{
2115 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2116 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2117 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2118 && pPool->iModifiedHead != pPage->idx,
2119 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2120 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2121 pPool->iModifiedHead, pPool->cModifiedPages));
2122
2123 pPage->iModifiedNext = pPool->iModifiedHead;
2124 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2125 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2126 pPool->iModifiedHead = pPage->idx;
2127 pPool->cModifiedPages++;
2128#ifdef VBOX_WITH_STATISTICS
2129 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2130 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2131#endif
2132}
2133
2134
2135/**
2136 * Removes the page from the list of modified pages and resets the
2137 * modification counter.
2138 *
2139 * @param pPool The pool.
2140 * @param pPage The page which is believed to be in the list of modified pages.
2141 */
2142static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2143{
2144 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2145 if (pPool->iModifiedHead == pPage->idx)
2146 {
2147 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2148 pPool->iModifiedHead = pPage->iModifiedNext;
2149 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2150 {
2151 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2152 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2153 }
2154 pPool->cModifiedPages--;
2155 }
2156 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2157 {
2158 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2159 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2160 {
2161 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2162 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2163 }
2164 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2165 pPool->cModifiedPages--;
2166 }
2167 else
2168 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2169 pPage->cModifications = 0;
2170}
2171
2172
2173/**
2174 * Zaps the list of modified pages, resetting their modification counters in the process.
2175 *
2176 * @param pVM The VM handle.
2177 */
2178void pgmPoolMonitorModifiedClearAll(PVM pVM)
2179{
2180 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2181 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2182
2183 unsigned cPages = 0; NOREF(cPages);
2184 uint16_t idx = pPool->iModifiedHead;
2185 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2186 while (idx != NIL_PGMPOOL_IDX)
2187 {
2188 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2189 idx = pPage->iModifiedNext;
2190 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2191 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2192 pPage->cModifications = 0;
2193 Assert(++cPages);
2194 }
2195 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2196 pPool->cModifiedPages = 0;
2197}
2198
2199
2200#ifdef IN_RING3
2201/**
2202 * Clear all shadow pages and clear all modification counters.
2203 *
2204 * @param pVM The VM handle.
2205 * @remark Should only be used when monitoring is available, thus placed in
2206 * the PGMPOOL_WITH_MONITORING #ifdef.
2207 */
2208void pgmPoolClearAll(PVM pVM)
2209{
2210 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2211 STAM_PROFILE_START(&pPool->StatClearAll, c);
2212 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2213
2214 /*
2215 * Iterate all the pages until we've encountered all those in use.
2216 * This is a simple but not quite optimal solution.
2217 */
2218 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2219 unsigned cLeft = pPool->cUsedPages;
2220 unsigned iPage = pPool->cCurPages;
2221 while (--iPage >= PGMPOOL_IDX_FIRST)
2222 {
2223 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2224 if (pPage->GCPhys != NIL_RTGCPHYS)
2225 {
2226 switch (pPage->enmKind)
2227 {
2228 /*
2229 * We only care about shadow page tables.
2230 */
2231 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2232 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2233 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2234 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2235 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2236 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2237 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2238 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2239 {
2240#ifdef PGMPOOL_WITH_USER_TRACKING
2241 if (pPage->cPresent)
2242#endif
2243 {
2244 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2245 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2246 ASMMemZeroPage(pvShw);
2247 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2248#ifdef PGMPOOL_WITH_USER_TRACKING
2249 pPage->cPresent = 0;
2250 pPage->iFirstPresent = ~0;
2251#endif
2252 }
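                /* Zeroing the shadow PT drops all of its mappings at once; the entries are
                   re-established on demand by the usual sync / #PF paths. */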
2253 }
2254 /* fall thru */
2255
2256 default:
2257 Assert(!pPage->cModifications || ++cModifiedPages);
2258 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2259 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2260 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2261 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2262 pPage->cModifications = 0;
2263 break;
2264
2265 }
2266 if (!--cLeft)
2267 break;
2268 }
2269 }
2270
2271 /* swipe the special pages too. */
2272 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2273 {
2274 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2275 if (pPage->GCPhys != NIL_RTGCPHYS)
2276 {
2277 Assert(!pPage->cModifications || ++cModifiedPages);
2278 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2279 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2280 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2281 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2282 pPage->cModifications = 0;
2283 }
2284 }
2285
2286#ifndef DEBUG_michael
2287 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2288#endif
2289 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2290 pPool->cModifiedPages = 0;
2291
2292#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2293 /*
2294 * Clear all the GCPhys links and rebuild the phys ext free list.
2295 */
2296 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2297 pRam;
2298 pRam = pRam->CTX_SUFF(pNext))
2299 {
2300 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2301 while (iPage-- > 0)
2302 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2303 }
2304
2305 pPool->iPhysExtFreeHead = 0;
2306 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2307 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2308 for (unsigned i = 0; i < cMaxPhysExts; i++)
2309 {
2310 paPhysExts[i].iNext = i + 1;
2311 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2312 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2313 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2314 }
2315 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2316#endif
2317
2318
2319 pPool->cPresent = 0;
2320 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2321}
2322#endif /* IN_RING3 */
2323
2324
2325/**
2326 * Handle SyncCR3 pool tasks
2327 *
2328 * @returns VBox status code.
2329 * @retval VINF_SUCCESS on success.
2330 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2331 * @param pVM The VM handle.
2332 * @remark Should only be used when monitoring is available, thus placed in
2333 * the PGMPOOL_WITH_MONITORING #ifdef.
2334 */
2335int pgmPoolSyncCR3(PVM pVM)
2336{
2337 /*
2338 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2339 * Occasionally we will have to clear all the shadow page tables because we wanted
2340 * to monitor a page which was mapped by too many shadowed page tables. This operation
2341 * is sometimes referred to as a 'lightweight flush'.
2342 */
2343 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2344 pgmPoolMonitorModifiedClearAll(pVM);
2345 else
2346 {
2347# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2348 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2349 pgmPoolClearAll(pVM);
2350# else /* !IN_RING3 */
2351 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2352 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2353 return VINF_PGM_SYNC_CR3;
2354# endif /* !IN_RING3 */
2355 }
2356 return VINF_SUCCESS;
2357}
2358
2359#endif /* PGMPOOL_WITH_MONITORING */
2360#ifdef PGMPOOL_WITH_USER_TRACKING
2361
2362/**
2363 * Frees up at least one user entry.
2364 *
2365 * @returns VBox status code.
2366 * @retval VINF_SUCCESS if at least one user entry was successfully freed.
2367 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2368 * @param pPool The pool.
2369 * @param iUser The user index.
2370 */
2371static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2372{
2373 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2374#ifdef PGMPOOL_WITH_CACHE
2375 /*
2376 * Just free cached pages in a braindead fashion.
2377 */
2378 /** @todo walk the age list backwards and free the first with usage. */
2379 int rc = VINF_SUCCESS;
2380 do
2381 {
2382 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2383 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2384 rc = rc2;
2385 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2386 return rc;
2387#else
2388 /*
2389 * Lazy approach.
2390 */
2391 /** @todo This is incompatible with long mode paging (the CR3 root will be flushed). */
2392 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
2393 pgmPoolFlushAllInt(pPool);
2394 return VERR_PGM_POOL_FLUSHED;
2395#endif
2396}
2397
2398
2399/**
2400 * Inserts a page into the cache.
2401 *
2402 * This will create a user node for the page, insert it into the GCPhys
2403 * hash, and insert it into the age list.
2404 *
2405 * @returns VBox status code.
2406 * @retval VINF_SUCCESS if successfully added.
2407 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2408 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
2409 * @param pPool The pool.
2410 * @param pPage The cached page.
2411 * @param GCPhys The GC physical address of the page we're gonna shadow.
2412 * @param iUser The user index.
2413 * @param iUserTable The user table index.
2414 */
2415DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2416{
2417 int rc = VINF_SUCCESS;
2418 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2419
2420 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2421
2422#ifdef VBOX_STRICT
2423 /*
2424 * Check that the entry doesn't already exist.
2425 */
2426 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2427 {
2428 uint16_t i = pPage->iUserHead;
2429 do
2430 {
2431 Assert(i < pPool->cMaxUsers);
2432 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2433 i = paUsers[i].iNext;
2434 } while (i != NIL_PGMPOOL_USER_INDEX);
2435 }
2436#endif
2437
2438 /*
2439 * Find a free user node.
2440 */
2441 uint16_t i = pPool->iUserFreeHead;
2442 if (i == NIL_PGMPOOL_USER_INDEX)
2443 {
2444 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2445 if (RT_FAILURE(rc))
2446 return rc;
2447 i = pPool->iUserFreeHead;
2448 }
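    /* On success pgmPoolTrackFreeOneUser has released at least one user node (by evicting a cached
       page), so the free list head is valid again; failures were returned above. */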
2449
2450 /*
2451 * Unlink the user node from the free list,
2452 * initialize and insert it into the user list.
2453 */
2454 pPool->iUserFreeHead = paUsers[i].iNext;
2455 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2456 paUsers[i].iUser = iUser;
2457 paUsers[i].iUserTable = iUserTable;
2458 pPage->iUserHead = i;
2459
2460 /*
2461 * Insert into cache and enable monitoring of the guest page if enabled.
2462 *
2463 * Until we implement caching of all levels, including the CR3 one, we'll
2464 * have to make sure we don't try to monitor & cache any recursive reuse of
2465 * a monitored CR3 page. Because all Windows versions do this, we'll
2466 * have to be able to do combined access monitoring: CR3 + PT and
2467 * PD + PT (guest PAE).
2468 *
2469 * Update:
2470 * We're now cooperating with the CR3 monitor if an uncacheable page is found.
2471 */
2472#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2473# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2474 const bool fCanBeMonitored = true;
2475# else
2476 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2477 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2478 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2479# endif
2480# ifdef PGMPOOL_WITH_CACHE
2481 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2482# endif
2483 if (fCanBeMonitored)
2484 {
2485# ifdef PGMPOOL_WITH_MONITORING
2486 rc = pgmPoolMonitorInsert(pPool, pPage);
2487 if (rc == VERR_PGM_POOL_CLEARED)
2488 {
2489 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2490# ifndef PGMPOOL_WITH_CACHE
2491 pgmPoolMonitorFlush(pPool, pPage);
2492 rc = VERR_PGM_POOL_FLUSHED;
2493# endif
2494 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2495 paUsers[i].iNext = pPool->iUserFreeHead;
2496 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2497 pPool->iUserFreeHead = i;
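            /* The caller's user reference has been backed out, but with the cache enabled the page
               itself stays cached and can be reused once the pending pool clear has run. */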
2498 }
2499 }
2500# endif
2501#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2502 return rc;
2503}
2504
2505
2506# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2507/**
2508 * Adds a user reference to a page.
2509 *
2510 * This will move the page to the head of the age list.
2511 *
2512 * @returns VBox status code.
2513 * @retval VINF_SUCCESS if successfully added.
2514 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2515 * @param pPool The pool.
2516 * @param pPage The cached page.
2517 * @param iUser The user index.
2518 * @param iUserTable The user table.
2519 */
2520static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2521{
2522 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2523
2524 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2525# ifdef VBOX_STRICT
2526 /*
2527 * Check that the entry doesn't already exist.
2528 */
2529 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2530 {
2531 uint16_t i = pPage->iUserHead;
2532 do
2533 {
2534 Assert(i < pPool->cMaxUsers);
2535 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2536 i = paUsers[i].iNext;
2537 } while (i != NIL_PGMPOOL_USER_INDEX);
2538 }
2539# endif
2540
2541 /*
2542 * Allocate a user node.
2543 */
2544 uint16_t i = pPool->iUserFreeHead;
2545 if (i == NIL_PGMPOOL_USER_INDEX)
2546 {
2547 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2548 if (RT_FAILURE(rc))
2549 return rc;
2550 i = pPool->iUserFreeHead;
2551 }
2552 pPool->iUserFreeHead = paUsers[i].iNext;
2553
2554 /*
2555 * Initialize the user node and insert it.
2556 */
2557 paUsers[i].iNext = pPage->iUserHead;
2558 paUsers[i].iUser = iUser;
2559 paUsers[i].iUserTable = iUserTable;
2560 pPage->iUserHead = i;
2561
2562# ifdef PGMPOOL_WITH_CACHE
2563 /*
2564 * Tell the cache to update its replacement stats for this page.
2565 */
2566 pgmPoolCacheUsed(pPool, pPage);
2567# endif
2568 return VINF_SUCCESS;
2569}
2570# endif /* PGMPOOL_WITH_CACHE */
2571
2572
2573/**
2574 * Frees a user record associated with a page.
2575 *
2576 * This does not clear the entry in the user table, it simply returns the
2577 * user record to the chain of free records.
2578 *
2579 * @param pPool The pool.
2580 * @param pPage The shadow page whose user record is being freed.
2581 * @param iUser The shadow page pool index of the user table.
2582 * @param iUserTable The index into the user table (shadowed).
2583 */
2584static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2585{
2586 /*
2587 * Unlink and free the specified user entry.
2588 */
2589 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2590
2591 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2592 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2593 uint16_t i = pPage->iUserHead;
2594 if ( i != NIL_PGMPOOL_USER_INDEX
2595 && paUsers[i].iUser == iUser
2596 && paUsers[i].iUserTable == iUserTable)
2597 {
2598 pPage->iUserHead = paUsers[i].iNext;
2599
2600 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2601 paUsers[i].iNext = pPool->iUserFreeHead;
2602 pPool->iUserFreeHead = i;
2603 return;
2604 }
2605
2606 /* General: Linear search. */
2607 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2608 while (i != NIL_PGMPOOL_USER_INDEX)
2609 {
2610 if ( paUsers[i].iUser == iUser
2611 && paUsers[i].iUserTable == iUserTable)
2612 {
2613 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2614 paUsers[iPrev].iNext = paUsers[i].iNext;
2615 else
2616 pPage->iUserHead = paUsers[i].iNext;
2617
2618 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2619 paUsers[i].iNext = pPool->iUserFreeHead;
2620 pPool->iUserFreeHead = i;
2621 return;
2622 }
2623 iPrev = i;
2624 i = paUsers[i].iNext;
2625 }
2626
2627 /* Fatal: didn't find it */
2628 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2629 iUser, iUserTable, pPage->GCPhys));
2630}
2631
2632
2633/**
2634 * Gets the entry size of a shadow table.
2635 *
2636 * @param enmKind The kind of page.
2637 *
2638 * @returns The size of the entry in bytes. That is, 4 or 8.
2639 * @returns If the kind is not for a table, an assertion is raised and 0 is
2640 * returned.
2641 */
2642DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2643{
2644 switch (enmKind)
2645 {
2646 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2647 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2648 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2649#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2650 case PGMPOOLKIND_32BIT_PD:
2651 case PGMPOOLKIND_32BIT_PD_PHYS:
2652#else
2653 case PGMPOOLKIND_ROOT_32BIT_PD:
2654#endif
2655 return 4;
2656
2657 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2658 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2659 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2660 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2661 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2662 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2663 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2664 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2665 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2666 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2667 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2668 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2669 case PGMPOOLKIND_64BIT_PML4:
2670#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2671 case PGMPOOLKIND_ROOT_PAE_PD:
2672 case PGMPOOLKIND_ROOT_PDPT:
2673#endif
2674 case PGMPOOLKIND_PAE_PDPT:
2675 case PGMPOOLKIND_ROOT_NESTED:
2676 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2677 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2678 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2679 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2680 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2681 case PGMPOOLKIND_PAE_PD_PHYS:
2682 case PGMPOOLKIND_PAE_PDPT_PHYS:
2683 return 8;
2684
2685 default:
2686 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2687 }
2688}
2689
2690
2691/**
2692 * Gets the entry size of a guest table.
2693 *
2694 * @param enmKind The kind of page.
2695 *
2696 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2697 * @returns If the kind is not for a table, an assertion is raised and 0 is
2698 * returned.
2699 */
2700DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2701{
2702 switch (enmKind)
2703 {
2704 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2705 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2706#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2707 case PGMPOOLKIND_32BIT_PD:
2708#else
2709 case PGMPOOLKIND_ROOT_32BIT_PD:
2710#endif
2711 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2712 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2713 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2714 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2715 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2716 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2717 return 4;
2718
2719 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2720 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2721 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2722 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2723 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2724 case PGMPOOLKIND_64BIT_PML4:
2725#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2726 case PGMPOOLKIND_PAE_PDPT:
2727#else
2728 case PGMPOOLKIND_ROOT_PAE_PD:
2729 case PGMPOOLKIND_ROOT_PDPT:
2730#endif
2731 return 8;
2732
2733 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2734 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2735 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2736 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2737 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2738 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2739 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2740 case PGMPOOLKIND_ROOT_NESTED:
2741 case PGMPOOLKIND_PAE_PD_PHYS:
2742 case PGMPOOLKIND_PAE_PDPT_PHYS:
2743 case PGMPOOLKIND_32BIT_PD_PHYS:
2744 /** @todo can we return 0? (nobody is calling this...) */
2745 AssertFailed();
2746 return 0;
2747
2748 default:
2749 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2750 }
2751}
2752
2753#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2754
2755/**
2756 * Scans one shadow page table for mappings of a physical page.
2757 *
2758 * @param pVM The VM handle.
2759 * @param pPhysPage The guest page in question.
2760 * @param iShw The shadow page table.
2761 * @param cRefs The number of references made in that PT.
2762 */
2763static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2764{
2765 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2766 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2767
2768 /*
2769 * Assert sanity.
2770 */
2771 Assert(cRefs == 1);
2772 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2773 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2774
2775 /*
2776 * Then, clear the actual mappings to the page in the shadow PT.
2777 */
2778 switch (pPage->enmKind)
2779 {
2780 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2781 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2782 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2783 {
2784 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
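            /* A shadow PTE is considered a mapping of this guest page iff its physical address and
               present bit match u32; all other attribute bits are ignored. */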
2785 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2786 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2787 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2788 {
2789 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2790 pPT->a[i].u = 0;
2791 cRefs--;
2792 if (!cRefs)
2793 return;
2794 }
2795#ifdef LOG_ENABLED
2796 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2797 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2798 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2799 {
2800 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2801 pPT->a[i].u = 0;
2802 }
2803#endif
2804 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2805 break;
2806 }
2807
2808 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2809 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2810 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2811 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2812 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2813 {
2814 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2815 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2816 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2817 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2818 {
2819 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2820 pPT->a[i].u = 0;
2821 cRefs--;
2822 if (!cRefs)
2823 return;
2824 }
2825#ifdef LOG_ENABLED
2826 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2827 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2828 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2829 {
2830 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2831 pPT->a[i].u = 0;
2832 }
2833#endif
2834 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
2835 break;
2836 }
2837
2838 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2839 {
2840 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2841 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2842 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2843 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2844 {
2845 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2846 pPT->a[i].u = 0;
2847 cRefs--;
2848 if (!cRefs)
2849 return;
2850 }
2851#ifdef LOG_ENABLED
2852 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2853 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2854 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2855 {
2856 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2857 pPT->a[i].u = 0;
2858 }
2859#endif
2860 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2861 break;
2862 }
2863
2864 default:
2865 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2866 }
2867}
2868
2869
2870/**
2871 * Scans one shadow page table for mappings of a physical page.
2872 *
2873 * @param pVM The VM handle.
2874 * @param pPhysPage The guest page in question.
2875 * @param iShw The shadow page table.
2876 * @param cRefs The number of references made in that PT.
2877 */
2878void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2879{
2880 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2881 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2882 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2883 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2884 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2885 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2886}
2887
2888
2889/**
2890 * Flushes a list of shadow page tables mapping the same physical page.
2891 *
2892 * @param pVM The VM handle.
2893 * @param pPhysPage The guest page in question.
2894 * @param iPhysExt The physical cross reference extent list to flush.
2895 */
2896void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2897{
2898 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2899 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2900 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%d\n", pPhysPage->HCPhys, iPhysExt));
2901
2902 const uint16_t iPhysExtStart = iPhysExt;
2903 PPGMPOOLPHYSEXT pPhysExt;
2904 do
2905 {
2906 Assert(iPhysExt < pPool->cMaxPhysExts);
2907 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2908 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2909 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2910 {
2911 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2912 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2913 }
2914
2915 /* next */
2916 iPhysExt = pPhysExt->iNext;
2917 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2918
2919 /* insert the list into the free list and clear the ram range entry. */
2920 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2921 pPool->iPhysExtFreeHead = iPhysExtStart;
2922 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2923
2924 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2925}
2926
2927#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2928
2929/**
2930 * Scans all shadow page tables for mappings of a physical page.
2931 *
2932 * This may be slow, but it's most likely more efficient than cleaning
2933 * out the entire page pool / cache.
2934 *
2935 * @returns VBox status code.
2936 * @retval VINF_SUCCESS if all references have been successfully cleared.
2937 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2938 * a page pool cleaning.
2939 *
2940 * @param pVM The VM handle.
2941 * @param pPhysPage The guest page in question.
2942 */
2943int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2944{
2945 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2946 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2947 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2948 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2949
2950#if 1
2951 /*
2952 * There is a limit to what makes sense.
2953 */
2954 if (pPool->cPresent > 1024)
2955 {
2956 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2957 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2958 return VINF_PGM_GCPHYS_ALIASED;
2959 }
2960#endif
2961
2962 /*
2963 * Iterate all the pages until we've encountered all those in use.
2964 * This is a simple but not quite optimal solution.
2965 */
2966 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2967 const uint32_t u32 = u64;
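    /* Plain truncation is fine for the 32-bit comparison value: 32-bit shadow PTEs can only hold
       addresses below 4GB, so pages mapped by them are assumed to have HCPhys below 4GB. */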
2968 unsigned cLeft = pPool->cUsedPages;
2969 unsigned iPage = pPool->cCurPages;
2970 while (--iPage >= PGMPOOL_IDX_FIRST)
2971 {
2972 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2973 if (pPage->GCPhys != NIL_RTGCPHYS)
2974 {
2975 switch (pPage->enmKind)
2976 {
2977 /*
2978 * We only care about shadow page tables.
2979 */
2980 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2981 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2982 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2983 {
2984 unsigned cPresent = pPage->cPresent;
2985 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2986 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2987 if (pPT->a[i].n.u1Present)
2988 {
2989 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2990 {
2991 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2992 pPT->a[i].u = 0;
2993 }
2994 if (!--cPresent)
2995 break;
2996 }
2997 break;
2998 }
2999
3000 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3001 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3002 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3003 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3004 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3005 {
3006 unsigned cPresent = pPage->cPresent;
3007 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3008 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3009 if (pPT->a[i].n.u1Present)
3010 {
3011 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3012 {
3013 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3014 pPT->a[i].u = 0;
3015 }
3016 if (!--cPresent)
3017 break;
3018 }
3019 break;
3020 }
3021 }
3022 if (!--cLeft)
3023 break;
3024 }
3025 }
3026
3027 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3028 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3029 return VINF_SUCCESS;
3030}
3031
3032
3033/**
3034 * Clears the user entry in a user table.
3035 *
3036 * This is used to remove all references to a page when flushing it.
3037 */
3038static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3039{
3040 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3041 Assert(pUser->iUser < pPool->cCurPages);
3042 uint32_t iUserTable = pUser->iUserTable;
3043
3044 /*
3045 * Map the user page.
3046 */
3047 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3048#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3049 if (pUserPage->enmKind == PGMPOOLKIND_ROOT_PAE_PD)
3050 {
3051 /* Must translate the fake 2048-entry PD into one of the four 512-entry PDs since the R0 mapping is not linear. */
3052 Assert(pUser->iUser == PGMPOOL_IDX_PAE_PD);
3053 uint32_t iPdpt = iUserTable / X86_PG_PAE_ENTRIES;
3054 iUserTable %= X86_PG_PAE_ENTRIES;
3055 pUserPage = &pPool->aPages[PGMPOOL_IDX_PAE_PD_0 + iPdpt];
3056 Assert(pUserPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD);
3057 }
3058#endif
3059 union
3060 {
3061 uint64_t *pau64;
3062 uint32_t *pau32;
3063 } u;
3064 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
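    /* The union view lets us clear the user entry as either a 32-bit or a 64-bit slot, depending on
       the kind of the user page (see the switch below). */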
3065
3066 /* Safety precaution in case we change the paging for other modes too in the future. */
3067 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
3068
3069#ifdef VBOX_STRICT
3070 /*
3071 * Some sanity checks.
3072 */
3073 switch (pUserPage->enmKind)
3074 {
3075# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3076 case PGMPOOLKIND_32BIT_PD:
3077 case PGMPOOLKIND_32BIT_PD_PHYS:
3078 Assert(iUserTable < X86_PG_ENTRIES);
3079 break;
3080# else
3081 case PGMPOOLKIND_ROOT_32BIT_PD:
3082 Assert(iUserTable < X86_PG_ENTRIES);
3083 Assert(!(u.pau32[iUserTable] & PGM_PDFLAGS_MAPPING));
3084 break;
3085# endif
3086# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3087 case PGMPOOLKIND_ROOT_PAE_PD:
3088 Assert(iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
3089 AssertMsg(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING), ("%llx %d\n", u.pau64[iUserTable], iUserTable));
3090 break;
3091# endif
3092# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3093 case PGMPOOLKIND_PAE_PDPT:
3094 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3095 case PGMPOOLKIND_PAE_PDPT_PHYS:
3096# else
3097 case PGMPOOLKIND_ROOT_PDPT:
3098# endif
3099 Assert(iUserTable < 4);
3100 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3101 break;
3102 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3103 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3104 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3105 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3106 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3107 case PGMPOOLKIND_PAE_PD_PHYS:
3108 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3109 break;
3110 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3111 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3112 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3113 break;
3114 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3115 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3116 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3117 break;
3118 case PGMPOOLKIND_64BIT_PML4:
3119 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3120 /* GCPhys >> PAGE_SHIFT is the index here */
3121 break;
3122 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3123 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3124 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3125 break;
3126
3127 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3128 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3129 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3130 break;
3131
3132 case PGMPOOLKIND_ROOT_NESTED:
3133 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3134 break;
3135
3136 default:
3137 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3138 break;
3139 }
3140#endif /* VBOX_STRICT */
3141
3142 /*
3143 * Clear the entry in the user page.
3144 */
3145 switch (pUserPage->enmKind)
3146 {
3147 /* 32-bit entries */
3148#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3149 case PGMPOOLKIND_32BIT_PD:
3150 case PGMPOOLKIND_32BIT_PD_PHYS:
3151#else
3152 case PGMPOOLKIND_ROOT_32BIT_PD:
3153#endif
3154 u.pau32[iUserTable] = 0;
3155 break;
3156
3157 /* 64-bit entries */
3158 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3159 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3160 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3161 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3162 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3163 case PGMPOOLKIND_PAE_PD_PHYS:
3164 case PGMPOOLKIND_PAE_PDPT_PHYS:
3165 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3166 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3167 case PGMPOOLKIND_64BIT_PML4:
3168 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3169 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3170# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3171 case PGMPOOLKIND_ROOT_PAE_PD:
3172#endif
3173#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3174 case PGMPOOLKIND_PAE_PDPT:
3175 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3176#else
3177 case PGMPOOLKIND_ROOT_PDPT:
3178#endif
3179 case PGMPOOLKIND_ROOT_NESTED:
3180 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3181 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3182 u.pau64[iUserTable] = 0;
3183 break;
3184
3185 default:
3186 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3187 }
3188}
3189
3190
3191/**
3192 * Clears all users of a page.
3193 */
3194static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3195{
3196 /*
3197 * Free all the user records.
3198 */
3199 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3200
3201 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3202 uint16_t i = pPage->iUserHead;
3203 while (i != NIL_PGMPOOL_USER_INDEX)
3204 {
3205 /* Clear the entry in the user table. */
3206 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3207
3208 /* Free it. */
3209 const uint16_t iNext = paUsers[i].iNext;
3210 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3211 paUsers[i].iNext = pPool->iUserFreeHead;
3212 pPool->iUserFreeHead = i;
3213
3214 /* Next. */
3215 i = iNext;
3216 }
3217 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3218}
3219
3220#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3221
3222/**
3223 * Allocates a new physical cross reference extent.
3224 *
3225 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3226 * @param pVM The VM handle.
3227 * @param piPhysExt Where to store the phys ext index.
3228 */
3229PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3230{
3231 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3232 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3233 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3234 {
3235 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3236 return NULL;
3237 }
3238 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3239 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3240 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3241 *piPhysExt = iPhysExt;
3242 return pPhysExt;
3243}
3244
3245
3246/**
3247 * Frees a physical cross reference extent.
3248 *
3249 * @param pVM The VM handle.
3250 * @param iPhysExt The extent to free.
3251 */
3252void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3253{
3254 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3255 Assert(iPhysExt < pPool->cMaxPhysExts);
3256 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3257 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3258 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3259 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3260 pPool->iPhysExtFreeHead = iPhysExt;
3261}
3262
3263
3264/**
3265 * Frees a chain of physical cross reference extents.
3266 *
3267 * @param pVM The VM handle.
3268 * @param iPhysExt The index of the first extent in the chain to free.
3269 */
3270void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3271{
3272 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3273
3274 const uint16_t iPhysExtStart = iPhysExt;
3275 PPGMPOOLPHYSEXT pPhysExt;
3276 do
3277 {
3278 Assert(iPhysExt < pPool->cMaxPhysExts);
3279 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3280 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3281 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3282
3283 /* next */
3284 iPhysExt = pPhysExt->iNext;
3285 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3286
3287 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3288 pPool->iPhysExtFreeHead = iPhysExtStart;
3289}
3290
3291
3292/**
3293 * Insert a reference into a list of physical cross reference extents.
3294 *
3295 * @returns The new ram range flags (top 16-bits).
3296 *
3297 * @param pVM The VM handle.
3298 * @param iPhysExt The physical extent index of the list head.
3299 * @param iShwPT The shadow page table index.
3300 *
3301 */
3302static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3303{
3304 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3305 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3306
3307 /* special common case. */
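    /* (An extent created by pgmPoolTrackPhysExtAddref starts out with slots 0 and 1 filled, so
       slot 2 of the head extent is typically the first free one.) */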
3308 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3309 {
3310 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3311 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3312 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
3313 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3314 }
3315
3316 /* general treatment. */
3317 const uint16_t iPhysExtStart = iPhysExt;
3318 unsigned cMax = 15;
3319 for (;;)
3320 {
3321 Assert(iPhysExt < pPool->cMaxPhysExts);
3322 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3323 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3324 {
3325 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3326 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3327 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3328 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3329 }
3330 if (!--cMax)
3331 {
3332 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3333 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3334 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
3335 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3336 }
 /* Advance to the next extent in the chain so the whole list is searched; when we reach
    the end, fall out of the loop so a new extent can be appended below. */
 iPhysExt = paPhysExts[iPhysExt].iNext;
 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
 break;
3337 }
3338
3339 /* add another extent to the list. */
3340 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3341 if (!pNew)
3342 {
3343 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3344 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3345 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3346 }
3347 pNew->iNext = iPhysExtStart;
3348 pNew->aidx[0] = iShwPT;
3349 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3350 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3351}
3352
3353
3354/**
3355 * Adds a reference to a guest physical page where extents are in use.
3356 *
3357 * @returns The new ram range flags (top 16-bits).
3358 *
3359 * @param pVM The VM handle.
3360 * @param u16 The ram range flags (top 16-bits).
3361 * @param iShwPT The shadow page table index.
3362 */
3363uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3364{
3365 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
3366 {
3367 /*
3368 * Convert to extent list.
3369 */
3370 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
3371 uint16_t iPhysExt;
3372 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3373 if (pPhysExt)
3374 {
3375 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
3376 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3377 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
3378 pPhysExt->aidx[1] = iShwPT;
3379 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3380 }
3381 else
3382 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3383 }
3384 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
3385 {
3386 /*
3387 * Insert into the extent list.
3388 */
3389 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
3390 }
3391 else
3392 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3393 return u16;
3394}
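/*
 * A sketch of the tracking encoding handled by the two functions above, as
 * implied by the masks and shifts used in this file (the authoritative
 * definitions live in the MM/PGM headers).  The top 16 bits of the per-page
 * tracking word (currently kept in the upper bits of PGMPAGE::HCPhys, see the
 * PAGE FLAGS todos below) split into a cRefs part and an index part:
 *
 *   - cRefs == 1:                        the index is the single shadow page
 *                                        table (pool page) referencing the page.
 *   - cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, index != MM_RAM_FLAGS_IDX_OVERFLOWED:
 *                                        the index is the head of a chain of
 *                                        PGMPOOLPHYSEXT nodes, each holding up
 *                                        to RT_ELEMENTS(aidx) pool page indexes.
 *   - cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, index == MM_RAM_FLAGS_IDX_OVERFLOWED:
 *                                        too many references to track individually;
 *                                        the add path just counts these
 *                                        (StatTrackAliasedLots) and the deref
 *                                        path treats them as nothing-to-do.
 */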
3395
3396
3397/**
3398 * Clear references to guest physical memory.
3399 *
3400 * @param pPool The pool.
3401 * @param pPage The page.
3402 * @param pPhysPage Pointer to the aPages entry in the ram range.
3403 */
3404void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3405{
3406 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
3407 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3408
3409 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
3410 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
3411 {
3412 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3413 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3414 do
3415 {
3416 Assert(iPhysExt < pPool->cMaxPhysExts);
3417
3418 /*
3419 * Look for the shadow page and check if it's all freed.
3420 */
3421 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3422 {
3423 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3424 {
3425 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3426
3427 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3428 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3429 {
3430 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3431 return;
3432 }
3433
3434 /* we can free the node. */
3435 PVM pVM = pPool->CTX_SUFF(pVM);
3436 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3437 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3438 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3439 {
3440 /* lonely node */
3441 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3442 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
3443 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3444 }
3445 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3446 {
3447 /* head */
3448 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
3449 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
3450 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
3451 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
3452 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3453 }
3454 else
3455 {
3456 /* in list */
3457 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3458 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3459 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3460 }
3461 iPhysExt = iPhysExtNext;
3462 return;
3463 }
3464 }
3465
3466 /* next */
3467 iPhysExtPrev = iPhysExt;
3468 iPhysExt = paPhysExts[iPhysExt].iNext;
3469 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3470
3471 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3472 }
3473 else /* nothing to do */
3474 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
3475}
3476
3477
3478/**
3479 * Clear references to guest physical memory.
3480 *
3481 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3482 * is assumed to be correct, so the linear search can be skipped and we can assert
3483 * at an earlier point.
3484 *
3485 * @param pPool The pool.
3486 * @param pPage The page.
3487 * @param HCPhys The host physical address corresponding to the guest page.
3488 * @param GCPhys The guest physical address corresponding to HCPhys.
3489 */
3490static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3491{
3492 /*
3493 * Walk range list.
3494 */
3495 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3496 while (pRam)
3497 {
3498 RTGCPHYS off = GCPhys - pRam->GCPhys;
3499 if (off < pRam->cb)
3500 {
3501 /* does it match? */
3502 const unsigned iPage = off >> PAGE_SHIFT;
3503 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3504#ifdef LOG_ENABLED
3505            RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3506            Log(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3507#endif
3508 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3509 {
3510 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3511 return;
3512 }
3513 break;
3514 }
3515 pRam = pRam->CTX_SUFF(pNext);
3516 }
3517 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3518}
3519
3520
3521/**
3522 * Clear references to guest physical memory.
3523 *
3524 * @param pPool The pool.
3525 * @param pPage The page.
3526 * @param HCPhys The host physical address corresponding to the guest page.
3527 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3528 */
3529static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3530{
3531 /*
3532 * Walk range list.
3533 */
3534 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3535 while (pRam)
3536 {
3537 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3538 if (off < pRam->cb)
3539 {
3540 /* does it match? */
3541 const unsigned iPage = off >> PAGE_SHIFT;
3542 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3543 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3544 {
3545 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3546 return;
3547 }
3548 break;
3549 }
3550 pRam = pRam->CTX_SUFF(pNext);
3551 }
3552
3553 /*
3554 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3555 */
3556 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3557 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3558 while (pRam)
3559 {
3560 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3561 while (iPage-- > 0)
3562 {
3563 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3564 {
3565 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3566 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3567 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3568 return;
3569 }
3570 }
3571 pRam = pRam->CTX_SUFF(pNext);
3572 }
3573
3574 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3575}
3576
3577
3578/**
3579 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3580 *
3581 * @param pPool The pool.
3582 * @param pPage The page.
3583 * @param pShwPT The shadow page table (mapping of the page).
3584 * @param pGstPT The guest page table.
3585 */
3586DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3587{
3588 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3589 if (pShwPT->a[i].n.u1Present)
3590 {
3591 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3592 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3593 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3594 if (!--pPage->cPresent)
3595 break;
3596 }
3597}
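/* The 32-bit/32-bit variant above starts at pPage->iFirstPresent and stops once
   pPage->cPresent reaches zero; the PAE variants below simply scan the whole
   shadow table. */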
3598
3599
3600/**
3601 * Clear references to guest physical memory in a PAE / 32-bit page table.
3602 *
3603 * @param pPool The pool.
3604 * @param pPage The page.
3605 * @param pShwPT The shadow page table (mapping of the page).
3606 * @param pGstPT The guest page table (just a half one).
3607 */
3608DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3609{
3610 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3611 if (pShwPT->a[i].n.u1Present)
3612 {
3613            Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3614 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3615 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3616 }
3617}
3618
3619
3620/**
3621 * Clear references to guest physical memory in a PAE / PAE page table.
3622 *
3623 * @param pPool The pool.
3624 * @param pPage The page.
3625 * @param pShwPT The shadow page table (mapping of the page).
3626 * @param pGstPT The guest page table.
3627 */
3628DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3629{
3630 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3631 if (pShwPT->a[i].n.u1Present)
3632 {
3633            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3634 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3635 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3636 }
3637}
3638
3639
3640/**
3641 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3642 *
3643 * @param pPool The pool.
3644 * @param pPage The page.
3645 * @param pShwPT The shadow page table (mapping of the page).
3646 */
3647DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3648{
3649 RTGCPHYS GCPhys = pPage->GCPhys;
3650 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3651 if (pShwPT->a[i].n.u1Present)
3652 {
3653 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3654 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3655 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3656 }
3657}
3658
3659
3660/**
3661 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3662 *
3663 * @param pPool The pool.
3664 * @param pPage The page.
3665 * @param pShwPT The shadow page table (mapping of the page).
3666 */
3667DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3668{
3669 RTGCPHYS GCPhys = pPage->GCPhys;
3670 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3671 if (pShwPT->a[i].n.u1Present)
3672 {
3673 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3674 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3675 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3676 }
3677}
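/* The big-page (2/4MB) variants above know the exact guest physical address of
   every entry (pPage->GCPhys plus the entry offset), so they can use
   pgmPoolTracDerefGCPhys directly; the PT-for-PT variants further up only have
   the guest PTE value and therefore go through pgmPoolTracDerefGCPhysHint. */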
3678
3679#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3680
3681
3682#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3683/**
3684 * Clear references to shadowed pages in a 32-bit page directory.
3685 *
3686 * @param pPool The pool.
3687 * @param pPage The page.
3688 * @param pShwPD The shadow page directory (mapping of the page).
3689 */
3690DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
3691{
3692 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3693 {
3694 if ( pShwPD->a[i].n.u1Present
3695 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3696 )
3697 {
3698 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
3699 if (pSubPage)
3700 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3701 else
3702 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
3703 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3704 }
3705 }
3706}
3707#endif
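/* In the page directory deref routines, entries marked with PGM_PDFLAGS_MAPPING
   are skipped: such PDEs describe VMM mappings rather than pool-managed shadow
   tables, so there is no pool sub-page to look up and free for them. */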
3708
3709/**
3710 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
3711 *
3712 * @param pPool The pool.
3713 * @param pPage The page.
3714 * @param pShwPD The shadow page directory (mapping of the page).
3715 */
3716DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3717{
3718 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3719 {
3720 if ( pShwPD->a[i].n.u1Present
3721#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3722 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3723#endif
3724 )
3725 {
3726 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3727 if (pSubPage)
3728 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3729 else
3730 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3731 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3732 }
3733 }
3734}
3735
3736
3737/**
3738 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3739 *
3740 * @param pPool The pool.
3741 * @param pPage The page.
3742 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3743 */
3744DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3745{
3746 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3747 {
3748 if ( pShwPDPT->a[i].n.u1Present
3749#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3750 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
3751#endif
3752 )
3753 {
3754 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3755 if (pSubPage)
3756 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3757 else
3758 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3759 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3760 }
3761 }
3762}
3763
3764
3765/**
3766 * Clear references to shadowed pages in a 64-bit level 4 page table.
3767 *
3768 * @param pPool The pool.
3769 * @param pPage The page.
3770 * @param pShwPML4 The shadow page map level-4 table (mapping of the page).
3771 */
3772DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3773{
3774 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3775 {
3776 if (pShwPML4->a[i].n.u1Present)
3777 {
3778 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3779 if (pSubPage)
3780 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3781 else
3782 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3783 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3784 }
3785 }
3786}
3787
3788
3789/**
3790 * Clear references to shadowed pages in an EPT page table.
3791 *
3792 * @param pPool The pool.
3793 * @param pPage The page.
3794 * @param pShwPT The shadow page table (mapping of the page).
3795 */
3796DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3797{
3798 RTGCPHYS GCPhys = pPage->GCPhys;
3799 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3800 if (pShwPT->a[i].n.u1Present)
3801 {
3802 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
3803                  i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3804 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3805 }
3806}
3807
3808
3809/**
3810 * Clear references to shadowed pages in an EPT page directory.
3811 *
3812 * @param pPool The pool.
3813 * @param pPage The page.
3814 * @param pShwPD The shadow page directory (mapping of the page).
3815 */
3816DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3817{
3818 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3819 {
3820 if (pShwPD->a[i].n.u1Present)
3821 {
3822 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3823 if (pSubPage)
3824 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3825 else
3826 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3827 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3828 }
3829 }
3830}
3831
3832
3833/**
3834 * Clear references to shadowed pages in an EPT page directory pointer table.
3835 *
3836 * @param pPool The pool.
3837 * @param pPage The page.
3838 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3839 */
3840DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3841{
3842 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3843 {
3844 if (pShwPDPT->a[i].n.u1Present)
3845 {
3846 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3847 if (pSubPage)
3848 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3849 else
3850 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3851 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3852 }
3853 }
3854}
3855
3856
3857/**
3858 * Clears all references made by this page.
3859 *
3860 * This includes other shadow pages and GC physical addresses.
3861 *
3862 * @param pPool The pool.
3863 * @param pPage The page.
3864 */
3865static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3866{
3867 /*
3868 * Map the shadow page and take action according to the page kind.
3869 */
3870 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3871 switch (pPage->enmKind)
3872 {
3873#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3874 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3875 {
3876 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3877 void *pvGst;
3878 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3879 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3880 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3881 break;
3882 }
3883
3884 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3885 {
3886 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3887 void *pvGst;
3888 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3889 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3890 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3891 break;
3892 }
3893
3894 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3895 {
3896 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3897 void *pvGst;
3898 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3899 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3900 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3901 break;
3902 }
3903
3904 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3905 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3906 {
3907 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3908 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3909 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3910 break;
3911 }
3912
3913 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3914 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3915 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3916 {
3917 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3918 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3919 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3920 break;
3921 }
3922
3923#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3924 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3925 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3926 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3927 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3928 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3929 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3930 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3931 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3932 break;
3933#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3934
3935 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3936 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3937 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3938 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3939 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3940 case PGMPOOLKIND_PAE_PD_PHYS:
3941 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3942 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3943 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3944 break;
3945
3946#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3947 case PGMPOOLKIND_32BIT_PD:
3948 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
3949 break;
3950
3951 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3952 case PGMPOOLKIND_PAE_PDPT:
3953 case PGMPOOLKIND_PAE_PDPT_PHYS:
3954#endif
3955 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3956 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3957 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3958 break;
3959
3960 case PGMPOOLKIND_64BIT_PML4:
3961 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3962 break;
3963
3964 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3965 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
3966 break;
3967
3968 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3969 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
3970 break;
3971
3972 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3973 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
3974 break;
3975
3976 default:
3977 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3978 }
3979
3980    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3981 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3982 ASMMemZeroPage(pvShw);
3983 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3984 pPage->fZeroed = true;
3985}
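/* Zeroing the page again at the end of pgmPoolTrackDeref and setting fZeroed
   lets pgmPoolAlloc skip its own ASMMemZeroPage when the page is handed out
   the next time (see the !pPage->fZeroed check there). */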
3986
3987#endif /* PGMPOOL_WITH_USER_TRACKING */
3988
3989/**
3990 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3991 *
3992 * @param pPool The pool.
3993 */
3994static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3995{
3996#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3997 /* Start a subset so we won't run out of mapping space. */
3998 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
3999 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4000#endif
4001
4002 /*
4003     * These special pages are all mapped at the indexes 1..PGMPOOL_IDX_FIRST-1.
4004 */
4005 Assert(NIL_PGMPOOL_IDX == 0);
4006 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
4007 {
4008 /*
4009 * Get the page address.
4010 */
4011 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4012 union
4013 {
4014 uint64_t *pau64;
4015 uint32_t *pau32;
4016 } u;
4017
4018 /*
4019 * Mark stuff not present.
4020 */
4021 switch (pPage->enmKind)
4022 {
4023#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4024 case PGMPOOLKIND_ROOT_32BIT_PD:
4025 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4026 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
4027 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
4028 u.pau32[iPage] = 0;
4029 break;
4030
4031 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4032 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4033 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
4034 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
4035 u.pau64[iPage] = 0;
4036 break;
4037
4038 case PGMPOOLKIND_ROOT_PDPT:
4039 /* Not root of shadowed pages currently, ignore it. */
4040 break;
4041#endif
4042
4043 case PGMPOOLKIND_ROOT_NESTED:
4044 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4045 ASMMemZero32(u.pau64, PAGE_SIZE);
4046 break;
4047 }
4048 }
4049
4050 /*
4051 * Paranoia (to be removed), flag a global CR3 sync.
4052 */
4053 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
4054
4055#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4056 /* Pop the subset. */
4057 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4058#endif
4059}
4060
4061
4062/**
4063 * Flushes the entire cache.
4064 *
4065 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4066 * and will execute the CR3 flush.
4067 *
4068 * @param pPool The pool.
4069 */
4070static void pgmPoolFlushAllInt(PPGMPOOL pPool)
4071{
4072 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
4073 LogFlow(("pgmPoolFlushAllInt:\n"));
4074
4075 /*
4076 * If there are no pages in the pool, there is nothing to do.
4077 */
4078 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4079 {
4080 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4081 return;
4082 }
4083
4084 /*
4085 * Nuke the free list and reinsert all pages into it.
4086 */
4087 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4088 {
4089 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4090
4091#ifdef IN_RING3
4092 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMR3, pPage->pvPageR3));
4093#endif
4094#ifdef PGMPOOL_WITH_MONITORING
4095 if (pPage->fMonitored)
4096 pgmPoolMonitorFlush(pPool, pPage);
4097 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4098 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4099 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4100 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4101 pPage->cModifications = 0;
4102#endif
4103 pPage->GCPhys = NIL_RTGCPHYS;
4104 pPage->enmKind = PGMPOOLKIND_FREE;
4105 Assert(pPage->idx == i);
4106 pPage->iNext = i + 1;
4107 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4108 pPage->fSeenNonGlobal = false;
4109        pPage->fMonitored = false;
4110 pPage->fCached = false;
4111 pPage->fReusedFlushPending = false;
4112 pPage->fCR3Mix = false;
4113#ifdef PGMPOOL_WITH_USER_TRACKING
4114 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4115#endif
4116#ifdef PGMPOOL_WITH_CACHE
4117 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4118 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4119#endif
4120 }
4121 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4122 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4123 pPool->cUsedPages = 0;
4124
4125#ifdef PGMPOOL_WITH_USER_TRACKING
4126 /*
4127 * Zap and reinitialize the user records.
4128 */
4129 pPool->cPresent = 0;
4130 pPool->iUserFreeHead = 0;
4131 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4132 const unsigned cMaxUsers = pPool->cMaxUsers;
4133 for (unsigned i = 0; i < cMaxUsers; i++)
4134 {
4135 paUsers[i].iNext = i + 1;
4136 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4137 paUsers[i].iUserTable = 0xfffffffe;
4138 }
4139 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4140#endif
4141
4142#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4143 /*
4144 * Clear all the GCPhys links and rebuild the phys ext free list.
4145 */
4146 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4147 pRam;
4148 pRam = pRam->CTX_SUFF(pNext))
4149 {
4150 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4151 while (iPage-- > 0)
4152 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
4153 }
4154
4155 pPool->iPhysExtFreeHead = 0;
4156 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4157 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4158 for (unsigned i = 0; i < cMaxPhysExts; i++)
4159 {
4160 paPhysExts[i].iNext = i + 1;
4161 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4162 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4163 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4164 }
4165 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4166#endif
4167
4168#ifdef PGMPOOL_WITH_MONITORING
4169 /*
4170 * Just zap the modified list.
4171 */
4172 pPool->cModifiedPages = 0;
4173 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4174#endif
4175
4176#ifdef PGMPOOL_WITH_CACHE
4177 /*
4178 * Clear the GCPhys hash and the age list.
4179 */
4180 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4181 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4182 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4183 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4184#endif
4185
4186 /*
4187 * Flush all the special root pages.
4188 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4189 */
4190 pgmPoolFlushAllSpecialRoots(pPool);
4191 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4192 {
4193 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4194 pPage->iNext = NIL_PGMPOOL_IDX;
4195#ifdef PGMPOOL_WITH_MONITORING
4196 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4197 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4198 pPage->cModifications = 0;
4199 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4200 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4201 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4202 if (pPage->fMonitored)
4203 {
4204 PVM pVM = pPool->CTX_SUFF(pVM);
4205 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4206 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4207 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4208 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4209 pPool->pszAccessHandler);
4210 AssertFatalRCSuccess(rc);
4211# ifdef PGMPOOL_WITH_CACHE
4212 pgmPoolHashInsert(pPool, pPage);
4213# endif
4214 }
4215#endif
4216#ifdef PGMPOOL_WITH_USER_TRACKING
4217 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4218#endif
4219#ifdef PGMPOOL_WITH_CACHE
4220 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4221 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4222#endif
4223 }
4224
4225 /*
4226 * Finally, assert the FF.
4227 */
4228 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
4229
4230 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4231}
4232
4233
4234/**
4235 * Flushes a pool page.
4236 *
4237 * This moves the page to the free list after removing all user references to it.
4238 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
4239 *
4240 * @returns VBox status code.
4241 * @retval VINF_SUCCESS on success.
4242 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
4243 * @param pPool The pool.
4244 * @param pPage The page to flush.
4245 */
4246int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4247{
4248 int rc = VINF_SUCCESS;
4249 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4250 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4251 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4252
4253 /*
4254 * Quietly reject any attempts at flushing any of the special root pages.
4255 */
4256 if (pPage->idx < PGMPOOL_IDX_FIRST)
4257 {
4258 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4259 return VINF_SUCCESS;
4260 }
4261
4262 /*
4263 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4264 */
4265 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
4266 {
4267#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4268 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4269 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4270 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4271 || pPage->enmKind == PGMPOOLKIND_32BIT_PD,
4272 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4273#else
4274 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4,
4275 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4276#endif
4277 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4278 return VINF_SUCCESS;
4279 }
4280
4281#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4282 /* Start a subset so we won't run out of mapping space. */
4283 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4284 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4285#endif
4286
4287 /*
4288     * Mark the page as being in need of an ASMMemZeroPage().
4289 */
4290 pPage->fZeroed = false;
4291
4292#ifdef PGMPOOL_WITH_USER_TRACKING
4293 /*
4294 * Clear the page.
4295 */
4296 pgmPoolTrackClearPageUsers(pPool, pPage);
4297 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4298 pgmPoolTrackDeref(pPool, pPage);
4299 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4300#endif
4301
4302#ifdef PGMPOOL_WITH_CACHE
4303 /*
4304 * Flush it from the cache.
4305 */
4306 pgmPoolCacheFlushPage(pPool, pPage);
4307#endif /* PGMPOOL_WITH_CACHE */
4308
4309#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4310 /* Heavy stuff done. */
4311 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4312#endif
4313
4314#ifdef PGMPOOL_WITH_MONITORING
4315 /*
4316     * Deregister the monitoring.
4317 */
4318 if (pPage->fMonitored)
4319 rc = pgmPoolMonitorFlush(pPool, pPage);
4320#endif
4321
4322 /*
4323 * Free the page.
4324 */
4325 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4326 pPage->iNext = pPool->iFreeHead;
4327 pPool->iFreeHead = pPage->idx;
4328 pPage->enmKind = PGMPOOLKIND_FREE;
4329 pPage->GCPhys = NIL_RTGCPHYS;
4330 pPage->fReusedFlushPending = false;
4331
4332 pPool->cUsedPages--;
4333 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4334 return rc;
4335}
4336
4337
4338/**
4339 * Frees a usage of a pool page.
4340 *
4341 * The caller is responsible for updating the user table so that it no longer
4342 * references the shadow page.
4343 *
4344 * @param pPool The pool.
4345 * @param pPage The shadow page.
4346 * @param iUser The shadow page pool index of the user table.
4347 * @param iUserTable The index into the user table (shadowed).
4348 */
4349void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4350{
4351 STAM_PROFILE_START(&pPool->StatFree, a);
4352 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4353 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4354 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4355#ifdef PGMPOOL_WITH_USER_TRACKING
4356 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4357#endif
4358#ifdef PGMPOOL_WITH_CACHE
4359 if (!pPage->fCached)
4360#endif
4361 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
4362 STAM_PROFILE_STOP(&pPool->StatFree, a);
4363}
4364
4365
4366/**
4367 * Makes more free pages available, growing the pool or freeing up used pages as necessary.
4368 *
4369 * @returns VBox status code.
4370 * @retval VINF_SUCCESS on success.
4371 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4372 *
4373 * @param pPool The pool.
4374 * @param iUser The user of the page.
4375 */
4376static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
4377{
4378 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4379
4380 /*
4381     * If the pool isn't fully grown yet, expand it.
4382 */
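    /* In ring-0 and raw-mode context the pool cannot be grown in place, so the
       request is forwarded to ring-3 with a VMMCALLHOST_PGM_POOL_GROW call; in
       both cases PGMR3PoolGrow ends up doing the actual work. */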
4383 if (pPool->cCurPages < pPool->cMaxPages)
4384 {
4385 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4386#ifdef IN_RING3
4387 int rc = PGMR3PoolGrow(pPool->pVMR3);
4388#else
4389 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
4390#endif
4391 if (RT_FAILURE(rc))
4392 return rc;
4393 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4394 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4395 return VINF_SUCCESS;
4396 }
4397
4398#ifdef PGMPOOL_WITH_CACHE
4399 /*
4400 * Free one cached page.
4401 */
4402 return pgmPoolCacheFreeOne(pPool, iUser);
4403#else
4404 /*
4405 * Flush the pool.
4406 *
4407 * If we have tracking enabled, it should be possible to come up with
4408 * a cheap replacement strategy...
4409 */
4410 /* @todo incompatible with long mode paging (cr3 root will be flushed) */
4411    Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
4412 pgmPoolFlushAllInt(pPool);
4413 return VERR_PGM_POOL_FLUSHED;
4414#endif
4415}
4416
4417
4418/**
4419 * Allocates a page from the pool.
4420 *
4421 * This page may actually be a cached page and not in need of any processing
4422 * on the caller's part.
4423 *
4424 * @returns VBox status code.
4425 * @retval VINF_SUCCESS if a NEW page was allocated.
4426 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4427 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4428 * @param pVM The VM handle.
4429 * @param GCPhys The GC physical address of the page we're gonna shadow.
4430 * For 4MB and 2MB PD entries, it's the first address the
4431 * shadow PT is covering.
4432 * @param enmKind The kind of mapping.
4433 * @param iUser The shadow page pool index of the user table.
4434 * @param iUserTable The index into the user table (shadowed).
4435 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4436 */
4437int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
4438{
4439 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4440 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4441 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4442 *ppPage = NULL;
4443 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4444 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4445 * Assert(!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4446
4447#ifdef PGMPOOL_WITH_CACHE
4448 if (pPool->fCacheEnabled)
4449 {
4450 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
4451 if (RT_SUCCESS(rc2))
4452 {
4453 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4454 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4455 return rc2;
4456 }
4457 }
4458#endif
4459
4460 /*
4461 * Allocate a new one.
4462 */
4463 int rc = VINF_SUCCESS;
4464 uint16_t iNew = pPool->iFreeHead;
4465 if (iNew == NIL_PGMPOOL_IDX)
4466 {
4467 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
4468 if (RT_FAILURE(rc))
4469 {
4470 if (rc != VERR_PGM_POOL_CLEARED)
4471 {
4472 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4473 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4474 return rc;
4475 }
4476 Log(("pgmPoolMakeMoreFreePages failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
4477 rc = VERR_PGM_POOL_FLUSHED;
4478 }
4479 iNew = pPool->iFreeHead;
4480 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4481 }
4482
4483 /* unlink the free head */
4484 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4485 pPool->iFreeHead = pPage->iNext;
4486 pPage->iNext = NIL_PGMPOOL_IDX;
4487
4488 /*
4489 * Initialize it.
4490 */
4491 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4492 pPage->enmKind = enmKind;
4493 pPage->GCPhys = GCPhys;
4494 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4495 pPage->fMonitored = false;
4496 pPage->fCached = false;
4497 pPage->fReusedFlushPending = false;
4498 pPage->fCR3Mix = false;
4499#ifdef PGMPOOL_WITH_MONITORING
4500 pPage->cModifications = 0;
4501 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4502 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4503#endif
4504#ifdef PGMPOOL_WITH_USER_TRACKING
4505 pPage->cPresent = 0;
4506 pPage->iFirstPresent = ~0;
4507
4508 /*
4509 * Insert into the tracking and cache. If this fails, free the page.
4510 */
4511 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4512 if (RT_FAILURE(rc3))
4513 {
4514 if (rc3 != VERR_PGM_POOL_CLEARED)
4515 {
4516 pPool->cUsedPages--;
4517 pPage->enmKind = PGMPOOLKIND_FREE;
4518 pPage->GCPhys = NIL_RTGCPHYS;
4519 pPage->iNext = pPool->iFreeHead;
4520 pPool->iFreeHead = pPage->idx;
4521 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4522 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4523 return rc3;
4524 }
4525 Log(("pgmPoolTrackInsert failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4526 rc = VERR_PGM_POOL_FLUSHED;
4527 }
4528#endif /* PGMPOOL_WITH_USER_TRACKING */
4529
4530 /*
4531 * Commit the allocation, clear the page and return.
4532 */
4533#ifdef VBOX_WITH_STATISTICS
4534 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4535 pPool->cUsedPagesHigh = pPool->cUsedPages;
4536#endif
4537
4538 if (!pPage->fZeroed)
4539 {
4540 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4541 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4542 ASMMemZeroPage(pv);
4543 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4544 }
4545
4546 *ppPage = pPage;
4547 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4548 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4549 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4550 return rc;
4551}
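/*
 * Illustrative call pattern for pgmPoolAlloc (a sketch only; the real callers
 * are the shadow paging sync/walk code elsewhere in PGM, and the user page and
 * table index names below are made up):
 *
 *      PPGMPOOLPAGE pShwPage;
 *      rc = pgmPoolAlloc(pVM, GCPhysGuestPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                        pShwPdPage->idx, iPdEntry, &pShwPage);
 *
 *      VINF_SUCCESS:           a new, zeroed shadow page was allocated.
 *      VINF_PGM_CACHED_PAGE:   an existing shadow of the same guest table was
 *                              returned; its contents are already valid.
 *      VERR_PGM_POOL_FLUSHED:  the pool had to be flushed to make room; the
 *                              caller must honour the pending CR3 sync (FF).
 */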
4552
4553
4554/**
4555 * Frees a usage of a pool page.
4556 *
4557 * @param pVM The VM handle.
4558 * @param HCPhys The HC physical address of the shadow page.
4559 * @param iUser The shadow page pool index of the user table.
4560 * @param iUserTable The index into the user table (shadowed).
4561 */
4562void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4563{
4564 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4565 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4566 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4567}
4568
4569
4570/**
4571 * Gets an in-use page in the pool by its physical address.
4572 *
4573 * @returns Pointer to the page.
4574 * @param pVM The VM handle.
4575 * @param HCPhys The HC physical address of the shadow page.
4576 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4577 */
4578PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4579{
4580 /** @todo profile this! */
4581 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4582 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4583 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%RHp -> %p:{.idx=%d .GCPhys=%RGp .enmKind=%s}\n",
4584 HCPhys, pPage, pPage->idx, pPage->GCPhys, pgmPoolPoolKindToStr(pPage->enmKind)));
4585 return pPage;
4586}
4587
4588
4589/**
4590 * Flushes the entire cache.
4591 *
4592 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4593 * and will execute the CR3 flush.
4594 *
4595 * @param pVM The VM handle.
4596 */
4597void pgmPoolFlushAll(PVM pVM)
4598{
4599 LogFlow(("pgmPoolFlushAll:\n"));
4600 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4601}
4602
4603#ifdef LOG_ENABLED
4604static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
4605{
4606 switch(enmKind)
4607 {
4608 case PGMPOOLKIND_INVALID:
4609 return "PGMPOOLKIND_INVALID";
4610 case PGMPOOLKIND_FREE:
4611 return "PGMPOOLKIND_FREE";
4612 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4613 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4614 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4615 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4616 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4617 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4618 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4619 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4620 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4621 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4622 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4623 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
4624 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4625 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
4626 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4627 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
4628 case PGMPOOLKIND_32BIT_PD:
4629 return "PGMPOOLKIND_32BIT_PD";
4630 case PGMPOOLKIND_32BIT_PD_PHYS:
4631 return "PGMPOOLKIND_32BIT_PD_PHYS";
4632 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4633 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
4634 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4635 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
4636 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4637 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
4638 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4639 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
4640 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4641 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
4642 case PGMPOOLKIND_PAE_PD_PHYS:
4643 return "PGMPOOLKIND_PAE_PD_PHYS";
4644 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4645 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
4646 case PGMPOOLKIND_PAE_PDPT:
4647 return "PGMPOOLKIND_PAE_PDPT";
4648 case PGMPOOLKIND_PAE_PDPT_PHYS:
4649 return "PGMPOOLKIND_PAE_PDPT_PHYS";
4650 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4651 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
4652 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4653 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
4654 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4655 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
4656 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4657 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
4658 case PGMPOOLKIND_64BIT_PML4:
4659 return "PGMPOOLKIND_64BIT_PML4";
4660 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4661 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
4662 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4663 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
4664 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4665 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
4666#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4667 case PGMPOOLKIND_ROOT_32BIT_PD:
4668 return "PGMPOOLKIND_ROOT_32BIT_PD";
4669 case PGMPOOLKIND_ROOT_PAE_PD:
4670 return "PGMPOOLKIND_ROOT_PAE_PD";
4671 case PGMPOOLKIND_ROOT_PDPT:
4672 return "PGMPOOLKIND_ROOT_PDPT";
4673#endif
4674 case PGMPOOLKIND_ROOT_NESTED:
4675 return "PGMPOOLKIND_ROOT_NESTED";
4676 }
4677 return "Unknown kind!";
4678}
4679#endif