VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@16663

Last change on this file since 16663 was 16663, checked in by vboxsync, 16 years ago

VBOX_WITH_PGMPOOL_PAGING_ONLY: updates for 32 bits root pd invalidation + minor changes

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 168.9 KB
1/* $Id: PGMAllPool.cpp 16663 2009-02-11 13:17:20Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24*   Header Files                                                               *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42
43
44/*******************************************************************************
45*   Internal Functions                                                         *
46*******************************************************************************/
47__BEGIN_DECLS
48static void pgmPoolFlushAllInt(PPGMPOOL pPool);
49#ifdef PGMPOOL_WITH_USER_TRACKING
50DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
51DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#endif
54#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
55static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
56#endif
57#ifdef PGMPOOL_WITH_CACHE
58static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
59#endif
60#ifdef PGMPOOL_WITH_MONITORING
61static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
62#endif
63#ifndef IN_RING3
64DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
65#endif
66#ifdef LOG_ENABLED
67static char *pgmPoolPoolKindToStr(uint8_t enmKind);
68#endif
69__END_DECLS
70
71
72/**
73 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
74 *
75 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
76 * @param enmKind The page kind.
77 */
78DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
79{
80 switch (enmKind)
81 {
82 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
83 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
84 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
85 return true;
86 default:
87 return false;
88 }
89}
90
91
92#if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
93/**
94 * Maps a pool page into the current context.
95 *
96 * @returns Pointer to the mapping.
97 * @param pPGM Pointer to the PGM instance data.
98 * @param pPage The page to map.
99 */
100void *pgmPoolMapPageFallback(PPGM pPGM, PPGMPOOLPAGE pPage)
101{
102 /* General pages are taken care of by the inlined part; it
103 only ends up here in case of failure. */
104 AssertReleaseReturn(pPage->idx < PGMPOOL_IDX_FIRST, NULL);
105
106/** @todo make sure HCPhys is valid for *all* indexes. */
107 /* special pages. */
108# ifdef IN_RC
109 switch (pPage->idx)
110 {
111# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
112 case PGMPOOL_IDX_PD:
113 case PGMPOOL_IDX_PDPT:
114 case PGMPOOL_IDX_AMD64_CR3:
115 return pPGM->pShwRootRC;
116# else
117 case PGMPOOL_IDX_PD:
118 return pPGM->pShw32BitPdRC;
119 case PGMPOOL_IDX_PAE_PD:
120 case PGMPOOL_IDX_PAE_PD_0:
121 return pPGM->apShwPaePDsRC[0];
122 case PGMPOOL_IDX_PAE_PD_1:
123 return pPGM->apShwPaePDsRC[1];
124 case PGMPOOL_IDX_PAE_PD_2:
125 return pPGM->apShwPaePDsRC[2];
126 case PGMPOOL_IDX_PAE_PD_3:
127 return pPGM->apShwPaePDsRC[3];
128 case PGMPOOL_IDX_PDPT:
129 return pPGM->pShwPaePdptRC;
130# endif
131 default:
132 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
133 return NULL;
134 }
135
136# else /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
137 RTHCPHYS HCPhys;
138 switch (pPage->idx)
139 {
140# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
141 case PGMPOOL_IDX_PD:
142 case PGMPOOL_IDX_PDPT:
143 case PGMPOOL_IDX_AMD64_CR3:
144 HCPhys = pPGM->HCPhysShwCR3;
145 break;
146
147 case PGMPOOL_IDX_NESTED_ROOT:
148 HCPhys = pPGM->HCPhysShwNestedRoot;
149 break;
150# else
151 case PGMPOOL_IDX_PD:
152 HCPhys = pPGM->HCPhysShw32BitPD;
153 break;
154 case PGMPOOL_IDX_PAE_PD_0:
155 HCPhys = pPGM->aHCPhysPaePDs[0];
156 break;
157 case PGMPOOL_IDX_PAE_PD_1:
158 HCPhys = pPGM->aHCPhysPaePDs[1];
159 break;
160 case PGMPOOL_IDX_PAE_PD_2:
161 HCPhys = pPGM->aHCPhysPaePDs[2];
162 break;
163 case PGMPOOL_IDX_PAE_PD_3:
164 HCPhys = pPGM->aHCPhysPaePDs[3];
165 break;
166 case PGMPOOL_IDX_PDPT:
167 HCPhys = pPGM->HCPhysShwPaePdpt;
168 break;
169 case PGMPOOL_IDX_NESTED_ROOT:
170 HCPhys = pPGM->HCPhysShwNestedRoot;
171 break;
172 case PGMPOOL_IDX_PAE_PD:
173 AssertReleaseMsgFailed(("PGMPOOL_IDX_PAE_PD is not usable in VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 context\n"));
174 return NULL;
175# endif
176 default:
177 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
178 return NULL;
179 }
180 AssertMsg(HCPhys && HCPhys != NIL_RTHCPHYS && !(PAGE_OFFSET_MASK & HCPhys), ("%RHp\n", HCPhys));
181
182 void *pv;
183 pgmR0DynMapHCPageInlined(pPGM, HCPhys, &pv);
184 return pv;
185# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
186}
187#endif /* IN_RC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
188
189
190#ifdef PGMPOOL_WITH_MONITORING
191/**
192 * Determine the size of a write instruction.
193 * @returns number of bytes written.
194 * @param pDis The disassembler state.
195 */
196static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
197{
198 /*
199 * This is very crude and possibly wrong for some opcodes,
200 * but since it's not really supposed to be called, we can
201 * probably live with that.
202 */
203 return DISGetParamSize(pDis, &pDis->param1);
204}
205
206
207/**
208 * Flushes a chain of pages sharing the same access monitor.
209 *
210 * @returns VBox status code suitable for scheduling.
211 * @param pPool The pool.
212 * @param pPage A page in the chain.
213 */
214int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
215{
216 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
217
218 /*
219 * Find the list head.
220 */
221 uint16_t idx = pPage->idx;
222 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
223 {
224 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
225 {
226 idx = pPage->iMonitoredPrev;
227 Assert(idx != pPage->idx);
228 pPage = &pPool->aPages[idx];
229 }
230 }
231
232 /*
233 * Iterate the list flushing each shadow page.
234 */
235 int rc = VINF_SUCCESS;
236 for (;;)
237 {
238 idx = pPage->iMonitoredNext;
239 Assert(idx != pPage->idx);
240 if (pPage->idx >= PGMPOOL_IDX_FIRST)
241 {
242 int rc2 = pgmPoolFlushPage(pPool, pPage);
243 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
244 rc = VINF_PGM_SYNC_CR3;
245 }
246 /* next */
247 if (idx == NIL_PGMPOOL_IDX)
248 break;
249 pPage = &pPool->aPages[idx];
250 }
251 return rc;
252}
253
254
255/**
256 * Wrapper for getting the current context pointer to the entry being modified.
257 *
258 * @returns Pointer to the current context mapping of the entry.
259 * @param pPool The pool.
260 * @param pvFault The fault virtual address.
261 * @param GCPhysFault The fault physical address.
262 * @param cbEntry The entry size.
263 */
264#ifdef IN_RING3
265DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
266#else
267DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
268#endif
269{
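    /* In every context the entry address is obtained by masking the fault
       address (or physical address) down to the start of the entry; the
       callers only pass power-of-two entry sizes, so (cbEntry - 1) is a
       valid mask. */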
270#ifdef IN_RC
271 return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
272
273#elif defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
274 void *pvRet;
275 int rc = PGMDynMapGCPageOff(pPool->pVMR0, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
276 AssertFatalRCSuccess(rc);
277 return pvRet;
278
279#elif defined(IN_RING0)
280 void *pvRet;
281 int rc = pgmRamGCPhys2HCPtr(&pPool->pVMR0->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
282 AssertFatalRCSuccess(rc);
283 return pvRet;
284
285#elif defined(IN_RING3)
286 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
287#else
288# error "huh?"
289#endif
290}
291
292
293/**
294 * Process shadow entries before they are changed by the guest.
295 *
296 * For PT entries we will clear them. For PD entries, we'll simply check
297 * for mapping conflicts and set the SyncCR3 FF if found.
298 *
299 * @param pPool The pool.
300 * @param pPage The head page.
301 * @param GCPhysFault The guest physical fault address.
302 * @param uAddress In R0 and GC this is the guest context fault address (flat).
303 * In R3 this is the host context 'fault' address.
304 * @param pCpu The disassembler state for figuring out the write size.
305 * This need not be specified if the caller knows we won't do cross entry accesses.
306 */
307#ifdef IN_RING3
308void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
309#else
310void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
311#endif
312{
313 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
314 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
315 const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
316
317 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));
318
319 for (;;)
320 {
321 union
322 {
323 void *pv;
324 PX86PT pPT;
325 PX86PTPAE pPTPae;
326 PX86PD pPD;
327 PX86PDPAE pPDPae;
328 PX86PDPT pPDPT;
329 PX86PML4 pPML4;
330 } uShw;
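        /* uShw is just a typed view of the shadow page; which member is
           valid depends on pPage->enmKind below. */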
331
332 switch (pPage->enmKind)
333 {
334 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
335 {
336 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
337 const unsigned iShw = off / sizeof(X86PTE);
338 if (uShw.pPT->a[iShw].n.u1Present)
339 {
340# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
341 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
342 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
343 pgmPoolTracDerefGCPhysHint(pPool, pPage,
344 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
345 pGstPte->u & X86_PTE_PG_MASK);
346# endif
347 uShw.pPT->a[iShw].u = 0;
348 }
349 break;
350 }
351
352 /* page/2 sized */
353 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
354 {
355 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
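            /* A PAE page table shadows only half of a 32-bit guest page table;
               pPage->GCPhys records which half, so only act when the write
               actually hits the half this page shadows. */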
356 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
357 {
358 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
359 if (uShw.pPTPae->a[iShw].n.u1Present)
360 {
361# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
362 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
363 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
364 pgmPoolTracDerefGCPhysHint(pPool, pPage,
365 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
366 pGstPte->u & X86_PTE_PG_MASK);
367# endif
368 uShw.pPTPae->a[iShw].u = 0;
369 }
370 }
371 break;
372 }
373
374# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
375 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
376 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
377 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
378 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
379 {
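            /* One 32-bit guest PD (1024 entries) is shadowed by four PAE PDs
               of 512 entries each, two PAE PDEs per guest PDE: iShwPdpt selects
               the shadow PD, iShw the first of the two entries. */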
380 unsigned iGst = off / sizeof(X86PDE);
381 unsigned iShwPdpt = iGst / 256;
382 unsigned iShw = (iGst % 256) * 2;
383 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
384
385 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x idx = %d page idx=%d\n", iGst, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
386 if (iShwPdpt == pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
387 {
388 for (unsigned i = 0; i < 2; i++)
389 {
390 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
391 {
392 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
393 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
394 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
395 break;
396 }
397 else
398 if (uShw.pPDPae->a[iShw+i].n.u1Present)
399 {
400 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
401 pgmPoolFree(pPool->CTX_SUFF(pVM),
402 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
403 pPage->idx,
404 iShw + i);
405 uShw.pPDPae->a[iShw+i].u = 0;
406 }
407
408 /* paranoia / a bit assumptive. */
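                    /* A misaligned write wider than 4 bytes may also touch the
                       shadow entries of the next guest PDE; check those too. */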
409 if ( pCpu
410 && (off & 3)
411 && (off & 3) + cbWrite > 4)
412 {
413 const unsigned iShw2 = iShw + 2 + i;
414 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
415 {
416 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
417 {
418 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
419 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
420 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
421 break;
422 }
423 else
424 if (uShw.pPDPae->a[iShw2].n.u1Present)
425 {
426 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
427 pgmPoolFree(pPool->CTX_SUFF(pVM),
428 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
429 pPage->idx,
430 iShw2);
431 uShw.pPDPae->a[iShw2].u = 0;
432 }
433 }
434 }
435 }
436 }
437 break;
438 }
439# endif
440
441
442 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
443 {
444 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
445 const unsigned iShw = off / sizeof(X86PTEPAE);
446 if (uShw.pPTPae->a[iShw].n.u1Present)
447 {
448# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
449 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
450 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
451 pgmPoolTracDerefGCPhysHint(pPool, pPage,
452 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
453 pGstPte->u & X86_PTE_PAE_PG_MASK);
454# endif
455 uShw.pPTPae->a[iShw].u = 0;
456 }
457
458 /* paranoia / a bit assumptive. */
459 if ( pCpu
460 && (off & 7)
461 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
462 {
463 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
464 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
465
466 if (uShw.pPTPae->a[iShw2].n.u1Present)
467 {
468# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
469 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
470 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
471 pgmPoolTracDerefGCPhysHint(pPool, pPage,
472 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
473 pGstPte->u & X86_PTE_PAE_PG_MASK);
474# endif
475 uShw.pPTPae->a[iShw2].u = 0;
476 }
477 }
478
479 break;
480 }
481
482# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
483 case PGMPOOLKIND_32BIT_PD:
484# else
485 case PGMPOOLKIND_ROOT_32BIT_PD:
486# endif
487 {
488 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
489 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
490 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
491 {
492 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
493 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
494 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
495 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
496 break;
497 }
498# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
499 else
500 {
501 if (uShw.pPD->a[iShw].n.u1Present)
502 {
503 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
504 pgmPoolFree(pPool->CTX_SUFF(pVM),
505 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
506 pPage->idx,
507 iShw);
508 uShw.pPD->a[iShw].u = 0;
509 }
510 }
511# endif
512 /* paranoia / a bit assumptive. */
513 if ( pCpu
514 && (off & 3)
515 && (off & 3) + cbWrite > sizeof(X86PTE))
516 {
517 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
518 if ( iShw2 != iShw
519 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
520 {
521
522 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
523 {
524 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
525 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
526 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
527 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
528 }
529# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
530 else
531 {
532 if (uShw.pPD->a[iShw2].n.u1Present)
533 {
534 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
535 pgmPoolFree(pPool->CTX_SUFF(pVM),
536 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
537 pPage->idx,
538 iShw2);
539 uShw.pPD->a[iShw2].u = 0;
540 }
541 }
542# endif
543 }
544 }
545#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
546 if ( uShw.pPD->a[iShw].n.u1Present
547 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
548 {
549 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
550# ifdef IN_RC /* TLB load - we're pushing things a bit... */
551 ASMProbeReadByte(pvAddress);
552# endif
553 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
554 uShw.pPD->a[iShw].u = 0;
555 }
556#endif
557 break;
558 }
559
560# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
561 case PGMPOOLKIND_ROOT_PAE_PD:
562 {
563 unsigned iGst = off / sizeof(X86PDE); // ASSUMING 32-bit guest paging!
564 unsigned iShwPdpt = iGst / 256;
565 unsigned iShw = (iGst % 256) * 2;
566 Assert(pPage->idx == PGMPOOL_IDX_PAE_PD);
567 PPGMPOOLPAGE pPage2 = pPage + 1 + iShwPdpt;
568 Assert(pPage2->idx == PGMPOOL_IDX_PAE_PD_0 + iShwPdpt);
569 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage2);
570 for (unsigned i = 0; i < 2; i++, iShw++)
571 {
572 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
573 {
574 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
575 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
576 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw));
577 }
578 /* paranoia / a bit assumptive. */
579 else if ( pCpu
580 && (off & 3)
581 && (off & 3) + cbWrite > 4)
582 {
583 const unsigned iShw2 = iShw + 2;
584 if ( iShw2 < RT_ELEMENTS(uShw.pPDPae->a) /** @todo was completely wrong, it's better now after #1865 but still wrong from cross PD. */
585 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
586 {
587 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
588 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
589 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
590 }
591 }
592#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
593 if ( uShw.pPDPae->a[iShw].n.u1Present
594 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
595 {
596 LogFlow(("pgmPoolMonitorChainChanging: iShwPdpt=%#x iShw=%#x: %RX64 -> freeing it!\n", iShwPdpt, iShw, uShw.pPDPae->a[iShw].u));
597# ifdef IN_RC /* TLB load - we're pushing things a bit... */
598 ASMProbeReadByte(pvAddress);
599# endif
600 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + iShwPdpt * X86_PG_PAE_ENTRIES);
601 uShw.pPDPae->a[iShw].u = 0;
602 }
603#endif
604 }
605 break;
606 }
607# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */
608
609 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
610 {
611 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
612 const unsigned iShw = off / sizeof(X86PDEPAE);
613 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
614 {
615 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
616 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
617 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
618 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
619 break;
620 }
621#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
622 /*
623 * Causes trouble when the guest uses a PDE to refer to the whole page table level
624 * structure. (Invalidate here; faults later on when it tries to change the page
625 * table entries -> recheck; probably only applies to the RC case.)
626 */
627 else
628 {
629 if (uShw.pPDPae->a[iShw].n.u1Present)
630 {
631 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
632 pgmPoolFree(pPool->CTX_SUFF(pVM),
633 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
634# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
635 pPage->idx,
636 iShw);
637# else
638 /* Note: hardcoded PAE implementation dependency */
639 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
640 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
641# endif
642 uShw.pPDPae->a[iShw].u = 0;
643 }
644 }
645#endif
646 /* paranoia / a bit assumptive. */
647 if ( pCpu
648 && (off & 7)
649 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
650 {
651 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
652 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
653
654 if ( iShw2 != iShw
655 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
656 {
657 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
658 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
659 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
660 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
661 }
662#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
663 else if (uShw.pPDPae->a[iShw2].n.u1Present)
664 {
665 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
666 pgmPoolFree(pPool->CTX_SUFF(pVM),
667 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
668# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
669 pPage->idx,
670 iShw2);
671# else
672 /* Note: hardcoded PAE implementation dependency */
673 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
674 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
675# endif
676 uShw.pPDPae->a[iShw2].u = 0;
677 }
678#endif
679 }
680 break;
681 }
682
683# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
684 case PGMPOOLKIND_PAE_PDPT:
685# else
686 case PGMPOOLKIND_ROOT_PDPT:
687# endif
688 {
689 /*
690 * Hopefully this doesn't happen very often:
691 * - touching unused parts of the page
692 * - messing with the bits of pd pointers without changing the physical address
693 */
694# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
695 /* PDPT roots are not page aligned; 32 byte only! */
696 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
697# else
698 const unsigned offPdpt = off;
699# endif
700 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
701 const unsigned iShw = offPdpt / sizeof(X86PDPE);
702 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
703 {
704 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
705 {
706 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
707 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
708 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
709 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
710 break;
711 }
712# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
713 else
714 if (uShw.pPDPT->a[iShw].n.u1Present)
715 {
716 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
717 pgmPoolFree(pPool->CTX_SUFF(pVM),
718 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
719 pPage->idx,
720 iShw);
721 uShw.pPDPT->a[iShw].u = 0;
722 }
723# endif
724
725 /* paranoia / a bit assumptive. */
726 if ( pCpu
727 && (offPdpt & 7)
728 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
729 {
730 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
731 if ( iShw2 != iShw
732 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
733 {
734 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
735 {
736 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
737 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
738 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
739 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
740 }
741# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
742 else
743 if (uShw.pPDPT->a[iShw2].n.u1Present)
744 {
745 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
746 pgmPoolFree(pPool->CTX_SUFF(pVM),
747 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
748 pPage->idx,
749 iShw2);
750 uShw.pPDPT->a[iShw2].u = 0;
751 }
752# endif
753 }
754 }
755 }
756 break;
757 }
758
759#ifndef IN_RC
760 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
761 {
762 Assert(pPage->enmKind == PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD);
763
764 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
765 const unsigned iShw = off / sizeof(X86PDEPAE);
766 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
767 {
768 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
769 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
770 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
771 }
772 else
773 {
774 if (uShw.pPDPae->a[iShw].n.u1Present)
775 {
776 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
777 pgmPoolFree(pPool->CTX_SUFF(pVM),
778 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
779 pPage->idx,
780 iShw);
781 uShw.pPDPae->a[iShw].u = 0;
782 }
783 }
784 /* paranoia / a bit assumptive. */
785 if ( pCpu
786 && (off & 7)
787 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
788 {
789 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
790 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
791
792 if ( iShw2 != iShw
793 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
794 {
795 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
796 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
797 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
798 }
799 else
800 if (uShw.pPDPae->a[iShw2].n.u1Present)
801 {
802 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
803 pgmPoolFree(pPool->CTX_SUFF(pVM),
804 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
805 pPage->idx,
806 iShw2);
807 uShw.pPDPae->a[iShw2].u = 0;
808 }
809 }
810 break;
811 }
812
813 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
814 {
815 /*
816 * Hopefully this doesn't happen very often:
817 * - messing with the bits of pd pointers without changing the physical address
818 */
819# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
820 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
821# endif
822 {
823 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
824 const unsigned iShw = off / sizeof(X86PDPE);
825 if (uShw.pPDPT->a[iShw].n.u1Present)
826 {
827 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
828 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
829 uShw.pPDPT->a[iShw].u = 0;
830 }
831 /* paranoia / a bit assumptive. */
832 if ( pCpu
833 && (off & 7)
834 && (off & 7) + cbWrite > sizeof(X86PDPE))
835 {
836 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
837 if (uShw.pPDPT->a[iShw2].n.u1Present)
838 {
839 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
840 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
841 uShw.pPDPT->a[iShw2].u = 0;
842 }
843 }
844 }
845 break;
846 }
847
848 case PGMPOOLKIND_64BIT_PML4:
849 {
850 /*
851 * Hopefully this doesn't happen very often:
852 * - messing with the bits of pd pointers without changing the physical address
853 */
854# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
855 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
856# endif
857 {
858 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
859 const unsigned iShw = off / sizeof(X86PDPE);
860 if (uShw.pPML4->a[iShw].n.u1Present)
861 {
862 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
863 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
864 uShw.pPML4->a[iShw].u = 0;
865 }
866 /* paranoia / a bit assumptive. */
867 if ( pCpu
868 && (off & 7)
869 && (off & 7) + cbWrite > sizeof(X86PDPE))
870 {
871 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
872 if (uShw.pPML4->a[iShw2].n.u1Present)
873 {
874 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
875 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
876 uShw.pPML4->a[iShw2].u = 0;
877 }
878 }
879 }
880 break;
881 }
882#endif /* !IN_RC */
883
884 default:
885 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
886 }
887
888 /* next */
889 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
890 return;
891 pPage = &pPool->aPages[pPage->iMonitoredNext];
892 }
893}
894
895
896# ifndef IN_RING3
897/**
898 * Checks if an access could be a fork operation in progress.
899 *
900 * Meaning that the guest is setting up the parent process for Copy-On-Write.
901 *
902 * @returns true if it's likely that we're forking, otherwise false.
903 * @param pPool The pool.
904 * @param pCpu The disassembled instruction.
905 * @param offFault The access offset.
906 */
907DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
908{
909 /*
910 * i386 linux is using btr to clear X86_PTE_RW.
911 * The functions involved are (2.6.16 source inspection):
912 * clear_bit
913 * ptep_set_wrprotect
914 * copy_one_pte
915 * copy_pte_range
916 * copy_pmd_range
917 * copy_pud_range
918 * copy_page_range
919 * dup_mmap
920 * dup_mm
921 * copy_mm
922 * copy_process
923 * do_fork
924 */
925 if ( pCpu->pCurInstr->opcode == OP_BTR
926 && !(offFault & 4)
927 /** @todo Validate that the bit index is X86_PTE_RW. */
928 )
929 {
930 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
931 return true;
932 }
933 return false;
934}
935
936
937/**
938 * Determine whether the page is likely to have been reused.
939 *
940 * @returns true if we consider the page as being reused for a different purpose.
941 * @returns false if we consider it to still be a paging page.
942 * @param pVM VM Handle.
943 * @param pPage The page in question.
944 * @param pRegFrame Trap register frame.
945 * @param pCpu The disassembly info for the faulting instruction.
946 * @param pvFault The fault address.
947 *
948 * @remark The REP prefix check is left to the caller because of STOSD/W.
949 */
950DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
951{
952#ifndef IN_RC
953 /** @todo could make this general; faulting close to rsp should be a safe reuse heuristic. */
954 if ( HWACCMHasPendingIrq(pVM)
955 && (pRegFrame->rsp - pvFault) < 32)
956 {
957 /* Fault caused by stack writes while trying to inject an interrupt event. */
958 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
959 return true;
960 }
961#else
962 NOREF(pVM); NOREF(pvFault);
963#endif
964
965 switch (pCpu->pCurInstr->opcode)
966 {
967 /* call implies the actual push of the return address faulted */
968 case OP_CALL:
969 Log4(("pgmPoolMonitorIsReused: CALL\n"));
970 return true;
971 case OP_PUSH:
972 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
973 return true;
974 case OP_PUSHF:
975 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
976 return true;
977 case OP_PUSHA:
978 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
979 return true;
980 case OP_FXSAVE:
981 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
982 return true;
983 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
984 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
985 return true;
986 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
987 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
988 return true;
989 case OP_MOVSWD:
990 case OP_STOSWD:
991 if ( pCpu->prefix == (PREFIX_REP|PREFIX_REX)
992 && pRegFrame->rcx >= 0x40
993 )
994 {
995 Assert(pCpu->mode == CPUMODE_64BIT);
996
997 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
998 return true;
999 }
1000 return false;
1001 }
1002 if ( (pCpu->param1.flags & USE_REG_GEN32)
1003 && (pCpu->param1.base.reg_gen == USE_REG_ESP))
1004 {
1005 Log4(("pgmPoolMonitorIsReused: ESP\n"));
1006 return true;
1007 }
1008
1009 //if (pPage->fCR3Mix)
1010 // return false;
1011 return false;
1012}
1013
1014
1015/**
1016 * Flushes the page being accessed.
1017 *
1018 * @returns VBox status code suitable for scheduling.
1019 * @param pVM The VM handle.
1020 * @param pPool The pool.
1021 * @param pPage The pool page (head).
1022 * @param pCpu The disassembly of the write instruction.
1023 * @param pRegFrame The trap register frame.
1024 * @param GCPhysFault The fault address as guest physical address.
1025 * @param pvFault The fault address.
1026 */
1027static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1028 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1029{
1030 /*
1031 * First, do the flushing.
1032 */
1033 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
1034
1035 /*
1036 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
1037 */
1038 uint32_t cbWritten;
1039 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
1040 if (RT_SUCCESS(rc2))
1041 pRegFrame->rip += pCpu->opsize;
1042 else if (rc2 == VERR_EM_INTERPRETER)
1043 {
1044#ifdef IN_RC
1045 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
1046 {
1047 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
1048 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
1049 rc = VINF_SUCCESS;
1050 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
1051 }
1052 else
1053#endif
1054 {
1055 rc = VINF_EM_RAW_EMULATE_INSTR;
1056 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1057 }
1058 }
1059 else
1060 rc = rc2;
1061
1062 /* See use in pgmPoolAccessHandlerSimple(). */
1063 PGM_INVL_GUEST_TLBS();
1064
1065 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
1066 return rc;
1067
1068}
1069
1070
1071/**
1072 * Handles the STOSD write accesses.
1073 *
1074 * @returns VBox status code suitable for scheduling.
1075 * @param pVM The VM handle.
1076 * @param pPool The pool.
1077 * @param pPage The pool page (head).
1078 * @param pCpu The disassembly of the write instruction.
1079 * @param pRegFrame The trap register frame.
1080 * @param GCPhysFault The fault address as guest physical address.
1081 * @param pvFault The fault address.
1082 */
1083DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1084 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1085{
1086 Assert(pCpu->mode == CPUMODE_32BIT);
1087
1088 /*
1089 * Increment the modification counter and insert it into the list
1090 * of modified pages the first time.
1091 */
1092 if (!pPage->cModifications++)
1093 pgmPoolMonitorModifiedInsert(pPool, pPage);
1094
1095 /*
1096 * Execute REP STOSD.
1097 *
1098 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
1099 * write situation, meaning that it's safe to write here.
1100 */
1101#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1102 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1103#endif
1104 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
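    /* Emulate REP STOSD ourselves: for each dword, let the monitor chain
       react, perform the guest write, and advance edi/ecx the way the CPU
       would. */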
1105 while (pRegFrame->ecx)
1106 {
1107#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1108 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1109 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1110 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1111#else
1112 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1113#endif
1114#ifdef IN_RC
1115 *(uint32_t *)pu32 = pRegFrame->eax;
1116#else
1117 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
1118#endif
1119 pu32 += 4;
1120 GCPhysFault += 4;
1121 pRegFrame->edi += 4;
1122 pRegFrame->ecx--;
1123 }
1124 pRegFrame->rip += pCpu->opsize;
1125
1126 /* See use in pgmPoolAccessHandlerSimple(). */
1127 PGM_INVL_GUEST_TLBS();
1128
1129 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
1130 return VINF_SUCCESS;
1131}
1132
1133
1134/**
1135 * Handles the simple write accesses.
1136 *
1137 * @returns VBox status code suitable for scheduling.
1138 * @param pVM The VM handle.
1139 * @param pPool The pool.
1140 * @param pPage The pool page (head).
1141 * @param pCpu The disassembly of the write instruction.
1142 * @param pRegFrame The trap register frame.
1143 * @param GCPhysFault The fault address as guest physical address.
1144 * @param pvFault The fault address.
1145 */
1146DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1147 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1148{
1149 /*
1150 * Increment the modification counter and insert it into the list
1151 * of modified pages the first time.
1152 */
1153 if (!pPage->cModifications++)
1154 pgmPoolMonitorModifiedInsert(pPool, pPage);
1155
1156 /*
1157 * Clear all the pages. ASSUMES that pvFault is readable.
1158 */
1159#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1160 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1161 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1162 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
1163 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1164#else
1165 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
1166#endif
1167
1168 /*
1169 * Interpret the instruction.
1170 */
1171 uint32_t cb;
1172 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
1173 if (RT_SUCCESS(rc))
1174 pRegFrame->rip += pCpu->opsize;
1175 else if (rc == VERR_EM_INTERPRETER)
1176 {
1177 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1178 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
1179 rc = VINF_EM_RAW_EMULATE_INSTR;
1180 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1181 }
1182
1183 /*
1184 * Quick hack: with logging enabled we're getting stale
1185 * code TLBs but no data TLB for EIP, and we crash in EMInterpretDisasOne.
1186 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
1187 * have to be fixed to support this. But that'll have to wait till next week.
1188 *
1189 * An alternative is to keep track of the changed PTEs together with the
1190 * GCPhys from the guest PT. This may prove expensive though.
1191 *
1192 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
1193 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1194 */
1195 PGM_INVL_GUEST_TLBS();
1196
1197 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1198 return rc;
1199}
1200
1201
1202/**
1203 * \#PF Handler callback for PT write accesses.
1204 *
1205 * @returns VBox status code (appropriate for GC return).
1206 * @param pVM VM Handle.
1207 * @param uErrorCode CPU Error code.
1208 * @param pRegFrame Trap register frame.
1209 * NULL on DMA and other non CPU access.
1210 * @param pvFault The fault address (cr2).
1211 * @param GCPhysFault The GC physical address corresponding to pvFault.
1212 * @param pvUser User argument.
1213 */
1214DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1215{
1216 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1217 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1218 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1219 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1220
1221 /*
1222 * We should ALWAYS have the list head as user parameter. This
1223 * is because we use that page to record the changes.
1224 */
1225 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1226
1227 /*
1228 * Disassemble the faulting instruction.
1229 */
1230 DISCPUSTATE Cpu;
1231 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
1232 AssertRCReturn(rc, rc);
1233
1234 /*
1235 * Check if it's worth dealing with.
1236 */
1237 bool fReused = false;
1238 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1239 || pPage->fCR3Mix)
1240 && !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
1241 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
1242 {
1243 /*
1244 * Simple instructions, no REP prefix.
1245 */
1246 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
1247 {
1248 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1249 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1250 return rc;
1251 }
1252
1253 /*
1254 * Windows is frequently doing small memset() operations (netio test 4k+).
1255 * We have to deal with these or we'll kill the cache and performance.
1256 */
1257 if ( Cpu.pCurInstr->opcode == OP_STOSWD
1258 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
1259 && pRegFrame->ecx <= 0x20
1260 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1261 && !((uintptr_t)pvFault & 3)
1262 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1263 && Cpu.mode == CPUMODE_32BIT
1264 && Cpu.opmode == CPUMODE_32BIT
1265 && Cpu.addrmode == CPUMODE_32BIT
1266 && Cpu.prefix == PREFIX_REP
1267 && !pRegFrame->eflags.Bits.u1DF
1268 )
1269 {
1270 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1271 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1272 return rc;
1273 }
1274
1275 /* REP prefix, don't bother. */
1276 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1277 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1278 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
1279 }
1280
1281 /*
1282 * Not worth it, so flush it.
1283 *
1284 * If we considered it to be reused, don't go back to ring-3
1285 * to emulate failed instructions since we usually cannot
1286 * interpret them. This may be a bit risky, in which case
1287 * the reuse detection must be fixed.
1288 */
1289 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1290 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1291 rc = VINF_SUCCESS;
1292 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1293 return rc;
1294}
1295
1296# endif /* !IN_RING3 */
1297#endif /* PGMPOOL_WITH_MONITORING */
1298
1299#ifdef PGMPOOL_WITH_CACHE
1300
1301/**
1302 * Inserts a page into the GCPhys hash table.
1303 *
1304 * @param pPool The pool.
1305 * @param pPage The page.
1306 */
1307DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1308{
1309 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1310 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1311 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
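    /* Prepend the page to its hash bucket; aiHash holds the bucket heads and
       the pages within a bucket are singly linked through iNext. */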
1312 pPage->iNext = pPool->aiHash[iHash];
1313 pPool->aiHash[iHash] = pPage->idx;
1314}
1315
1316
1317/**
1318 * Removes a page from the GCPhys hash table.
1319 *
1320 * @param pPool The pool.
1321 * @param pPage The page.
1322 */
1323DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1324{
1325 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1326 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1327 if (pPool->aiHash[iHash] == pPage->idx)
1328 pPool->aiHash[iHash] = pPage->iNext;
1329 else
1330 {
1331 uint16_t iPrev = pPool->aiHash[iHash];
1332 for (;;)
1333 {
1334 const int16_t i = pPool->aPages[iPrev].iNext;
1335 if (i == pPage->idx)
1336 {
1337 pPool->aPages[iPrev].iNext = pPage->iNext;
1338 break;
1339 }
1340 if (i == NIL_PGMPOOL_IDX)
1341 {
1342 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1343 break;
1344 }
1345 iPrev = i;
1346 }
1347 }
1348 pPage->iNext = NIL_PGMPOOL_IDX;
1349}
1350
1351
1352/**
1353 * Frees up one cache page.
1354 *
1355 * @returns VBox status code.
1356 * @retval VINF_SUCCESS on success.
1357 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
1358 * @param pPool The pool.
1359 * @param iUser The user index.
1360 */
1361static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1362{
1363#ifndef IN_RC
1364 const PVM pVM = pPool->CTX_SUFF(pVM);
1365#endif
1366 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1367 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1368
1369 /*
1370 * Select one page from the tail of the age list.
1371 */
1372 uint16_t iToFree = pPool->iAgeTail;
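    /* Never evict the page we're allocating a user entry for; fall back to
       the second-oldest page instead. */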
1373 if (iToFree == iUser)
1374 iToFree = pPool->aPages[iToFree].iAgePrev;
1375/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1376 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1377 {
1378 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1379 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1380 {
1381 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1382 continue;
1383 iToFree = i;
1384 break;
1385 }
1386 }
1387*/
1388
1389 Assert(iToFree != iUser);
1390 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1391
1392 PPGMPOOLPAGE pPage = &pPool->aPages[iToFree];
1393
1394 /*
1395 * Reject any attempts at flushing the currently active shadow CR3 mapping
1396 */
1397 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
1398 {
1399 /* Refresh the cr3 mapping by putting it at the head of the age list. */
1400 pgmPoolCacheUsed(pPool, pPage);
1401 return pgmPoolCacheFreeOne(pPool, iUser);
1402 }
1403
1404 int rc = pgmPoolFlushPage(pPool, pPage);
1405 if (rc == VINF_SUCCESS)
1406 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1407 return rc;
1408}
1409
1410
1411/**
1412 * Checks if a kind mismatch is really a page being reused
1413 * or just a normal remapping.
1414 *
1415 * @returns true if reused and the cached page (enmKind1) should be flushed
1416 * @returns false if not reused.
1417 * @param enmKind1 The kind of the cached page.
1418 * @param enmKind2 The kind of the requested page.
1419 */
1420static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1421{
1422 switch (enmKind1)
1423 {
1424 /*
1425 * Never reuse them. There is no remapping in non-paging mode.
1426 */
1427 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1428 case PGMPOOLKIND_32BIT_PD_PHYS:
1429 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1430 case PGMPOOLKIND_PAE_PD_PHYS:
1431 case PGMPOOLKIND_PAE_PDPT_PHYS:
1432 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1433 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1434 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1435 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1436 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1437#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1438 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1439 return false;
1440#else
1441 return true;
1442#endif
1443
1444 /*
1445 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1446 */
1447 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1448 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1449 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1450 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1451 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1452 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1453 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1454 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1455 switch (enmKind2)
1456 {
1457 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1458 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1459 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1460 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1461 case PGMPOOLKIND_64BIT_PML4:
1462 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1463 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1464 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1465 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1466 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1467 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1468 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1469 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1470 return true;
1471 default:
1472 return false;
1473 }
1474
1475 /*
1476 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1477 */
1478 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1479 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1480 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1481 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1482 case PGMPOOLKIND_64BIT_PML4:
1483 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1484 switch (enmKind2)
1485 {
1486 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1487 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1488 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1489 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1490 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1491 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1492 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1493 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1494 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1495 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1496 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1497 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1498 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1499 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1500 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1501 return true;
1502 default:
1503 return false;
1504 }
1505
1506 /*
1507 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1508 */
1509#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1510 case PGMPOOLKIND_ROOT_32BIT_PD:
1511 case PGMPOOLKIND_ROOT_PAE_PD:
1512 case PGMPOOLKIND_ROOT_PDPT:
1513#endif
1514 case PGMPOOLKIND_ROOT_NESTED:
1515 return false;
1516
1517 default:
1518 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1519 }
1520}
1521
1522
1523/**
1524 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1525 *
1526 * @returns VBox status code.
1527 * @retval VINF_PGM_CACHED_PAGE on success.
1528 * @retval VERR_FILE_NOT_FOUND if not found.
1529 * @param pPool The pool.
1530 * @param GCPhys The GC physical address of the page we're going to shadow.
1531 * @param enmKind The kind of mapping.
1532 * @param iUser The shadow page pool index of the user table.
1533 * @param iUserTable The index into the user table (shadowed).
1534 * @param ppPage Where to store the pointer to the page.
1535 */
1536static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1537{
1538#ifndef IN_RC
1539 const PVM pVM = pPool->CTX_SUFF(pVM);
1540#endif
1541 /*
1542 * Look up the GCPhys in the hash.
1543 */
1544 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1545 Log3(("pgmPoolCacheAlloc: %RGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
1546 if (i != NIL_PGMPOOL_IDX)
1547 {
1548 do
1549 {
1550 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1551 Log3(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1552 if (pPage->GCPhys == GCPhys)
1553 {
1554 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
1555 {
1556 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1557 if (RT_SUCCESS(rc))
1558 {
1559 *ppPage = pPage;
1560 STAM_COUNTER_INC(&pPool->StatCacheHits);
1561 return VINF_PGM_CACHED_PAGE;
1562 }
1563 return rc;
1564 }
1565
1566 /*
1567 * The kind is different. In some cases we should now flush the page
1568 * as it has been reused, but in most cases this is normal remapping
1569 * of PDs as PTs or big pages using the GCPhys field in a slightly
1570 * different way than the other kinds.
1571 */
1572 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1573 {
1574 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1575 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
1576 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1577 break;
1578 }
1579 }
1580
1581 /* next */
1582 i = pPage->iNext;
1583 } while (i != NIL_PGMPOOL_IDX);
1584 }
1585
1586 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
1587 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1588 return VERR_FILE_NOT_FOUND;
1589}
1590
1591
1592/**
1593 * Inserts a page into the cache.
1594 *
1595 * @param pPool The pool.
1596 * @param pPage The cached page.
1597 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1598 */
1599static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1600{
1601 /*
1602 * Insert into the GCPhys hash if the page is fit for that.
1603 */
1604 Assert(!pPage->fCached);
1605 if (fCanBeCached)
1606 {
1607 pPage->fCached = true;
1608 pgmPoolHashInsert(pPool, pPage);
1609 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1610 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1611 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1612 }
1613 else
1614 {
1615 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1616 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1617 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1618 }
1619
1620 /*
1621 * Insert at the head of the age list.
1622 */
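    /* The age list is kept in LRU order: new (and recently used) pages sit at
       the head, while pgmPoolCacheFreeOne evicts from the tail. */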
1623 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1624 pPage->iAgeNext = pPool->iAgeHead;
1625 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1626 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1627 else
1628 pPool->iAgeTail = pPage->idx;
1629 pPool->iAgeHead = pPage->idx;
1630}
1631
1632
1633/**
1634 * Flushes a cached page.
1635 *
1636 * @param pPool The pool.
1637 * @param pPage The cached page.
1638 */
1639static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1640{
1641 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1642
1643 /*
1644 * Remove the page from the hash.
1645 */
1646 if (pPage->fCached)
1647 {
1648 pPage->fCached = false;
1649 pgmPoolHashRemove(pPool, pPage);
1650 }
1651 else
1652 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1653
1654 /*
1655 * Remove it from the age list.
1656 */
1657 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1658 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1659 else
1660 pPool->iAgeTail = pPage->iAgePrev;
1661 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1662 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1663 else
1664 pPool->iAgeHead = pPage->iAgeNext;
1665 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1666 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1667}
1668
1669#endif /* PGMPOOL_WITH_CACHE */
1670#ifdef PGMPOOL_WITH_MONITORING
1671
1672/**
1673 * Looks for pages sharing the monitor.
1674 *
1675 * @returns Pointer to the head page.
1676 * @returns NULL if not found.
1677 * @param pPool The pool.
1678 * @param pNewPage The page which is going to be monitored.
1679 */
1680static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1681{
1682#ifdef PGMPOOL_WITH_CACHE
1683 /*
1684 * Look up the GCPhys in the hash.
1685 */
1686 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1687 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1688 if (i == NIL_PGMPOOL_IDX)
1689 return NULL;
1690 do
1691 {
1692 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1693 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1694 && pPage != pNewPage)
1695 {
1696 switch (pPage->enmKind)
1697 {
1698 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1699 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1700 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1701 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1702 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1703 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1704 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1705 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1706 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1707 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1708 case PGMPOOLKIND_64BIT_PML4:
1709#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1710 case PGMPOOLKIND_32BIT_PD:
1711 case PGMPOOLKIND_PAE_PDPT:
1712#else
1713 case PGMPOOLKIND_ROOT_32BIT_PD:
1714 case PGMPOOLKIND_ROOT_PAE_PD:
1715 case PGMPOOLKIND_ROOT_PDPT:
1716#endif
1717 {
1718 /* find the head */
1719 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1720 {
1721 Assert(pPage->iMonitoredPrev != pPage->idx);
1722 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1723 }
1724 return pPage;
1725 }
1726
1727 /* ignore, no monitoring. */
1728 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1729 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1730 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1731 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1732 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1733 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1734 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1735 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1736 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1737 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1738 case PGMPOOLKIND_ROOT_NESTED:
1739 case PGMPOOLKIND_PAE_PD_PHYS:
1740 case PGMPOOLKIND_PAE_PDPT_PHYS:
1741 case PGMPOOLKIND_32BIT_PD_PHYS:
1742#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1743 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1744#endif
1745 break;
1746 default:
1747 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1748 }
1749 }
1750
1751 /* next */
1752 i = pPage->iNext;
1753 } while (i != NIL_PGMPOOL_IDX);
1754#endif
1755 return NULL;
1756}
1757
1758
1759/**
1760 * Enables write monitoring of a guest page.
1761 *
1762 * @returns VBox status code.
1763 * @retval VINF_SUCCESS on success.
1764 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a lightweight pool flush.
1765 * @param pPool The pool.
1766 * @param pPage The cached page.
1767 */
1768static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1769{
1770 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1771
1772 /*
1773 * Filter out the relevant kinds.
1774 */
1775 switch (pPage->enmKind)
1776 {
1777 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1778 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1779 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1780 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1781 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1782 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1783 case PGMPOOLKIND_64BIT_PML4:
1784#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1785 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1786 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1787 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1788 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1789 case PGMPOOLKIND_32BIT_PD:
1790 case PGMPOOLKIND_PAE_PDPT:
1791#else
1792 case PGMPOOLKIND_ROOT_PDPT:
1793#endif
1794 break;
1795
1796 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1797 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1798 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1799 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1800 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1801 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1802 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1803 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1804 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1805 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1806 case PGMPOOLKIND_ROOT_NESTED:
1807 /* Nothing to monitor here. */
1808 return VINF_SUCCESS;
1809
1810#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1811 case PGMPOOLKIND_32BIT_PD_PHYS:
1812 case PGMPOOLKIND_PAE_PDPT_PHYS:
1813 case PGMPOOLKIND_PAE_PD_PHYS:
1814 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1815 /* Nothing to monitor here. */
1816 return VINF_SUCCESS;
1817#else
1818 case PGMPOOLKIND_ROOT_32BIT_PD:
1819 case PGMPOOLKIND_ROOT_PAE_PD:
1820#endif
1821#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1822 break;
1823#else
1824 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1825#endif
1826 default:
1827 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1828 }
1829
1830 /*
1831 * Install handler.
1832 */
1833 int rc;
1834 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1835 if (pPageHead)
1836 {
1837 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1838 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1839 pPage->iMonitoredPrev = pPageHead->idx;
1840 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1841 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1842 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1843 pPageHead->iMonitoredNext = pPage->idx;
1844 rc = VINF_SUCCESS;
1845 }
1846 else
1847 {
1848 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1849 PVM pVM = pPool->CTX_SUFF(pVM);
1850 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1851 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1852 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1853 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1854 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1855 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1856 pPool->pszAccessHandler);
1857 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1858 * the heap size should suffice. */
1859 AssertFatalRC(rc);
1860 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1861 rc = VERR_PGM_POOL_CLEARED;
1862 }
1863 pPage->fMonitored = true;
1864 return rc;
1865}
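
/*
 * Illustrative sketch, not part of the original file: pages shadowing
 * the same monitored guest page are chained via iMonitoredNext /
 * iMonitoredPrev, and only the chain head owns the physical access
 * handler registered above. The hypothetical function below counts the
 * chain members starting from the head.
 */
#if 0 /* example only */
static unsigned pgmPoolMonitorCountChainExample(PPGMPOOL pPool, PPGMPOOLPAGE pPageHead)
{
    Assert(pPageHead->iMonitoredPrev == NIL_PGMPOOL_IDX); /* must be the chain head */
    unsigned cPages = 1;
    uint16_t i = pPageHead->iMonitoredNext;
    while (i != NIL_PGMPOOL_IDX)
    {
        cPages++;
        i = pPool->aPages[i].iMonitoredNext;
    }
    return cPages;
}
#endif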
1866
1867
1868/**
1869 * Disables write monitoring of a guest page.
1870 *
1871 * @returns VBox status code.
1872 * @retval VINF_SUCCESS on success.
1873 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
1874 * @param pPool The pool.
1875 * @param pPage The cached page.
1876 */
1877static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1878{
1879 /*
1880 * Filter out the relevant kinds.
1881 */
1882 switch (pPage->enmKind)
1883 {
1884 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1885 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1886 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1887 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1888 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1889 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1890 case PGMPOOLKIND_64BIT_PML4:
1891#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1892 case PGMPOOLKIND_32BIT_PD:
1893 case PGMPOOLKIND_PAE_PDPT:
1894 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1895 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1896 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1897 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1898#else
1899 case PGMPOOLKIND_ROOT_PDPT:
1900#endif
1901 break;
1902
1903 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1904 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1905 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1906 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1907 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1908 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1909 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1910 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1911 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1912 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1913 case PGMPOOLKIND_ROOT_NESTED:
1914 case PGMPOOLKIND_PAE_PD_PHYS:
1915 case PGMPOOLKIND_PAE_PDPT_PHYS:
1916 case PGMPOOLKIND_32BIT_PD_PHYS:
1917 /* Nothing to monitor here. */
1918 return VINF_SUCCESS;
1919
1920#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1921 case PGMPOOLKIND_ROOT_32BIT_PD:
1922 case PGMPOOLKIND_ROOT_PAE_PD:
1923#endif
1924#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1925 break;
1926#endif
1927#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1928 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1929#endif
1930 default:
1931 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1932 }
1933
1934 /*
1935 * Remove the page from the monitored list or uninstall it if last.
1936 */
1937 const PVM pVM = pPool->CTX_SUFF(pVM);
1938 int rc;
1939 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1940 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1941 {
1942 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1943 {
1944 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1945 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1946 pNewHead->fCR3Mix = pPage->fCR3Mix;
1947 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1948 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1949 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1950 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
1951 pPool->pszAccessHandler);
1952 AssertFatalRCSuccess(rc);
1953 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1954 }
1955 else
1956 {
1957 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1958 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1959 {
1960 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1961 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1962 }
1963 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1964 rc = VINF_SUCCESS;
1965 }
1966 }
1967 else
1968 {
1969 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1970 AssertFatalRC(rc);
1971 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1972 rc = VERR_PGM_POOL_CLEARED;
1973 }
1974 pPage->fMonitored = false;
1975
1976 /*
1977 * Remove it from the list of modified pages (if in it).
1978 */
1979 pgmPoolMonitorModifiedRemove(pPool, pPage);
1980
1981 return rc;
1982}
1983
1984# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1985
1986/**
1987 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1988 *
1989 * @param pPool The Pool.
1990 * @param pPage A page in the chain.
1991 * @param fCR3Mix The new fCR3Mix value.
1992 */
1993static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1994{
1995 /* current */
1996 pPage->fCR3Mix = fCR3Mix;
1997
1998 /* before */
1999 int16_t idx = pPage->iMonitoredPrev;
2000 while (idx != NIL_PGMPOOL_IDX)
2001 {
2002 pPool->aPages[idx].fCR3Mix = fCR3Mix;
2003 idx = pPool->aPages[idx].iMonitoredPrev;
2004 }
2005
2006 /* after */
2007 idx = pPage->iMonitoredNext;
2008 while (idx != NIL_PGMPOOL_IDX)
2009 {
2010 pPool->aPages[idx].fCR3Mix = fCR3Mix;
2011 idx = pPool->aPages[idx].iMonitoredNext;
2012 }
2013}
2014
2015
2016/**
2017 * Installs or modifies monitoring of a CR3 page (special).
2018 *
2019 * We're pretending the CR3 page is shadowed by the pool so we can use the
2020 * generic mechanisms for detecting chained monitoring. (This also gives us a
2021 * taste of what code changes are required to really pool CR3 shadow pages.)
2022 *
2023 * @returns VBox status code.
2024 * @param pPool The pool.
2025 * @param idxRoot The CR3 (root) page index.
2026 * @param GCPhysCR3 The (new) CR3 value.
2027 */
2028int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
2029{
2030 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
2031 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
2032 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d} GCPhysCR3=%RGp\n",
2033 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
2034
2035 /*
2036 * The unlikely case where it already matches.
2037 */
2038 if (pPage->GCPhys == GCPhysCR3)
2039 {
2040 Assert(pPage->fMonitored);
2041 return VINF_SUCCESS;
2042 }
2043
2044 /*
2045 * Flush the current monitoring and remove it from the hash.
2046 */
2047 int rc = VINF_SUCCESS;
2048 if (pPage->fMonitored)
2049 {
2050 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
2051 rc = pgmPoolMonitorFlush(pPool, pPage);
2052 if (rc == VERR_PGM_POOL_CLEARED)
2053 rc = VINF_SUCCESS;
2054 else
2055 AssertFatalRC(rc);
2056 pgmPoolHashRemove(pPool, pPage);
2057 }
2058
2059 /*
2060 * Monitor the page at the new location and insert it into the hash.
2061 */
2062 pPage->GCPhys = GCPhysCR3;
2063 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
2064 if (rc2 != VERR_PGM_POOL_CLEARED)
2065 {
2066 AssertFatalRC(rc2);
2067 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
2068 rc = rc2;
2069 }
2070 pgmPoolHashInsert(pPool, pPage);
2071 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
2072 return rc;
2073}
2074
2075
2076/**
2077 * Removes the monitoring of a CR3 page (special).
2078 *
2079 * @returns VBox status code.
2080 * @param pPool The pool.
2081 * @param idxRoot The CR3 (root) page index.
2082 */
2083int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
2084{
2085 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
2086 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
2087 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d}\n",
2088 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
2089
2090 if (!pPage->fMonitored)
2091 return VINF_SUCCESS;
2092
2093 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
2094 int rc = pgmPoolMonitorFlush(pPool, pPage);
2095 if (rc != VERR_PGM_POOL_CLEARED)
2096 AssertFatalRC(rc);
2097 else
2098 rc = VINF_SUCCESS;
2099 pgmPoolHashRemove(pPool, pPage);
2100 Assert(!pPage->fMonitored);
2101 pPage->GCPhys = NIL_RTGCPHYS;
2102 return rc;
2103}
2104
2105# endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
2106
2107/**
2108 * Inserts the page into the list of modified pages.
2109 *
2110 * @param pPool The pool.
2111 * @param pPage The page.
2112 */
2113void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2114{
2115 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2116 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2117 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2118 && pPool->iModifiedHead != pPage->idx,
2119 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2120 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2121 pPool->iModifiedHead, pPool->cModifiedPages));
2122
2123 pPage->iModifiedNext = pPool->iModifiedHead;
2124 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2125 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2126 pPool->iModifiedHead = pPage->idx;
2127 pPool->cModifiedPages++;
2128#ifdef VBOX_WITH_STATISTICS
2129 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2130 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2131#endif
2132}
2133
2134
2135/**
2136 * Removes the page from the list of modified pages and resets the
2137 * modification counter.
2138 *
2139 * @param pPool The pool.
2140 * @param pPage The page which is believed to be in the list of modified pages.
2141 */
2142static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2143{
2144 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2145 if (pPool->iModifiedHead == pPage->idx)
2146 {
2147 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2148 pPool->iModifiedHead = pPage->iModifiedNext;
2149 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2150 {
2151 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2152 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2153 }
2154 pPool->cModifiedPages--;
2155 }
2156 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2157 {
2158 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2159 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2160 {
2161 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2162 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2163 }
2164 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2165 pPool->cModifiedPages--;
2166 }
2167 else
2168 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2169 pPage->cModifications = 0;
2170}
2171
2172
2173/**
2174 * Zaps the list of modified pages, resetting their modification counters in the process.
2175 *
2176 * @param pVM The VM handle.
2177 */
2178void pgmPoolMonitorModifiedClearAll(PVM pVM)
2179{
2180 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2181 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2182
2183 unsigned cPages = 0; NOREF(cPages);
2184 uint16_t idx = pPool->iModifiedHead;
2185 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2186 while (idx != NIL_PGMPOOL_IDX)
2187 {
2188 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2189 idx = pPage->iModifiedNext;
2190 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2191 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2192 pPage->cModifications = 0;
2193 Assert(++cPages);
2194 }
2195 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2196 pPool->cModifiedPages = 0;
2197}
2198
2199
2200#ifdef IN_RING3
2201/**
2202 * Clears all shadow pages and resets all modification counters.
2203 *
2204 * @param pVM The VM handle.
2205 * @remark Should only be used when monitoring is available, thus placed in
2206 * the PGMPOOL_WITH_MONITORING #ifdef.
2207 */
2208void pgmPoolClearAll(PVM pVM)
2209{
2210 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2211 STAM_PROFILE_START(&pPool->StatClearAll, c);
2212 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2213
2214 /*
2215 * Iterate all the pages until we've encountered all that are in use.
2216 * This is a simple but not quite optimal solution.
2217 */
2218 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2219 unsigned cLeft = pPool->cUsedPages;
2220 unsigned iPage = pPool->cCurPages;
2221 while (--iPage >= PGMPOOL_IDX_FIRST)
2222 {
2223 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2224 if (pPage->GCPhys != NIL_RTGCPHYS)
2225 {
2226 switch (pPage->enmKind)
2227 {
2228 /*
2229 * We only care about shadow page tables.
2230 */
2231 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2232 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2233 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2234 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2235 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2236 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2237 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2238 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2239 {
2240#ifdef PGMPOOL_WITH_USER_TRACKING
2241 if (pPage->cPresent)
2242#endif
2243 {
2244 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2245 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2246 ASMMemZeroPage(pvShw);
2247 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2248#ifdef PGMPOOL_WITH_USER_TRACKING
2249 pPage->cPresent = 0;
2250 pPage->iFirstPresent = ~0;
2251#endif
2252 }
2253 }
2254 /* fall thru */
2255
2256 default:
2257 Assert(!pPage->cModifications || ++cModifiedPages);
2258 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2259 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2260 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2261 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2262 pPage->cModifications = 0;
2263 break;
2264
2265 }
2266 if (!--cLeft)
2267 break;
2268 }
2269 }
2270
2271 /* sweep the special pages too. */
2272 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2273 {
2274 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2275 if (pPage->GCPhys != NIL_RTGCPHYS)
2276 {
2277 Assert(!pPage->cModifications || ++cModifiedPages);
2278 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2279 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2280 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2281 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2282 pPage->cModifications = 0;
2283 }
2284 }
2285
2286#ifndef DEBUG_michael
2287 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2288#endif
2289 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2290 pPool->cModifiedPages = 0;
2291
2292#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2293 /*
2294 * Clear all the GCPhys links and rebuild the phys ext free list.
2295 */
2296 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2297 pRam;
2298 pRam = pRam->CTX_SUFF(pNext))
2299 {
2300 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2301 while (iPage-- > 0)
2302 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2303 }
2304
2305 pPool->iPhysExtFreeHead = 0;
2306 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2307 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2308 for (unsigned i = 0; i < cMaxPhysExts; i++)
2309 {
2310 paPhysExts[i].iNext = i + 1;
2311 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2312 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2313 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2314 }
2315 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2316#endif
2317
2318
2319 pPool->cPresent = 0;
2320 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2321}
2322#endif /* IN_RING3 */
2323
2324
2325/**
2326 * Handles SyncCR3 pool tasks.
2327 *
2328 * @returns VBox status code.
2329 * @retval VINF_SUCCESS on success.
2330 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2331 * @param pVM The VM handle.
2332 * @remark Should only be used when monitoring is available, thus placed in
2333 * the PGMPOOL_WITH_MONITORING #ifdef.
2334 */
2335int pgmPoolSyncCR3(PVM pVM)
2336{
2337 /*
2338 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2339 * Occasionally we will have to clear all the shadow page tables because we wanted
2340 * to monitor a page which was mapped by too many shadowed page tables. This operation
2341 * is sometimes referred to as a 'lightweight flush'.
2342 */
2343 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2344 pgmPoolMonitorModifiedClearAll(pVM);
2345 else
2346 {
2347# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2348 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2349 pgmPoolClearAll(pVM);
2350# else /* !IN_RING3 */
2351 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2352 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2353 return VINF_PGM_SYNC_CR3;
2354# endif /* !IN_RING3 */
2355 }
2356 return VINF_SUCCESS;
2357}
2358
2359#endif /* PGMPOOL_WITH_MONITORING */
2360#ifdef PGMPOOL_WITH_USER_TRACKING
2361
2362/**
2363 * Frees up at least one user entry.
2364 *
2365 * @returns VBox status code.
2366 * @retval VINF_SUCCESS on success.
2367 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2368 * @param pPool The pool.
2369 * @param iUser The user index.
2370 */
2371static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2372{
2373 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2374#ifdef PGMPOOL_WITH_CACHE
2375 /*
2376 * Just free cached pages in a braindead fashion.
2377 */
2378 /** @todo walk the age list backwards and free the first with usage. */
2379 int rc = VINF_SUCCESS;
2380 do
2381 {
2382 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2383 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2384 rc = rc2;
2385 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2386 return rc;
2387#else
2388 /*
2389 * Lazy approach.
2390 */
2391 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
2392 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
2393 pgmPoolFlushAllInt(pPool);
2394 return VERR_PGM_POOL_FLUSHED;
2395#endif
2396}
2397
2398
2399/**
2400 * Inserts a page into the user tracking and cache.
2401 *
2402 * This will create a user node for the page, insert it into the GCPhys
2403 * hash, and insert it into the age list.
2404 *
2405 * @returns VBox status code.
2406 * @retval VINF_SUCCESS if successfully added.
2407 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2408 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
2409 * @param pPool The pool.
2410 * @param pPage The cached page.
2411 * @param GCPhys The GC physical address of the page we're going to shadow.
2412 * @param iUser The user index.
2413 * @param iUserTable The user table index.
2414 */
2415DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2416{
2417 int rc = VINF_SUCCESS;
2418 PPGMPOOLUSER pUser = pPool->CTX_SUFF(paUsers);
2419
2420 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
2421
2422 /*
2423 * Find a free user node.
2424 */
2425 uint16_t i = pPool->iUserFreeHead;
2426 if (i == NIL_PGMPOOL_USER_INDEX)
2427 {
2428 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2429 if (RT_FAILURE(rc))
2430 return rc;
2431 i = pPool->iUserFreeHead;
2432 }
2433
2434 /*
2435 * Unlink the user node from the free list,
2436 * initialize and insert it into the user list.
2437 */
2438 pPool->iUserFreeHead = pUser[i].iNext;
2439 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
2440 pUser[i].iUser = iUser;
2441 pUser[i].iUserTable = iUserTable;
2442 pPage->iUserHead = i;
2443
2444 /*
2445 * Insert into cache and enable monitoring of the guest page if enabled.
2446 *
2447 * Until we implement caching of all levels, including the CR3 one, we'll
2448 * have to make sure we don't try monitor & cache any recursive reuse of
2449 * a monitored CR3 page. Because all windows versions are doing this we'll
2450 * have to be able to do combined access monitoring, CR3 + PT and
2451 * PD + PT (guest PAE).
2452 *
2453 * Update:
2454 * We're now cooperating with the CR3 monitor if an uncacheable page is found.
2455 */
2456#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2457# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2458 const bool fCanBeMonitored = true;
2459# else
2460 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2461 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2462 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2463# endif
2464# ifdef PGMPOOL_WITH_CACHE
2465 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2466# endif
2467 if (fCanBeMonitored)
2468 {
2469# ifdef PGMPOOL_WITH_MONITORING
2470 rc = pgmPoolMonitorInsert(pPool, pPage);
2471 if (rc == VERR_PGM_POOL_CLEARED)
2472 {
2473 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2474# ifndef PGMPOOL_WITH_CACHE
2475 pgmPoolMonitorFlush(pPool, pPage);
2476 rc = VERR_PGM_POOL_FLUSHED;
2477# endif
2478 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2479 pUser[i].iNext = pPool->iUserFreeHead;
2480 pUser[i].iUser = NIL_PGMPOOL_IDX;
2481 pPool->iUserFreeHead = i;
2482 }
2483 # endif
2484 }
2485 #endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2486 return rc;
2487}
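
/*
 * Illustrative sketch, not part of the original file: the status codes
 * documented for pgmPoolTrackInsert above are the only ones a caller
 * has to distinguish; anything else would indicate a bug. The context
 * of this fragment is hypothetical.
 */
#if 0 /* example only */
    int rc = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
    AssertMsg(   RT_SUCCESS(rc)
              || rc == VERR_PGM_POOL_FLUSHED
              || rc == VERR_PGM_POOL_CLEARED,
              ("rc=%Rrc\n", rc));
#endif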
2488
2489
2490# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2491/**
2492 * Adds a user reference to a page.
2493 *
2494 * This will add a user node for the page and move the page to the
2495 * head of the age list (the cache replacement order).
2496 *
2497 * @returns VBox status code.
2498 * @retval VINF_SUCCESS if successfully added.
2499 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2500 * @param pPool The pool.
2501 * @param pPage The cached page.
2502 * @param iUser The user index.
2503 * @param iUserTable The user table.
2504 */
2505static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2506{
2507 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2508
2509 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
2510# ifdef VBOX_STRICT
2511 /*
2512 * Check that the entry doesn't already exist.
2513 */
2514 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2515 {
2516 uint16_t i = pPage->iUserHead;
2517 do
2518 {
2519 Assert(i < pPool->cMaxUsers);
2520 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2521 i = paUsers[i].iNext;
2522 } while (i != NIL_PGMPOOL_USER_INDEX);
2523 }
2524# endif
2525
2526 /*
2527 * Allocate a user node.
2528 */
2529 uint16_t i = pPool->iUserFreeHead;
2530 if (i == NIL_PGMPOOL_USER_INDEX)
2531 {
2532 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2533 if (RT_FAILURE(rc))
2534 return rc;
2535 i = pPool->iUserFreeHead;
2536 }
2537 pPool->iUserFreeHead = paUsers[i].iNext;
2538
2539 /*
2540 * Initialize the user node and insert it.
2541 */
2542 paUsers[i].iNext = pPage->iUserHead;
2543 paUsers[i].iUser = iUser;
2544 paUsers[i].iUserTable = iUserTable;
2545 pPage->iUserHead = i;
2546
2547# ifdef PGMPOOL_WITH_CACHE
2548 /*
2549 * Tell the cache to update its replacement stats for this page.
2550 */
2551 pgmPoolCacheUsed(pPool, pPage);
2552# endif
2553 return VINF_SUCCESS;
2554}
2555# endif /* PGMPOOL_WITH_CACHE */
2556
2557
2558/**
2559 * Frees a user record associated with a page.
2560 *
2561 * This does not clear the entry in the user table; it simply returns the
2562 * user record to the chain of free records.
2563 *
2564 * @param pPool The pool.
2565 * @param pPage The shadow page.
2566 * @param iUser The shadow page pool index of the user table.
2567 * @param iUserTable The index into the user table (shadowed).
2568 */
2569static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2570{
2571 /*
2572 * Unlink and free the specified user entry.
2573 */
2574 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2575
2576 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2577 uint16_t i = pPage->iUserHead;
2578 if ( i != NIL_PGMPOOL_USER_INDEX
2579 && paUsers[i].iUser == iUser
2580 && paUsers[i].iUserTable == iUserTable)
2581 {
2582 pPage->iUserHead = paUsers[i].iNext;
2583
2584 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2585 paUsers[i].iNext = pPool->iUserFreeHead;
2586 pPool->iUserFreeHead = i;
2587 return;
2588 }
2589
2590 /* General: Linear search. */
2591 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2592 while (i != NIL_PGMPOOL_USER_INDEX)
2593 {
2594 if ( paUsers[i].iUser == iUser
2595 && paUsers[i].iUserTable == iUserTable)
2596 {
2597 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2598 paUsers[iPrev].iNext = paUsers[i].iNext;
2599 else
2600 pPage->iUserHead = paUsers[i].iNext;
2601
2602 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2603 paUsers[i].iNext = pPool->iUserFreeHead;
2604 pPool->iUserFreeHead = i;
2605 return;
2606 }
2607 iPrev = i;
2608 i = paUsers[i].iNext;
2609 }
2610
2611 /* Fatal: didn't find it */
2612 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2613 iUser, iUserTable, pPage->GCPhys));
2614}
2615
2616
2617/**
2618 * Gets the entry size of a shadow table.
2619 *
2620 * @param enmKind The kind of page.
2621 *
2622 * @returns The size of the entry in bytes. That is, 4 or 8.
2623 * @returns If the kind is not for a table, a fatal assertion is raised.
2625 */
2626DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2627{
2628 switch (enmKind)
2629 {
2630 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2631 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2632 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2633#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2634 case PGMPOOLKIND_32BIT_PD:
2635 case PGMPOOLKIND_32BIT_PD_PHYS:
2636#else
2637 case PGMPOOLKIND_ROOT_32BIT_PD:
2638#endif
2639 return 4;
2640
2641 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2642 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2643 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2644 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2645 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2646 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2647 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2648 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2649 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2650 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2651 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2652 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2653 case PGMPOOLKIND_64BIT_PML4:
2654#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2655 case PGMPOOLKIND_ROOT_PAE_PD:
2656 case PGMPOOLKIND_ROOT_PDPT:
2657#endif
2658 case PGMPOOLKIND_PAE_PDPT:
2659 case PGMPOOLKIND_ROOT_NESTED:
2660 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2661 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2662 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2663 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2664 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2665 case PGMPOOLKIND_PAE_PD_PHYS:
2666 case PGMPOOLKIND_PAE_PDPT_PHYS:
2667 return 8;
2668
2669 default:
2670 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2671 }
2672}
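
/*
 * Illustrative sketch, not part of the original file: the entry size is
 * what converts a byte offset within a shadow table into an entry
 * index, e.g. when decoding the offset of a write fault. enmKind and
 * offFault are hypothetical inputs; PAGE_SIZE and PAGE_OFFSET_MASK are
 * the usual IPRT constants.
 */
#if 0 /* example only */
    unsigned const cbEntry  = pgmPoolTrackGetShadowEntrySize(enmKind);     /* 4 or 8 */
    unsigned const cEntries = PAGE_SIZE / cbEntry;                         /* 1024 or 512 */
    unsigned const iEntry   = (offFault & PAGE_OFFSET_MASK) / cbEntry;
#endif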
2673
2674
2675/**
2676 * Gets the entry size of a guest table.
2677 *
2678 * @param enmKind The kind of page.
2679 *
2680 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2681 * @returns If the kind is not for a table, an assertion is raised and 0 is
2682 * returned.
2683 */
2684DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2685{
2686 switch (enmKind)
2687 {
2688 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2689 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2690#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2691 case PGMPOOLKIND_32BIT_PD:
2692#else
2693 case PGMPOOLKIND_ROOT_32BIT_PD:
2694#endif
2695 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2696 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2697 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2698 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2699 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2700 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2701 return 4;
2702
2703 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2704 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2705 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2706 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2707 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2708 case PGMPOOLKIND_64BIT_PML4:
2709#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2710 case PGMPOOLKIND_PAE_PDPT:
2711#else
2712 case PGMPOOLKIND_ROOT_PAE_PD:
2713 case PGMPOOLKIND_ROOT_PDPT:
2714#endif
2715 return 8;
2716
2717 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2718 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2719 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2720 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2721 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2722 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2723 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2724 case PGMPOOLKIND_ROOT_NESTED:
2725 case PGMPOOLKIND_PAE_PD_PHYS:
2726 case PGMPOOLKIND_PAE_PDPT_PHYS:
2727 case PGMPOOLKIND_32BIT_PD_PHYS:
2728 /** @todo can we return 0? (nobody is calling this...) */
2729 AssertFailed();
2730 return 0;
2731
2732 default:
2733 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2734 }
2735}
2736
2737#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2738
2739/**
2740 * Scans one shadow page table for mappings of a physical page.
2741 *
2742 * @param pVM The VM handle.
2743 * @param pPhysPage The guest page in question.
2744 * @param iShw The shadow page table.
2745 * @param cRefs The number of references made in that PT.
2746 */
2747static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2748{
2749 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2750 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2751
2752 /*
2753 * Assert sanity.
2754 */
2755 Assert(cRefs == 1);
2756 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2757 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2758
2759 /*
2760 * Then, clear the actual mappings to the page in the shadow PT.
2761 */
2762 switch (pPage->enmKind)
2763 {
2764 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2765 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2766 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2767 {
2768 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2769 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2770 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2771 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2772 {
2773 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2774 pPT->a[i].u = 0;
2775 cRefs--;
2776 if (!cRefs)
2777 return;
2778 }
2779#ifdef LOG_ENABLED
2780 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2781 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2782 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2783 {
2784 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2785 pPT->a[i].u = 0;
2786 }
2787#endif
2788 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2789 break;
2790 }
2791
2792 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2793 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2794 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2795 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2796 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2797 {
2798 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2799 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2800 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2801 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2802 {
2803 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2804 pPT->a[i].u = 0;
2805 cRefs--;
2806 if (!cRefs)
2807 return;
2808 }
2809#ifdef LOG_ENABLED
2810 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2811 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2812 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2813 {
2814 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2815 pPT->a[i].u = 0;
2816 }
2817#endif
2818 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
2819 break;
2820 }
2821
2822 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2823 {
2824 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2825 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2826 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2827 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2828 {
2829 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2830 pPT->a[i].u = 0;
2831 cRefs--;
2832 if (!cRefs)
2833 return;
2834 }
2835#ifdef LOG_ENABLED
2836 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2837 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2838 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2839 {
2840 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2841 pPT->a[i].u = 0;
2842 }
2843#endif
2844 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2845 break;
2846 }
2847
2848 default:
2849 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2850 }
2851}
2852
2853
2854/**
2855 * Scans one shadow page table for mappings of a physical page.
2856 *
2857 * @param pVM The VM handle.
2858 * @param pPhysPage The guest page in question.
2859 * @param iShw The shadow page table.
2860 * @param cRefs The number of references made in that PT.
2861 */
2862void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2863{
2864 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2865 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2866 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2867 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2868 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2869 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2870}
2871
2872
2873/**
2874 * Flushes a list of shadow page tables mapping the same physical page.
2875 *
2876 * @param pVM The VM handle.
2877 * @param pPhysPage The guest page in question.
2878 * @param iPhysExt The physical cross reference extent list to flush.
2879 */
2880void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2881{
2882 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2883 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2884 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%d\n", pPhysPage->HCPhys, iPhysExt));
2885
2886 const uint16_t iPhysExtStart = iPhysExt;
2887 PPGMPOOLPHYSEXT pPhysExt;
2888 do
2889 {
2890 Assert(iPhysExt < pPool->cMaxPhysExts);
2891 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2892 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2893 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2894 {
2895 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2896 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2897 }
2898
2899 /* next */
2900 iPhysExt = pPhysExt->iNext;
2901 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2902
2903 /* insert the list into the free list and clear the ram range entry. */
2904 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2905 pPool->iPhysExtFreeHead = iPhysExtStart;
2906 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2907
2908 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2909}
2910
2911#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2912
2913/**
2914 * Scans all shadow page tables for mappings of a physical page.
2915 *
2916 * This may be slow, but it's most likely more efficient than cleaning
2917 * out the entire page pool / cache.
2918 *
2919 * @returns VBox status code.
2920 * @retval VINF_SUCCESS if all references have been successfully cleared.
2921 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2922 * a page pool cleaning.
2923 *
2924 * @param pVM The VM handle.
2925 * @param pPhysPage The guest page in question.
2926 */
2927int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2928{
2929 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2930 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2931 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2932 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2933
2934#if 1
2935 /*
2936 * There is a limit to what makes sense.
2937 */
2938 if (pPool->cPresent > 1024)
2939 {
2940 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2941 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2942 return VINF_PGM_GCPHYS_ALIASED;
2943 }
2944#endif
2945
2946 /*
2947 * Iterate all the pages until we've encountered all that are in use.
2948 * This is a simple but not quite optimal solution.
2949 */
2950 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2951 const uint32_t u32 = u64;
2952 unsigned cLeft = pPool->cUsedPages;
2953 unsigned iPage = pPool->cCurPages;
2954 while (--iPage >= PGMPOOL_IDX_FIRST)
2955 {
2956 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2957 if (pPage->GCPhys != NIL_RTGCPHYS)
2958 {
2959 switch (pPage->enmKind)
2960 {
2961 /*
2962 * We only care about shadow page tables.
2963 */
2964 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2965 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2966 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2967 {
2968 unsigned cPresent = pPage->cPresent;
2969 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2970 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2971 if (pPT->a[i].n.u1Present)
2972 {
2973 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2974 {
2975 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2976 pPT->a[i].u = 0;
2977 }
2978 if (!--cPresent)
2979 break;
2980 }
2981 break;
2982 }
2983
2984 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2985 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2986 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2987 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2988 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2989 {
2990 unsigned cPresent = pPage->cPresent;
2991 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2992 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2993 if (pPT->a[i].n.u1Present)
2994 {
2995 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2996 {
2997 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2998 pPT->a[i].u = 0;
2999 }
3000 if (!--cPresent)
3001 break;
3002 }
3003 break;
3004 }
3005 }
3006 if (!--cLeft)
3007 break;
3008 }
3009 }
3010
3011 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3012 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3013 return VINF_SUCCESS;
3014}
3015
3016
3017/**
3018 * Clears the user entry in a user table.
3019 *
3020 * This is used to remove all references to a page when flushing it.
3021 */
3022static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3023{
3024 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3025 Assert(pUser->iUser < pPool->cCurPages);
3026 uint32_t iUserTable = pUser->iUserTable;
3027
3028 /*
3029 * Map the user page.
3030 */
3031 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3032#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3033 if (pUserPage->enmKind == PGMPOOLKIND_ROOT_PAE_PD)
3034 {
3035 /* Must translate the fake 2048-entry PD to a 512-entry one since the R0 mapping is not linear. */
3036 Assert(pUser->iUser == PGMPOOL_IDX_PAE_PD);
3037 uint32_t iPdpt = iUserTable / X86_PG_PAE_ENTRIES;
3038 iUserTable %= X86_PG_PAE_ENTRIES;
3039 pUserPage = &pPool->aPages[PGMPOOL_IDX_PAE_PD_0 + iPdpt];
3040 Assert(pUserPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD);
3041 }
3042#endif
3043 union
3044 {
3045 uint64_t *pau64;
3046 uint32_t *pau32;
3047 } u;
3048 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3049
3050 /* Safety precaution in case we change the paging for other modes too in the future. */
3051 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
3052
3053#ifdef VBOX_STRICT
3054 /*
3055 * Some sanity checks.
3056 */
3057 switch (pUserPage->enmKind)
3058 {
3059# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3060 case PGMPOOLKIND_32BIT_PD:
3061 case PGMPOOLKIND_32BIT_PD_PHYS:
3062 Assert(iUserTable < X86_PG_ENTRIES);
3063 break;
3064# else
3065 case PGMPOOLKIND_ROOT_32BIT_PD:
3066 Assert(iUserTable < X86_PG_ENTRIES);
3067 Assert(!(u.pau32[iUserTable] & PGM_PDFLAGS_MAPPING));
3068 break;
3069# endif
3070# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3071 case PGMPOOLKIND_ROOT_PAE_PD:
3072 Assert(iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
3073 AssertMsg(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING), ("%llx %d\n", u.pau64[iUserTable], iUserTable));
3074 break;
3075# endif
3076# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3077 case PGMPOOLKIND_PAE_PDPT:
3078 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3079 case PGMPOOLKIND_PAE_PDPT_PHYS:
3080# else
3081 case PGMPOOLKIND_ROOT_PDPT:
3082# endif
3083 Assert(iUserTable < 4);
3084 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3085 break;
3086 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3087 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3088 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3089 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3090 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3091 case PGMPOOLKIND_PAE_PD_PHYS:
3092 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3093 break;
3094 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3095 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3096 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3097 break;
3098 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3099 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3100 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3101 break;
3102 case PGMPOOLKIND_64BIT_PML4:
3103 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3104 /* GCPhys >> PAGE_SHIFT is the index here */
3105 break;
3106 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3107 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3108 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3109 break;
3110
3111 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3112 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3113 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3114 break;
3115
3116 case PGMPOOLKIND_ROOT_NESTED:
3117 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3118 break;
3119
3120 default:
3121 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3122 break;
3123 }
3124#endif /* VBOX_STRICT */
3125
3126 /*
3127 * Clear the entry in the user page.
3128 */
3129 switch (pUserPage->enmKind)
3130 {
3131 /* 32-bit entries */
3132#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3133 case PGMPOOLKIND_32BIT_PD:
3134 case PGMPOOLKIND_32BIT_PD_PHYS:
3135#else
3136 case PGMPOOLKIND_ROOT_32BIT_PD:
3137#endif
3138 u.pau32[iUserTable] = 0;
3139 break;
3140
3141 /* 64-bit entries */
3142 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3143 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3144 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3145 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3146 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3147 case PGMPOOLKIND_PAE_PD_PHYS:
3148 case PGMPOOLKIND_PAE_PDPT_PHYS:
3149 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3150 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3151 case PGMPOOLKIND_64BIT_PML4:
3152 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3153 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3154# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3155 case PGMPOOLKIND_ROOT_PAE_PD:
3156#endif
3157#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3158 case PGMPOOLKIND_PAE_PDPT:
3159 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3160#else
3161 case PGMPOOLKIND_ROOT_PDPT:
3162#endif
3163 case PGMPOOLKIND_ROOT_NESTED:
3164 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3165 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3166 u.pau64[iUserTable] = 0;
3167 break;
3168
3169 default:
3170 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3171 }
3172}
3173
3174
3175/**
3176 * Clears all users of a page.
3177 */
3178static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3179{
3180 /*
3181 * Free all the user records.
3182 */
3183 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3184 uint16_t i = pPage->iUserHead;
3185 while (i != NIL_PGMPOOL_USER_INDEX)
3186 {
3187 /* Clear the entry in the user table. */
3188 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3189
3190 /* Free it. */
3191 const uint16_t iNext = paUsers[i].iNext;
3192 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3193 paUsers[i].iNext = pPool->iUserFreeHead;
3194 pPool->iUserFreeHead = i;
3195
3196 /* Next. */
3197 i = iNext;
3198 }
3199 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3200}
3201
3202#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3203
3204/**
3205 * Allocates a new physical cross reference extent.
3206 *
3207 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3208 * @param pVM The VM handle.
3209 * @param piPhysExt Where to store the phys ext index.
3210 */
3211PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3212{
3213 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3214 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3215 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3216 {
3217 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3218 return NULL;
3219 }
3220 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3221 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3222 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3223 *piPhysExt = iPhysExt;
3224 return pPhysExt;
3225}
3226
3227
3228/**
3229 * Frees a physical cross reference extent.
3230 *
3231 * @param pVM The VM handle.
3232 * @param iPhysExt The extent to free.
3233 */
3234void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3235{
3236 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3237 Assert(iPhysExt < pPool->cMaxPhysExts);
3238 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3239 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3240 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3241 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3242 pPool->iPhysExtFreeHead = iPhysExt;
3243}
3244
3245
3246/**
3247 * Frees a chain of physical cross reference extents.
3248 *
3249 * @param pVM The VM handle.
3250 * @param iPhysExt The index of the head of the extent chain to free.
3251 */
3252void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3253{
3254 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3255
3256 const uint16_t iPhysExtStart = iPhysExt;
3257 PPGMPOOLPHYSEXT pPhysExt;
3258 do
3259 {
3260 Assert(iPhysExt < pPool->cMaxPhysExts);
3261 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3262 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3263 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3264
3265 /* next */
3266 iPhysExt = pPhysExt->iNext;
3267 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3268
3269 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3270 pPool->iPhysExtFreeHead = iPhysExtStart;
3271}
3272
3273
3274/**
3275 * Insert a reference into a list of physical cross reference extents.
3276 *
3277 * @returns The new ram range flags (top 16-bits).
3278 *
3279 * @param pVM The VM handle.
3280 * @param iPhysExt The physical extent index of the list head.
3281 * @param iShwPT The shadow page table index.
3282 *
3283 */
3284static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3285{
3286 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3287 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3288
3289 /* special common case. */
3290 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3291 {
3292 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3293 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3294 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
3295 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3296 }
3297
3298 /* general treatment. */
3299 const uint16_t iPhysExtStart = iPhysExt;
3300 unsigned cMax = 15;
3301 for (;;)
3302 {
3303 Assert(iPhysExt < pPool->cMaxPhysExts);
3304 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3305 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3306 {
3307 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3308 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3309 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3310 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3311 }
3312 if (!--cMax)
3313 {
3314 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3315 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3316 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
3317 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3318 }
        /* Advance to the next extent in the chain, leaving the loop at the
           end of the chain so a new extent can be added below. */
        iPhysExt = paPhysExts[iPhysExt].iNext;
        if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
            break;
3319 }
3320
3321 /* add another extent to the list. */
3322 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3323 if (!pNew)
3324 {
3325 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3326 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3327 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3328 }
3329 pNew->iNext = iPhysExtStart;
3330 pNew->aidx[0] = iShwPT;
3331 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3332 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3333}
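
/*
 * Illustrative sketch, not part of the original file: the uint16_t
 * returned above packs a 'crefs' field and an index field, decoded the
 * same way pgmPoolTrackPhysExtAddref below does it. In the extent form,
 * crefs holds the MM_RAM_FLAGS_CREFS_PHYSEXT marker and the index
 * selects the head of the extent list (or the overflow marker). The
 * context of this fragment is hypothetical.
 */
#if 0 /* example only */
    uint16_t const u16   = pgmPoolTrackPhysExtInsert(pVM, iPhysExt, iShwPT);
    uint16_t const cRefs = u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT);
    uint16_t const idx   = u16 & MM_RAM_FLAGS_IDX_MASK;
    Assert(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT);
    /* idx is either an extent list head or MM_RAM_FLAGS_IDX_OVERFLOWED. */
#endif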
3334
3335
3336/**
3337 * Adds a reference to a guest physical page where extents are in use.
3338 *
3339 * @returns The new ram range flags (top 16-bits).
3340 *
3341 * @param pVM The VM handle.
3342 * @param u16 The ram range flags (top 16-bits).
3343 * @param iShwPT The shadow page table index.
3344 */
3345uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3346{
3347 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
3348 {
3349 /*
3350 * Convert to extent list.
3351 */
3352 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
3353 uint16_t iPhysExt;
3354 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3355 if (pPhysExt)
3356 {
3357 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
3358 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3359 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
3360 pPhysExt->aidx[1] = iShwPT;
3361 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3362 }
3363 else
3364 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3365 }
3366 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
3367 {
3368 /*
3369 * Insert into the extent list.
3370 */
3371 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
3372 }
3373 else
3374 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3375 return u16;
3376}
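
/*
 * Illustrative caller flow (hypothetical helper, not in the upstream file):
 * the first extra reference converts a plain cRefs=1 word into an extent
 * list; later references are appended by pgmPoolTrackPhysExtInsert until
 * the word collapses to the sticky MM_RAM_FLAGS_IDX_OVERFLOWED marker.
 */
#if 0 /* example only */
static uint16_t pgmExampleAddTwoRefs(PVM pVM, uint16_t u16Track)
{
    u16Track = pgmPoolTrackPhysExtAddref(pVM, u16Track, /* iShwPT= */ 1); /* -> extent list */
    u16Track = pgmPoolTrackPhysExtAddref(pVM, u16Track, /* iShwPT= */ 2); /* -> appended */
    return u16Track;
}
#endif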
3377
3378
3379/**
3380 * Clear references to guest physical memory.
3381 *
3382 * @param pPool The pool.
3383 * @param pPage The page.
3384 * @param pPhysPage Pointer to the aPages entry in the ram range.
3385 */
3386void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3387{
3388 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
3389 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3390
3391 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
3392 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
3393 {
3394 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3395 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3396 do
3397 {
3398 Assert(iPhysExt < pPool->cMaxPhysExts);
3399
3400 /*
3401 * Look for the shadow page and check if it's all freed.
3402 */
3403 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3404 {
3405 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3406 {
3407 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3408
3409 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3410 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3411 {
3412 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3413 return;
3414 }
3415
3416 /* we can free the node. */
3417 PVM pVM = pPool->CTX_SUFF(pVM);
3418 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3419 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3420 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3421 {
3422 /* lonely node */
3423 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3424 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
3425 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3426 }
3427 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3428 {
3429 /* head */
3430 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
3431 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
3432 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
3433 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
3434 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3435 }
3436 else
3437 {
3438 /* in list */
3439 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3440 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3441 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3442 }
3443 iPhysExt = iPhysExtNext;
3444 return;
3445 }
3446 }
3447
3448 /* next */
3449 iPhysExtPrev = iPhysExt;
3450 iPhysExt = paPhysExts[iPhysExt].iNext;
3451 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3452
3453 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3454 }
3455 else /* nothing to do */
3456 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
3457}
3458
3459
3460/**
3461 * Clear references to guest physical memory.
3462 *
3463 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3464 * is assumed to be correct, so the linear search can be skipped and we can assert
3465 * at an earlier point.
3466 *
3467 * @param pPool The pool.
3468 * @param pPage The page.
3469 * @param HCPhys The host physical address corresponding to the guest page.
3470 * @param GCPhys The guest physical address corresponding to HCPhys.
3471 */
3472static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3473{
3474 /*
3475 * Walk range list.
3476 */
3477 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3478 while (pRam)
3479 {
3480 RTGCPHYS off = GCPhys - pRam->GCPhys;
3481 if (off < pRam->cb)
3482 {
3483 /* does it match? */
3484 const unsigned iPage = off >> PAGE_SHIFT;
3485 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3486#ifdef LOG_ENABLED
3487 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3488 Log(("pgmPoolTracDerefGCPhys: %RHp vs %RHp\n", HCPhysPage, HCPhys));
3489#endif
3490 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3491 {
3492 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3493 return;
3494 }
3495 break;
3496 }
3497 pRam = pRam->CTX_SUFF(pNext);
3498 }
3499 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3500}
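
/*
 * The `off < pRam->cb` test above relies on unsigned wrap-around: if GCPhys
 * lies below pRam->GCPhys the subtraction wraps to a huge value and fails
 * the compare, so one test covers both range bounds. A minimal standalone
 * sketch of the idiom (hypothetical name, not in the upstream file):
 */
#if 0 /* example only */
DECLINLINE(bool) pgmExampleIsInRange(RTGCPHYS GCPhys, RTGCPHYS GCPhysFirst, RTGCPHYS cb)
{
    return GCPhys - GCPhysFirst < cb; /* wraps (and fails) when GCPhys < GCPhysFirst */
}
#endif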
3501
3502
3503/**
3504 * Clear references to guest physical memory.
3505 *
3506 * @param pPool The pool.
3507 * @param pPage The page.
3508 * @param HCPhys The host physical address corresponding to the guest page.
3509 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3510 */
3511static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3512{
3513 /*
3514 * Walk range list.
3515 */
3516 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3517 while (pRam)
3518 {
3519 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3520 if (off < pRam->cb)
3521 {
3522 /* does it match? */
3523 const unsigned iPage = off >> PAGE_SHIFT;
3524 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3525 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3526 {
3527 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3528 return;
3529 }
3530 break;
3531 }
3532 pRam = pRam->CTX_SUFF(pNext);
3533 }
3534
3535 /*
3536 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3537 */
3538 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3539 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3540 while (pRam)
3541 {
3542 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3543 while (iPage-- > 0)
3544 {
3545 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3546 {
3547 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3548 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3549 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3550 return;
3551 }
3552 }
3553 pRam = pRam->CTX_SUFF(pNext);
3554 }
3555
3556 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3557}
3558
3559
3560/**
3561 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3562 *
3563 * @param pPool The pool.
3564 * @param pPage The page.
3565 * @param pShwPT The shadow page table (mapping of the page).
3566 * @param pGstPT The guest page table.
3567 */
3568DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3569{
3570 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3571 if (pShwPT->a[i].n.u1Present)
3572 {
3573 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3574 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3575 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3576 if (!--pPage->cPresent)
3577 break;
3578 }
3579}
3580
3581
3582/**
3583 * Clear references to guest physical memory in a PAE / 32-bit page table.
3584 *
3585 * @param pPool The pool.
3586 * @param pPage The page.
3587 * @param pShwPT The shadow page table (mapping of the page).
3588 * @param pGstPT The guest page table (just a half one).
3589 */
3590DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3591{
3592 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3593 if (pShwPT->a[i].n.u1Present)
3594 {
3595 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3596 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3597 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3598 }
3599}
3600
3601
3602/**
3603 * Clear references to guest physical memory in a PAE / PAE page table.
3604 *
3605 * @param pPool The pool.
3606 * @param pPage The page.
3607 * @param pShwPT The shadow page table (mapping of the page).
3608 * @param pGstPT The guest page table.
3609 */
3610DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3611{
3612 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3613 if (pShwPT->a[i].n.u1Present)
3614 {
3615 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3616 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3617 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3618 }
3619}
3620
3621
3622/**
3623 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3624 *
3625 * @param pPool The pool.
3626 * @param pPage The page.
3627 * @param pShwPT The shadow page table (mapping of the page).
3628 */
3629DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3630{
3631 RTGCPHYS GCPhys = pPage->GCPhys;
3632 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3633 if (pShwPT->a[i].n.u1Present)
3634 {
3635 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3636 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3637 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3638 }
3639}
3640
3641
3642/**
3643 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3644 *
3645 * @param pPool The pool.
3646 * @param pPage The page.
3647 * @param pShwPT The shadow page table (mapping of the page).
3648 */
3649DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3650{
3651 RTGCPHYS GCPhys = pPage->GCPhys;
3652 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3653 if (pShwPT->a[i].n.u1Present)
3654 {
3655 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3656 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3657 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3658 }
3659}
3660
3661#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3662
3663
3664#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3665/**
3666 * Clear references to shadowed pages in a 32-bit page directory.
3667 *
3668 * @param pPool The pool.
3669 * @param pPage The page.
3670 * @param pShwPD The shadow page directory (mapping of the page).
3671 */
3672DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
3673{
3674 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3675 {
3676 if ( pShwPD->a[i].n.u1Present
3677 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3678 )
3679 {
3680 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
3681 if (pSubPage)
3682 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3683 else
3684 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
3685 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3686 }
3687 }
3688}
3689#endif
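
/*
 * All the Deref walkers below share one step: translate an entry's host
 * physical address back to the owning pool page via the HCPhys AVL tree,
 * then drop this page's user reference on it. A sketch of that step
 * (hypothetical helper, not in the upstream file):
 */
#if 0 /* example only */
DECLINLINE(void) pgmExampleDerefOneEntry(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhysEntry, unsigned iEntry)
{
    PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhysEntry);
    if (pSubPage)
        pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, iEntry);
    else
        AssertFatalMsgFailed(("%RHp\n", HCPhysEntry));
}
#endif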
3690
3691/**
3692 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
3693 *
3694 * @param pPool The pool.
3695 * @param pPage The page.
3696 * @param pShwPD The shadow page directory (mapping of the page).
3697 */
3698DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3699{
3700 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3701 {
3702 if ( pShwPD->a[i].n.u1Present
3703#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3704 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3705#endif
3706 )
3707 {
3708 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3709 if (pSubPage)
3710 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3711 else
3712 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3713 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3714 }
3715 }
3716}
3717
3718
3719/**
3720 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3721 *
3722 * @param pPool The pool.
3723 * @param pPage The page.
3724 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3725 */
3726DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3727{
3728 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3729 {
3730 if ( pShwPDPT->a[i].n.u1Present
3731#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3732 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
3733#endif
3734 )
3735 {
3736 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3737 if (pSubPage)
3738 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3739 else
3740 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3741 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3742 }
3743 }
3744}
3745
3746
3747/**
3748 * Clear references to shadowed pages in a 64-bit level 4 page table.
3749 *
3750 * @param pPool The pool.
3751 * @param pPage The page.
3752 * @param pShwPML4 The shadow PML4 (mapping of the page).
3753 */
3754DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3755{
3756 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3757 {
3758 if (pShwPML4->a[i].n.u1Present)
3759 {
3760 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3761 if (pSubPage)
3762 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3763 else
3764 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3765 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3766 }
3767 }
3768}
3769
3770
3771/**
3772 * Clear references to shadowed pages in an EPT page table.
3773 *
3774 * @param pPool The pool.
3775 * @param pPage The page.
3776 * @param pShwPT The shadow page table (mapping of the page).
3777 */
3778DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3779{
3780 RTGCPHYS GCPhys = pPage->GCPhys;
3781 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3782 if (pShwPT->a[i].n.u1Present)
3783 {
3784 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
3785 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3786 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3787 }
3788}
3789
3790
3791/**
3792 * Clear references to shadowed pages in an EPT page directory.
3793 *
3794 * @param pPool The pool.
3795 * @param pPage The page.
3796 * @param pShwPD The shadow page directory (mapping of the page).
3797 */
3798DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3799{
3800 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3801 {
3802 if (pShwPD->a[i].n.u1Present)
3803 {
3804 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3805 if (pSubPage)
3806 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3807 else
3808 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3809 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3810 }
3811 }
3812}
3813
3814
3815/**
3816 * Clear references to shadowed pages in an EPT page directory pointer table.
3817 *
3818 * @param pPool The pool.
3819 * @param pPage The page.
3820 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3821 */
3822DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3823{
3824 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3825 {
3826 if (pShwPDPT->a[i].n.u1Present)
3827 {
3828 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3829 if (pSubPage)
3830 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3831 else
3832 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3833 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3834 }
3835 }
3836}
3837
3838
3839/**
3840 * Clears all references made by this page.
3841 *
3842 * This includes other shadow pages and GC physical addresses.
3843 *
3844 * @param pPool The pool.
3845 * @param pPage The page.
3846 */
3847static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3848{
3849 /*
3850 * Map the shadow page and take action according to the page kind.
3851 */
3852 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3853 switch (pPage->enmKind)
3854 {
3855#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3856 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3857 {
3858 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3859 void *pvGst;
3860 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3861 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3862 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3863 break;
3864 }
3865
3866 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3867 {
3868 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3869 void *pvGst;
3870 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3871 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3872 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3873 break;
3874 }
3875
3876 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3877 {
3878 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3879 void *pvGst;
3880 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3881 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3882 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3883 break;
3884 }
3885
3886 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3887 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3888 {
3889 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3890 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3891 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3892 break;
3893 }
3894
3895 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3896 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3897 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3898 {
3899 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3900 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3901 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3902 break;
3903 }
3904
3905#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3906 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3907 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3908 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3909 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3910 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3911 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3912 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3913 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3914 break;
3915#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3916
3917 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3918 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3919 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3920 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3921 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3922 case PGMPOOLKIND_PAE_PD_PHYS:
3923 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3924 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3925 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3926 break;
3927
3928#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3929 case PGMPOOLKIND_32BIT_PD:
3930 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
3931 break;
3932
3933 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3934 case PGMPOOLKIND_PAE_PDPT:
3935 case PGMPOOLKIND_PAE_PDPT_PHYS:
3936#endif
3937 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3938 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3939 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3940 break;
3941
3942 case PGMPOOLKIND_64BIT_PML4:
3943 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3944 break;
3945
3946 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3947 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
3948 break;
3949
3950 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3951 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
3952 break;
3953
3954 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3955 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
3956 break;
3957
3958 default:
3959 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3960 }
3961
3962 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3963 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3964 ASMMemZeroPage(pvShw);
3965 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3966 pPage->fZeroed = true;
3967}
3968
3969#endif /* PGMPOOL_WITH_USER_TRACKING */
3970
3971/**
3972 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3973 *
3974 * @param pPool The pool.
3975 */
3976static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3977{
3978#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3979 /* Start a subset so we won't run out of mapping space. */
3980 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
3981 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3982#endif
3983
3984 /*
3985 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
3986 */
3987 Assert(NIL_PGMPOOL_IDX == 0);
3988 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3989 {
3990 /*
3991 * Get the page address.
3992 */
3993 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3994 union
3995 {
3996 uint64_t *pau64;
3997 uint32_t *pau32;
3998 } u;
3999
4000 /*
4001 * Mark stuff not present.
4002 */
4003 switch (pPage->enmKind)
4004 {
4005#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4006 case PGMPOOLKIND_ROOT_32BIT_PD:
4007 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4008 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
4009 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
4010 u.pau32[iPage] = 0;
4011 break;
4012
4013 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4014 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4015 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
4016 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
4017 u.pau64[iPage] = 0;
4018 break;
4019
4020 case PGMPOOLKIND_ROOT_PDPT:
4021 /* Not root of shadowed pages currently, ignore it. */
4022 break;
4023#endif
4024
4025 case PGMPOOLKIND_ROOT_NESTED:
4026 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4027 ASMMemZero32(u.pau64, PAGE_SIZE);
4028 break;
4029 }
4030 }
4031
4032 /*
4033 * Paranoia (to be removed), flag a global CR3 sync.
4034 */
4035 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
4036
4037#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4038 /* Pop the subset. */
4039 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4040#endif
4041}
4042
4043
4044/**
4045 * Flushes the entire cache.
4046 *
4047 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4048 * and will execute the CR3 flush.
4049 *
4050 * @param pPool The pool.
4051 */
4052static void pgmPoolFlushAllInt(PPGMPOOL pPool)
4053{
4054 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
4055 LogFlow(("pgmPoolFlushAllInt:\n"));
4056
4057 /*
4058 * If there are no pages in the pool, there is nothing to do.
4059 */
4060 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4061 {
4062 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4063 return;
4064 }
4065
4066 /*
4067 * Nuke the free list and reinsert all pages into it.
4068 */
4069 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4070 {
4071 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4072
4073#ifdef IN_RING3
4074 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMR3, pPage->pvPageR3));
4075#endif
4076#ifdef PGMPOOL_WITH_MONITORING
4077 if (pPage->fMonitored)
4078 pgmPoolMonitorFlush(pPool, pPage);
4079 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4080 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4081 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4082 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4083 pPage->cModifications = 0;
4084#endif
4085 pPage->GCPhys = NIL_RTGCPHYS;
4086 pPage->enmKind = PGMPOOLKIND_FREE;
4087 Assert(pPage->idx == i);
4088 pPage->iNext = i + 1;
4089 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4090 pPage->fSeenNonGlobal = false;
4091 pPage->fMonitored = false;
4092 pPage->fCached = false;
4093 pPage->fReusedFlushPending = false;
4094 pPage->fCR3Mix = false;
4095#ifdef PGMPOOL_WITH_USER_TRACKING
4096 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4097#endif
4098#ifdef PGMPOOL_WITH_CACHE
4099 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4100 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4101#endif
4102 }
4103 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4104 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4105 pPool->cUsedPages = 0;
4106
4107#ifdef PGMPOOL_WITH_USER_TRACKING
4108 /*
4109 * Zap and reinitialize the user records.
4110 */
4111 pPool->cPresent = 0;
4112 pPool->iUserFreeHead = 0;
4113 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4114 const unsigned cMaxUsers = pPool->cMaxUsers;
4115 for (unsigned i = 0; i < cMaxUsers; i++)
4116 {
4117 paUsers[i].iNext = i + 1;
4118 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4119 paUsers[i].iUserTable = 0xfffffffe;
4120 }
4121 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4122#endif
4123
4124#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4125 /*
4126 * Clear all the GCPhys links and rebuild the phys ext free list.
4127 */
4128 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4129 pRam;
4130 pRam = pRam->CTX_SUFF(pNext))
4131 {
4132 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4133 while (iPage-- > 0)
4134 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
4135 }
4136
4137 pPool->iPhysExtFreeHead = 0;
4138 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4139 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4140 for (unsigned i = 0; i < cMaxPhysExts; i++)
4141 {
4142 paPhysExts[i].iNext = i + 1;
4143 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4144 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4145 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4146 }
4147 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4148#endif
4149
4150#ifdef PGMPOOL_WITH_MONITORING
4151 /*
4152 * Just zap the modified list.
4153 */
4154 pPool->cModifiedPages = 0;
4155 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4156#endif
4157
4158#ifdef PGMPOOL_WITH_CACHE
4159 /*
4160 * Clear the GCPhys hash and the age list.
4161 */
4162 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4163 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4164 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4165 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4166#endif
4167
4168 /*
4169 * Flush all the special root pages.
4170 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4171 */
4172 pgmPoolFlushAllSpecialRoots(pPool);
4173 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4174 {
4175 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4176 pPage->iNext = NIL_PGMPOOL_IDX;
4177#ifdef PGMPOOL_WITH_MONITORING
4178 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4179 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4180 pPage->cModifications = 0;
4181 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4182 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4183 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4184 if (pPage->fMonitored)
4185 {
4186 PVM pVM = pPool->CTX_SUFF(pVM);
4187 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4188 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4189 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4190 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4191 pPool->pszAccessHandler);
4192 AssertFatalRCSuccess(rc);
4193# ifdef PGMPOOL_WITH_CACHE
4194 pgmPoolHashInsert(pPool, pPage);
4195# endif
4196 }
4197#endif
4198#ifdef PGMPOOL_WITH_USER_TRACKING
4199 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4200#endif
4201#ifdef PGMPOOL_WITH_CACHE
4202 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4203 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4204#endif
4205 }
4206
4207 /*
4208 * Finally, assert the FF.
4209 */
4210 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
4211
4212 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4213}
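
/*
 * Illustrative invariant check (hypothetical, not in the upstream file):
 * after pgmPoolFlushAllInt the free list should chain every ordinary page
 * from PGMPOOL_IDX_FIRST up to cCurPages - 1 and end with NIL_PGMPOOL_IDX.
 */
#if 0 /* example only */
static void pgmExampleAssertFreeListAfterFlush(PPGMPOOL pPool)
{
    unsigned cFree = 0;
    for (uint16_t i = pPool->iFreeHead; i != NIL_PGMPOOL_IDX; i = pPool->aPages[i].iNext)
        cFree++;
    Assert(cFree == pPool->cCurPages - PGMPOOL_IDX_FIRST);
}
#endif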
4214
4215
4216/**
4217 * Flushes a pool page.
4218 *
4219 * This moves the page to the free list after removing all user references to it.
4220 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
4221 *
4222 * @returns VBox status code.
4223 * @retval VINF_SUCCESS on success.
4224 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
4225 * @param pPool The pool.
4226 * @param pPage The shadow page to flush.
4227 */
4228int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4229{
4230 int rc = VINF_SUCCESS;
4231 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4232 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4233 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4234
4235 /*
4236 * Quietly reject any attempts at flushing any of the special root pages.
4237 */
4238 if (pPage->idx < PGMPOOL_IDX_FIRST)
4239 {
4240 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4241 return VINF_SUCCESS;
4242 }
4243
4244 /*
4245 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4246 */
4247 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
4248 {
4249#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4250 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4,
4251 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4252#endif
4253 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4254 return VINF_SUCCESS;
4255 }
4256
4257#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4258 /* Start a subset so we won't run out of mapping space. */
4259 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4260 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4261#endif
4262
4263 /*
4264 * Mark the page as being in need of an ASMMemZeroPage().
4265 */
4266 pPage->fZeroed = false;
4267
4268#ifdef PGMPOOL_WITH_USER_TRACKING
4269 /*
4270 * Clear the page.
4271 */
4272 pgmPoolTrackClearPageUsers(pPool, pPage);
4273 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4274 pgmPoolTrackDeref(pPool, pPage);
4275 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4276#endif
4277
4278#ifdef PGMPOOL_WITH_CACHE
4279 /*
4280 * Flush it from the cache.
4281 */
4282 pgmPoolCacheFlushPage(pPool, pPage);
4283#endif /* PGMPOOL_WITH_CACHE */
4284
4285#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4286 /* Heavy stuff done. */
4287 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4288#endif
4289
4290#ifdef PGMPOOL_WITH_MONITORING
4291 /*
4292 * Deregister the monitoring.
4293 */
4294 if (pPage->fMonitored)
4295 rc = pgmPoolMonitorFlush(pPool, pPage);
4296#endif
4297
4298 /*
4299 * Free the page.
4300 */
4301 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4302 pPage->iNext = pPool->iFreeHead;
4303 pPool->iFreeHead = pPage->idx;
4304 pPage->enmKind = PGMPOOLKIND_FREE;
4305 pPage->GCPhys = NIL_RTGCPHYS;
4306 pPage->fReusedFlushPending = false;
4307
4308 pPool->cUsedPages--;
4309 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4310 return rc;
4311}
4312
4313
4314/**
4315 * Frees a usage of a pool page.
4316 *
4317 * The caller is responsible for updating the user table so that it no longer
4318 * references the shadow page.
4319 *
4320 * @param pPool The pool.
4321 * @param pPage The shadow page.
4322 * @param iUser The shadow page pool index of the user table.
4323 * @param iUserTable The index into the user table (shadowed).
4324 */
4325void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4326{
4327 STAM_PROFILE_START(&pPool->StatFree, a);
4328 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4329 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4330 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4331#ifdef PGMPOOL_WITH_USER_TRACKING
4332 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4333#endif
4334#ifdef PGMPOOL_WITH_CACHE
4335 if (!pPage->fCached)
4336#endif
4337 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
4338 STAM_PROFILE_STOP(&pPool->StatFree, a);
4339}
4340
4341
4342/**
4343 * Ensures that one or more free pages are available.
4344 *
4345 * @returns VBox status code.
4346 * @retval VINF_SUCCESS on success.
4347 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4348 *
4349 * @param pPool The pool.
4350 * @param iUser The user of the page.
4351 */
4352static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
4353{
4354 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4355
4356 /*
4357 * If the pool isn't fully grown yet, expand it.
4358 */
4359 if (pPool->cCurPages < pPool->cMaxPages)
4360 {
4361 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4362#ifdef IN_RING3
4363 int rc = PGMR3PoolGrow(pPool->pVMR3);
4364#else
4365 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
4366#endif
4367 if (RT_FAILURE(rc))
4368 return rc;
4369 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4370 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4371 return VINF_SUCCESS;
4372 }
4373
4374#ifdef PGMPOOL_WITH_CACHE
4375 /*
4376 * Free one cached page.
4377 */
4378 return pgmPoolCacheFreeOne(pPool, iUser);
4379#else
4380 /*
4381 * Flush the pool.
4382 *
4383 * If we have tracking enabled, it should be possible to come up with
4384 * a cheap replacement strategy...
4385 */
4386 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
4387 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
4388 pgmPoolFlushAllInt(pPool);
4389 return VERR_PGM_POOL_FLUSHED;
4390#endif
4391}
4392
4393
4394/**
4395 * Allocates a page from the pool.
4396 *
4397 * This page may actually be a cached page and not in need of any processing
4398 * on the callers part.
4399 *
4400 * @returns VBox status code.
4401 * @retval VINF_SUCCESS if a NEW page was allocated.
4402 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4403 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4404 * @param pVM The VM handle.
4405 * @param GCPhys The GC physical address of the page we're going to shadow.
4406 * For 4MB and 2MB PD entries, it's the first address the
4407 * shadow PT is covering.
4408 * @param enmKind The kind of mapping.
4409 * @param iUser The shadow page pool index of the user table.
4410 * @param iUserTable The index into the user table (shadowed).
4411 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4412 */
4413int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
4414{
4415 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4416 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4417 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4418 *ppPage = NULL;
4419 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4420 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4421 * Assert(!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4422
4423#ifdef PGMPOOL_WITH_CACHE
4424 if (pPool->fCacheEnabled)
4425 {
4426 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
4427 if (RT_SUCCESS(rc2))
4428 {
4429 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4430 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4431 return rc2;
4432 }
4433 }
4434#endif
4435
4436 /*
4437 * Allocate a new one.
4438 */
4439 int rc = VINF_SUCCESS;
4440 uint16_t iNew = pPool->iFreeHead;
4441 if (iNew == NIL_PGMPOOL_IDX)
4442 {
4443 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
4444 if (RT_FAILURE(rc))
4445 {
4446 if (rc != VERR_PGM_POOL_CLEARED)
4447 {
4448 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4449 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4450 return rc;
4451 }
4452 Log(("pgmPoolMakeMoreFreePages failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
4453 rc = VERR_PGM_POOL_FLUSHED;
4454 }
4455 iNew = pPool->iFreeHead;
4456 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4457 }
4458
4459 /* unlink the free head */
4460 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4461 pPool->iFreeHead = pPage->iNext;
4462 pPage->iNext = NIL_PGMPOOL_IDX;
4463
4464 /*
4465 * Initialize it.
4466 */
4467 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4468 pPage->enmKind = enmKind;
4469 pPage->GCPhys = GCPhys;
4470 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4471 pPage->fMonitored = false;
4472 pPage->fCached = false;
4473 pPage->fReusedFlushPending = false;
4474 pPage->fCR3Mix = false;
4475#ifdef PGMPOOL_WITH_MONITORING
4476 pPage->cModifications = 0;
4477 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4478 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4479#endif
4480#ifdef PGMPOOL_WITH_USER_TRACKING
4481 pPage->cPresent = 0;
4482 pPage->iFirstPresent = ~0;
4483
4484 /*
4485 * Insert into the tracking and cache. If this fails, free the page.
4486 */
4487 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4488 if (RT_FAILURE(rc3))
4489 {
4490 if (rc3 != VERR_PGM_POOL_CLEARED)
4491 {
4492 pPool->cUsedPages--;
4493 pPage->enmKind = PGMPOOLKIND_FREE;
4494 pPage->GCPhys = NIL_RTGCPHYS;
4495 pPage->iNext = pPool->iFreeHead;
4496 pPool->iFreeHead = pPage->idx;
4497 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4498 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4499 return rc3;
4500 }
4501 Log(("pgmPoolTrackInsert failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4502 rc = VERR_PGM_POOL_FLUSHED;
4503 }
4504#endif /* PGMPOOL_WITH_USER_TRACKING */
4505
4506 /*
4507 * Commit the allocation, clear the page and return.
4508 */
4509#ifdef VBOX_WITH_STATISTICS
4510 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4511 pPool->cUsedPagesHigh = pPool->cUsedPages;
4512#endif
4513
4514 if (!pPage->fZeroed)
4515 {
4516 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4517 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4518 ASMMemZeroPage(pv);
4519 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4520 }
4521
4522 *ppPage = pPage;
4523 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4524 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4525 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4526 return rc;
4527}
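
/*
 * Illustrative caller pattern (hypothetical values and helper name, not in
 * the upstream file): shadow a 32-bit guest page table and release it again.
 * Note that VINF_PGM_CACHED_PAGE also satisfies RT_SUCCESS().
 */
#if 0 /* example only */
static int pgmExampleShadowOnePT(PVM pVM, RTGCPHYS GCPhysGstPT, uint16_t iUser, uint32_t iUserTable)
{
    PPGMPOOLPAGE pPage;
    int rc = pgmPoolAlloc(pVM, GCPhysGstPT, PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT, iUser, iUserTable, &pPage);
    if (RT_SUCCESS(rc))
    {
        /* ... enter pPage->Core.Key (the HCPhys) into the parent PDE here ... */
        pgmPoolFreeByPage(pVM->pgm.s.CTX_SUFF(pPool), pPage, iUser, iUserTable);
    }
    return rc;
}
#endif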
4528
4529
4530/**
4531 * Frees a usage of a pool page.
4532 *
4533 * @param pVM The VM handle.
4534 * @param HCPhys The HC physical address of the shadow page.
4535 * @param iUser The shadow page pool index of the user table.
4536 * @param iUserTable The index into the user table (shadowed).
4537 */
4538void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4539{
4540 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4541 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4542 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4543}
4544
4545
4546/**
4547 * Gets an in-use page in the pool by its physical address.
4548 *
4549 * @returns Pointer to the page.
4550 * @param pVM The VM handle.
4551 * @param HCPhys The HC physical address of the shadow page.
4552 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4553 */
4554PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4555{
4556 /** @todo profile this! */
4557 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4558 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4559 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%RHp -> %p:{.idx=%d .GCPhys=%RGp .enmKind=%s}\n",
4560 HCPhys, pPage, pPage->idx, pPage->GCPhys, pgmPoolPoolKindToStr(pPage->enmKind)));
4561 return pPage;
4562}
4563
4564
4565/**
4566 * Flushes the entire cache.
4567 *
4568 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4569 * and will execute the CR3 flush.
4570 *
4571 * @param pVM The VM handle.
4572 */
4573void pgmPoolFlushAll(PVM pVM)
4574{
4575 LogFlow(("pgmPoolFlushAll:\n"));
4576 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4577}
4578
4579#ifdef LOG_ENABLED
4580static char *pgmPoolPoolKindToStr(uint8_t enmKind)
4581{
4582 switch(enmKind)
4583 {
4584 case PGMPOOLKIND_INVALID:
4585 return "PGMPOOLKIND_INVALID";
4586 case PGMPOOLKIND_FREE:
4587 return "PGMPOOLKIND_FREE";
4588 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4589 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4590 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4591 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4592 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4593 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4594 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4595 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4596 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4597 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4598 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4599 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
4600 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4601 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
4602 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4603 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
4604 case PGMPOOLKIND_32BIT_PD:
4605 return "PGMPOOLKIND_32BIT_PD";
4606 case PGMPOOLKIND_32BIT_PD_PHYS:
4607 return "PGMPOOLKIND_32BIT_PD_PHYS";
4608 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4609 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
4610 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4611 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
4612 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4613 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
4614 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4615 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
4616 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4617 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
4618 case PGMPOOLKIND_PAE_PD_PHYS:
4619 return "PGMPOOLKIND_PAE_PD_PHYS";
4620 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4621 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
4622 case PGMPOOLKIND_PAE_PDPT:
4623 return "PGMPOOLKIND_PAE_PDPT";
4624 case PGMPOOLKIND_PAE_PDPT_PHYS:
4625 return "PGMPOOLKIND_PAE_PDPT_PHYS";
4626 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4627 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
4628 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4629 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
4630 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4631 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
4632 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4633 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
4634 case PGMPOOLKIND_64BIT_PML4:
4635 return "PGMPOOLKIND_64BIT_PML4";
4636 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4637 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
4638 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4639 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
4640 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4641 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
4642#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4643 case PGMPOOLKIND_ROOT_32BIT_PD:
4644 return "PGMPOOLKIND_ROOT_32BIT_PD";
4645 case PGMPOOLKIND_ROOT_PAE_PD:
4646 return "PGMPOOLKIND_ROOT_PAE_PD";
4647 case PGMPOOLKIND_ROOT_PDPT:
4648 return "PGMPOOLKIND_ROOT_PDPT";
4649#endif
4650 case PGMPOOLKIND_ROOT_NESTED:
4651 return "PGMPOOLKIND_ROOT_NESTED";
4652 }
4653 return "Unknown kind!";
4654}
4655#endif