VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@17015

Last change on this file since 17015 was 17000, checked in by vboxsync, 16 years ago

Rewrote pgmPoolMonitorGCPtr2CCPtr. Deal with invalid GC shadow pages during fault handling.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 172.7 KB
1/* $Id: PGMAllPool.cpp 17000 2009-02-23 10:01:12Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42#include <iprt/string.h>
43
44
45/*******************************************************************************
46* Internal Functions *
47*******************************************************************************/
48__BEGIN_DECLS
49static void pgmPoolFlushAllInt(PPGMPOOL pPool);
50#ifdef PGMPOOL_WITH_USER_TRACKING
51DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
52DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
53static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
54#endif
55#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
56static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
57#endif
58#ifdef PGMPOOL_WITH_CACHE
59static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
60#endif
61#ifdef PGMPOOL_WITH_MONITORING
62static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
63#endif
64#ifndef IN_RING3
65DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
66#endif
67#ifdef LOG_ENABLED
68static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
69#endif
70__END_DECLS
71
72
73/**
74 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
75 *
76 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
77 * @param enmKind The page kind.
78 */
79DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
80{
81 switch (enmKind)
82 {
83 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
84 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
85 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
86 return true;
87 default:
88 return false;
89 }
90}
91
92
93#if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
94/**
95 * Maps a pool page into the current context.
96 *
97 * @returns Pointer to the mapping.
98 * @param pPGM Pointer to the PGM instance data.
99 * @param pPage The page to map.
100 */
101void *pgmPoolMapPageFallback(PPGM pPGM, PPGMPOOLPAGE pPage)
102{
103 /* general pages are taken care of by the inlined part; we
104 only end up here in case of failure. */
105 AssertReleaseReturn(pPage->idx < PGMPOOL_IDX_FIRST, NULL);
106
107/** @todo make sure HCPhys is valid for *all* indexes. */
108 /* special pages. */
109# ifdef IN_RC
110 switch (pPage->idx)
111 {
112# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
113 case PGMPOOL_IDX_PD:
114 case PGMPOOL_IDX_PDPT:
115 case PGMPOOL_IDX_AMD64_CR3:
116 return pPGM->pShwRootRC;
117# else
118 case PGMPOOL_IDX_PD:
119 return pPGM->pShw32BitPdRC;
120 case PGMPOOL_IDX_PAE_PD:
121 case PGMPOOL_IDX_PAE_PD_0:
122 return pPGM->apShwPaePDsRC[0];
123 case PGMPOOL_IDX_PAE_PD_1:
124 return pPGM->apShwPaePDsRC[1];
125 case PGMPOOL_IDX_PAE_PD_2:
126 return pPGM->apShwPaePDsRC[2];
127 case PGMPOOL_IDX_PAE_PD_3:
128 return pPGM->apShwPaePDsRC[3];
129 case PGMPOOL_IDX_PDPT:
130 return pPGM->pShwPaePdptRC;
131# endif
132 default:
133 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
134 return NULL;
135 }
136
137# else /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
138 RTHCPHYS HCPhys;
139 switch (pPage->idx)
140 {
141# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
142 case PGMPOOL_IDX_PD:
143 case PGMPOOL_IDX_PDPT:
144 case PGMPOOL_IDX_AMD64_CR3:
145 HCPhys = pPGM->HCPhysShwCR3;
146 break;
147
148 case PGMPOOL_IDX_NESTED_ROOT:
149 HCPhys = pPGM->HCPhysShwNestedRoot;
150 break;
151# else
152 case PGMPOOL_IDX_PD:
153 HCPhys = pPGM->HCPhysShw32BitPD;
154 break;
155 case PGMPOOL_IDX_PAE_PD_0:
156 HCPhys = pPGM->aHCPhysPaePDs[0];
157 break;
158 case PGMPOOL_IDX_PAE_PD_1:
159 HCPhys = pPGM->aHCPhysPaePDs[1];
160 break;
161 case PGMPOOL_IDX_PAE_PD_2:
162 HCPhys = pPGM->aHCPhysPaePDs[2];
163 break;
164 case PGMPOOL_IDX_PAE_PD_3:
165 HCPhys = pPGM->aHCPhysPaePDs[3];
166 break;
167 case PGMPOOL_IDX_PDPT:
168 HCPhys = pPGM->HCPhysShwPaePdpt;
169 break;
170 case PGMPOOL_IDX_NESTED_ROOT:
171 HCPhys = pPGM->HCPhysShwNestedRoot;
172 break;
173 case PGMPOOL_IDX_PAE_PD:
174 AssertReleaseMsgFailed(("PGMPOOL_IDX_PAE_PD is not usable in VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 context\n"));
175 return NULL;
176# endif
177 default:
178 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
179 return NULL;
180 }
181 AssertMsg(HCPhys && HCPhys != NIL_RTHCPHYS && !(PAGE_OFFSET_MASK & HCPhys), ("%RHp\n", HCPhys));
182
183 void *pv;
184 pgmR0DynMapHCPageInlined(pPGM, HCPhys, &pv);
185 return pv;
186# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
187}
188#endif /* IN_RC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
189
190
191#ifdef PGMPOOL_WITH_MONITORING
192/**
193 * Determine the size of a write instruction.
194 * @returns number of bytes written.
195 * @param pDis The disassembler state.
196 */
197static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
198{
199 /*
200 * This is very crude and possibly wrong for some opcodes,
201 * but since it's not really supposed to be called we can
202 * probably live with that.
203 */
204 return DISGetParamSize(pDis, &pDis->param1);
205}
206
207
208/**
209 * Flushes a chain of pages sharing the same access monitor.
210 *
211 * @returns VBox status code suitable for scheduling.
212 * @param pPool The pool.
213 * @param pPage A page in the chain.
214 */
215int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
216{
217 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
218
219 /*
220 * Find the list head.
221 */
222 uint16_t idx = pPage->idx;
223 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
224 {
225 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
226 {
227 idx = pPage->iMonitoredPrev;
228 Assert(idx != pPage->idx);
229 pPage = &pPool->aPages[idx];
230 }
231 }
232
233 /*
234 * Iterate the list flushing each shadow page.
235 */
236 int rc = VINF_SUCCESS;
237 for (;;)
238 {
239 idx = pPage->iMonitoredNext;
240 Assert(idx != pPage->idx);
241 if (pPage->idx >= PGMPOOL_IDX_FIRST)
242 {
243 int rc2 = pgmPoolFlushPage(pPool, pPage);
244 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
245 rc = VINF_PGM_SYNC_CR3;
246 }
247 /* next */
248 if (idx == NIL_PGMPOOL_IDX)
249 break;
250 pPage = &pPool->aPages[idx];
251 }
252 return rc;
253}
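/* Illustrative sketch (not part of the original source): the monitor chain
 * above is an intrusive doubly linked list kept as uint16_t indexes into the
 * pool's page array, with NIL_PGMPOOL_IDX as the end marker, and iNext is
 * fetched before the page is flushed so the walk survives the free. A minimal
 * stand-alone model of the "rewind to head, then walk forward" pattern, using
 * hypothetical NODE/NIL_IDX names: */
#if 0
#include <stdint.h>
#define NIL_IDX UINT16_MAX
typedef struct NODE { uint16_t iPrev, iNext; } NODE;

static void walkChain(NODE *paNodes, uint16_t idx)
{
    while (paNodes[idx].iPrev != NIL_IDX)    /* rewind to the list head */
        idx = paNodes[idx].iPrev;
    for (;;)                                 /* then visit every node */
    {
        uint16_t iNext = paNodes[idx].iNext; /* fetch before freeing the node */
        /* ... flush / process paNodes[idx] here ... */
        if (iNext == NIL_IDX)
            break;
        idx = iNext;
    }
}
#endif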
254
255
256/**
257 * Wrapper for reading the guest entry being modified through the current context pointer.
258 *
259 * @returns VBox status code.
260 * @param pVM VM Handle.
261 * @param pvDst Destination address.
262 * @param pvSrc Source guest virtual address.
263 * @param GCPhysSrc The source guest physical address.
264 * @param cb Size of the data to read.
265 */
266DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
267{
268#ifdef IN_RC
269 int rc = MMGCRamRead(pVM, (RTRCPTR)((RTRCUINTPTR)pvDst & ~(cb - 1)), (RTRCPTR)pvSrc, cb);
270 if (RT_FAILURE(rc))
271 rc = PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
272 return rc;
273#elif defined(IN_RING3)
274 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
275 return VINF_SUCCESS;
276#else
277 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
278#endif
279}
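/* Illustrative sketch (not part of the original source): the masking above,
 * address & ~(cb - 1), rounds an address down to a cb-aligned boundary and is
 * only valid when cb is a power of two (4 or 8 for the PTE sizes used here).
 * A hedged stand-alone version: */
#if 0
#include <assert.h>
#include <stdint.h>

static uint64_t alignDown(uint64_t uAddr, uint64_t cb)
{
    assert(cb && !(cb & (cb - 1))); /* cb must be a power of two */
    return uAddr & ~(cb - 1);       /* e.g. alignDown(0x1007, 8) == 0x1000 */
}
#endif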
280
281/**
282 * Process shadow entries before they are changed by the guest.
283 *
284 * For PT entries we will clear them. For PD entries, we'll simply check
285 * for mapping conflicts and set the SyncCR3 FF if found.
286 *
287 * @param pPool The pool.
288 * @param pPage The head page.
289 * @param GCPhysFault The guest physical fault address.
290 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
291 * In R3 this is the host context 'fault' address.
292 * @param pCpu The disassembler state for figuring out the write size.
293 * This need not be specified if the caller knows we won't do cross entry accesses.
294 */
295void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, PDISCPUSTATE pCpu)
296{
297 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
298 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
299 const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
300
301 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));
302
303 for (;;)
304 {
305 union
306 {
307 void *pv;
308 PX86PT pPT;
309 PX86PTPAE pPTPae;
310 PX86PD pPD;
311 PX86PDPAE pPDPae;
312 PX86PDPT pPDPT;
313 PX86PML4 pPML4;
314 } uShw;
315
316 switch (pPage->enmKind)
317 {
318 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
319 {
320 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
321 const unsigned iShw = off / sizeof(X86PTE);
322 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
323 if (uShw.pPT->a[iShw].n.u1Present)
324 {
325# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
326 X86PTE GstPte;
327
328 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
329 AssertRC(rc);
330 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
331 pgmPoolTracDerefGCPhysHint(pPool, pPage,
332 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
333 GstPte.u & X86_PTE_PG_MASK);
334# endif
335 uShw.pPT->a[iShw].u = 0;
336 }
337 break;
338 }
339
340 /* page/2 sized */
341 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
342 {
343 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
344 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
345 {
346 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
347 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
348 if (uShw.pPTPae->a[iShw].n.u1Present)
349 {
350# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
351 X86PTE GstPte;
352 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
353 AssertRC(rc);
354
355 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
356 pgmPoolTracDerefGCPhysHint(pPool, pPage,
357 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
358 GstPte.u & X86_PTE_PG_MASK);
359# endif
360 uShw.pPTPae->a[iShw].u = 0;
361 }
362 }
363 break;
364 }
365
366# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
367 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
368 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
369 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
370 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
371 {
372 unsigned iGst = off / sizeof(X86PDE);
373 unsigned iShwPdpt = iGst / 256;
374 unsigned iShw = (iGst % 256) * 2;
375 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
376
377 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x idx = %d page idx=%d\n", iGst, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
378 if (iShwPdpt == pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
379 {
380 for (unsigned i = 0; i < 2; i++)
381 {
382 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
383 {
384 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
385 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
386 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
387 break;
388 }
389 else
390 if (uShw.pPDPae->a[iShw+i].n.u1Present)
391 {
392 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
393 pgmPoolFree(pPool->CTX_SUFF(pVM),
394 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
395 pPage->idx,
396 iShw + i);
397 uShw.pPDPae->a[iShw+i].u = 0;
398 }
399
400 /* paranoia / a bit assumptive. */
401 if ( pCpu
402 && (off & 3)
403 && (off & 3) + cbWrite > 4)
404 {
405 const unsigned iShw2 = iShw + 2 + i;
406 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
407 {
408 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
409 {
410 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
411 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
412 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
413 break;
414 }
415 else
416 if (uShw.pPDPae->a[iShw2].n.u1Present)
417 {
418 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
419 pgmPoolFree(pPool->CTX_SUFF(pVM),
420 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
421 pPage->idx,
422 iShw2);
423 uShw.pPDPae->a[iShw2].u = 0;
424 }
425 }
426 }
427 }
428 }
429 break;
430 }
431# endif
432
433
434 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
435 {
436 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
437 const unsigned iShw = off / sizeof(X86PTEPAE);
438 if (uShw.pPTPae->a[iShw].n.u1Present)
439 {
440# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
441 X86PTEPAE GstPte;
442 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
443 AssertRC(rc);
444
445 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
446 pgmPoolTracDerefGCPhysHint(pPool, pPage,
447 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
448 GstPte.u & X86_PTE_PAE_PG_MASK);
449# endif
450 uShw.pPTPae->a[iShw].u = 0;
451 }
452
453 /* paranoia / a bit assumptive. */
454 if ( pCpu
455 && (off & 7)
456 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
457 {
458 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
459 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
460
461 if (uShw.pPTPae->a[iShw2].n.u1Present)
462 {
463# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
464 X86PTEPAE GstPte;
465# ifdef IN_RING3
466 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
467# else
468 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
469# endif
470 AssertRC(rc);
471 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
472 pgmPoolTracDerefGCPhysHint(pPool, pPage,
473 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
474 GstPte.u & X86_PTE_PAE_PG_MASK);
475# endif
476 uShw.pPTPae->a[iShw2].u = 0;
477 }
478 }
479 break;
480 }
481
482# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
483 case PGMPOOLKIND_32BIT_PD:
484# else
485 case PGMPOOLKIND_ROOT_32BIT_PD:
486# endif
487 {
488 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
489 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
490
491# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
492 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
493# endif
494 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
495 {
496 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
497 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
498 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
499 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
500 break;
501 }
502# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
503 else
504 {
505 if (uShw.pPD->a[iShw].n.u1Present)
506 {
507 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
508 pgmPoolFree(pPool->CTX_SUFF(pVM),
509 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
510 pPage->idx,
511 iShw);
512 uShw.pPD->a[iShw].u = 0;
513 }
514 }
515# endif
516 /* paranoia / a bit assumptive. */
517 if ( pCpu
518 && (off & 3)
519 && (off & 3) + cbWrite > sizeof(X86PTE))
520 {
521 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
522 if ( iShw2 != iShw
523 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
524 {
525
526 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
527 {
528 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
529 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
530 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
531 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
532 }
533# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
534 else
535 {
536 if (uShw.pPD->a[iShw2].n.u1Present)
537 {
538 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
539 pgmPoolFree(pPool->CTX_SUFF(pVM),
540 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
541 pPage->idx,
542 iShw2);
543 uShw.pPD->a[iShw2].u = 0;
544 }
545 }
546# endif
547 }
548 }
549#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
550 if ( uShw.pPD->a[iShw].n.u1Present
551 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
552 {
553 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
554# ifdef IN_RC /* TLB load - we're pushing things a bit... */
555 ASMProbeReadByte(pvAddress);
556# endif
557 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
558 uShw.pPD->a[iShw].u = 0;
559 }
560#endif
561 break;
562 }
563
564# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
565 case PGMPOOLKIND_ROOT_PAE_PD:
566 {
567 unsigned iGst = off / sizeof(X86PDE); // ASSUMING 32-bit guest paging!
568 unsigned iShwPdpt = iGst / 256;
569 unsigned iShw = (iGst % 256) * 2;
570 Assert(pPage->idx == PGMPOOL_IDX_PAE_PD);
571 PPGMPOOLPAGE pPage2 = pPage + 1 + iShwPdpt;
572 Assert(pPage2->idx == PGMPOOL_IDX_PAE_PD_0 + iShwPdpt);
573 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage2);
574 for (unsigned i = 0; i < 2; i++, iShw++)
575 {
576 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
577 {
578 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
579 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
580 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw));
581 }
582 /* paranoia / a bit assumptive. */
583 else if ( pCpu
584 && (off & 3)
585 && (off & 3) + cbWrite > 4)
586 {
587 const unsigned iShw2 = iShw + 2;
588 if ( iShw2 < RT_ELEMENTS(uShw.pPDPae->a) /** @todo was completely wrong, it's better now after #1865 but still wrong from cross PD. */
589 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
590 {
591 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
592 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
593 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
594 }
595 }
596#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
597 if ( uShw.pPDPae->a[iShw].n.u1Present
598 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
599 {
600 LogFlow(("pgmPoolMonitorChainChanging: iShwPdpt=%#x iShw=%#x: %RX64 -> freeing it!\n", iShwPdpt, iShw, uShw.pPDPae->a[iShw].u));
601# ifdef IN_RC /* TLB load - we're pushing things a bit... */
602 ASMProbeReadByte(pvAddress);
603# endif
604 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + iShwPdpt * X86_PG_PAE_ENTRIES);
605 uShw.pPDPae->a[iShw].u = 0;
606 }
607#endif
608 }
609 break;
610 }
611# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */
612
613 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
614 {
615 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
616 const unsigned iShw = off / sizeof(X86PDEPAE);
617 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
618 {
619 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
620 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
621 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
622 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
623 break;
624 }
625#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
626 /*
627 * Causes trouble when the guest uses a PDE to refer to the whole page table level
628 * structure. (Invalidate here; faults later on when it tries to change the page
629 * table entries -> recheck; probably only applies to the RC case.)
630 */
631 else
632 {
633 if (uShw.pPDPae->a[iShw].n.u1Present)
634 {
635 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
636 pgmPoolFree(pPool->CTX_SUFF(pVM),
637 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
638# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
639 pPage->idx,
640 iShw);
641# else
642 /* Note: hardcoded PAE implementation dependency */
643 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
644 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
645# endif
646 uShw.pPDPae->a[iShw].u = 0;
647 }
648 }
649#endif
650 /* paranoia / a bit assumptive. */
651 if ( pCpu
652 && (off & 7)
653 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
654 {
655 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
656 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
657
658 if ( iShw2 != iShw
659 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
660 {
661 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
662 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
663 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
664 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
665 }
666#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
667 else if (uShw.pPDPae->a[iShw2].n.u1Present)
668 {
669 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
670 pgmPoolFree(pPool->CTX_SUFF(pVM),
671 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
672# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
673 pPage->idx,
674 iShw2);
675# else
676 /* Note: hardcoded PAE implementation dependency */
677 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
678 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
679# endif
680 uShw.pPDPae->a[iShw2].u = 0;
681 }
682#endif
683 }
684 break;
685 }
686
687# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
688 case PGMPOOLKIND_PAE_PDPT:
689# else
690 case PGMPOOLKIND_ROOT_PDPT:
691# endif
692 {
693 /*
694 * Hopefully this doesn't happen very often:
695 * - touching unused parts of the page
696 * - messing with the bits of pd pointers without changing the physical address
697 */
698# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
699 /* PDPT roots are not page aligned; 32 byte only! */
700 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
701# else
702 const unsigned offPdpt = off;
703# endif
704 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
705 const unsigned iShw = offPdpt / sizeof(X86PDPE);
706 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
707 {
708 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
709 {
710 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
711 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
712 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
713 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
714 break;
715 }
716# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
717 else
718 if (uShw.pPDPT->a[iShw].n.u1Present)
719 {
720 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
721 pgmPoolFree(pPool->CTX_SUFF(pVM),
722 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
723 pPage->idx,
724 iShw);
725 uShw.pPDPT->a[iShw].u = 0;
726 }
727# endif
728
729 /* paranoia / a bit assumptive. */
730 if ( pCpu
731 && (offPdpt & 7)
732 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
733 {
734 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
735 if ( iShw2 != iShw
736 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
737 {
738 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
739 {
740 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
741 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
742 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
743 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
744 }
745# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
746 else
747 if (uShw.pPDPT->a[iShw2].n.u1Present)
748 {
749 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
750 pgmPoolFree(pPool->CTX_SUFF(pVM),
751 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
752 pPage->idx,
753 iShw2);
754 uShw.pPDPT->a[iShw2].u = 0;
755 }
756# endif
757 }
758 }
759 }
760 break;
761 }
762
763#ifndef IN_RC
764 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
765 {
766 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
767 const unsigned iShw = off / sizeof(X86PDEPAE);
768 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
769 {
770 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
771 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
772 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
773 }
774 else
775 {
776 if (uShw.pPDPae->a[iShw].n.u1Present)
777 {
778 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
779 pgmPoolFree(pPool->CTX_SUFF(pVM),
780 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
781 pPage->idx,
782 iShw);
783 uShw.pPDPae->a[iShw].u = 0;
784 }
785 }
786 /* paranoia / a bit assumptive. */
787 if ( pCpu
788 && (off & 7)
789 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
790 {
791 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
792 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
793
794 if ( iShw2 != iShw
795 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
796 {
797 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
798 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
799 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
800 }
801 else
802 if (uShw.pPDPae->a[iShw2].n.u1Present)
803 {
804 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
805 pgmPoolFree(pPool->CTX_SUFF(pVM),
806 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
807 pPage->idx,
808 iShw2);
809 uShw.pPDPae->a[iShw2].u = 0;
810 }
811 }
812 break;
813 }
814
815 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
816 {
817 /*
818 * Hopefully this doesn't happen very often:
819 * - messing with the bits of pd pointers without changing the physical address
820 */
821# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
822 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
823# endif
824 {
825 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
826 const unsigned iShw = off / sizeof(X86PDPE);
827 if (uShw.pPDPT->a[iShw].n.u1Present)
828 {
829 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
830 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
831 uShw.pPDPT->a[iShw].u = 0;
832 }
833 /* paranoia / a bit assumptive. */
834 if ( pCpu
835 && (off & 7)
836 && (off & 7) + cbWrite > sizeof(X86PDPE))
837 {
838 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
839 if (uShw.pPDPT->a[iShw2].n.u1Present)
840 {
841 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
842 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
843 uShw.pPDPT->a[iShw2].u = 0;
844 }
845 }
846 }
847 break;
848 }
849
850 case PGMPOOLKIND_64BIT_PML4:
851 {
852 /*
853 * Hopefully this doesn't happen very often:
854 * - messing with the bits of pd pointers without changing the physical address
855 */
856# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
857 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
858# endif
859 {
860 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
861 const unsigned iShw = off / sizeof(X86PDPE);
862 if (uShw.pPML4->a[iShw].n.u1Present)
863 {
864 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
865 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
866 uShw.pPML4->a[iShw].u = 0;
867 }
868 /* paranoia / a bit assumptive. */
869 if ( pCpu
870 && (off & 7)
871 && (off & 7) + cbWrite > sizeof(X86PDPE))
872 {
873 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
874 if (uShw.pPML4->a[iShw2].n.u1Present)
875 {
876 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
877 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
878 uShw.pPML4->a[iShw2].u = 0;
879 }
880 }
881 }
882 break;
883 }
884#endif /* !IN_RC */
885
886 default:
887 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
888 }
889
890 /* next */
891 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
892 return;
893 pPage = &pPool->aPages[pPage->iMonitoredNext];
894 }
895}
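/* Illustrative sketch (not part of the original source): the recurring
 * "paranoia / a bit assumptive" blocks in the function above all follow one
 * pattern - derive the first entry index from the write offset, and when the
 * write is misaligned and long enough to spill into the next entry, derive a
 * second index from the last byte written. A stand-alone model with
 * hypothetical names (cbEntry assumed to be a power of two): */
#if 0
static void entriesTouched(unsigned off, unsigned cbWrite, unsigned cbEntry,
                           unsigned *piFirst, unsigned *piSecond)
{
    *piFirst  = off / cbEntry;
    *piSecond = *piFirst;                              /* none by default */
    if (   (off & (cbEntry - 1))                       /* misaligned start... */
        && (off & (cbEntry - 1)) + cbWrite > cbEntry)  /* ...spilling over */
        *piSecond = (off + cbWrite - 1) / cbEntry;     /* entry of last byte */
}
#endif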
896
897#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
898/**
899 * Checks if the page is the active CR3 or one of the four PDs of a PAE PDPT.
900 *
901 * @returns true if the page is the active CR3 or one of its PAE PDs, otherwise false.
902 * @param pVM VM Handle.
903 * @param pPage PGM pool page.
904 */
905bool pgmPoolIsActiveRootPage(PVM pVM, PPGMPOOLPAGE pPage)
906{
907 /* First check the simple case. */
908 if (pPage == pVM->pgm.s.CTX_SUFF(pShwPageCR3))
909 {
910 LogFlow(("pgmPoolIsActiveRootPage found CR3 root\n"));
911 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
912 return true;
913 }
914
915 switch (PGMGetShadowMode(pVM))
916 {
917 case PGMMODE_PAE:
918 case PGMMODE_PAE_NX:
919 {
920 switch (pPage->enmKind)
921 {
922 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
923 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
924 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
925 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
926 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
927 {
928 PX86PDPT pPdpt = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
929 Assert(pPdpt);
930
931 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
932 {
933 if ( pPdpt->a[i].n.u1Present
934 && pPage->Core.Key == (pPdpt->a[i].u & X86_PDPE_PG_MASK))
935 {
936 LogFlow(("pgmPoolIsActiveRootPage found PAE PDPE root\n"));
937 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
938 return true;
939 }
940 }
941 break;
942 }
943 }
944
945 break;
946 }
947 }
948
949 return false;
950}
951#endif /* VBOX_WITH_PGMPOOL_PAGING_ONLY */
952
953
954# ifndef IN_RING3
955/**
956 * Checks if an access could be a fork operation in progress.
957 *
958 * Meaning that the guest is setting up the parent process for Copy-On-Write.
959 *
960 * @returns true if it's likely that we're forking, otherwise false.
961 * @param pPool The pool.
962 * @param pCpu The disassembled instruction.
963 * @param offFault The access offset.
964 */
965DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
966{
967 /*
968 * i386 linux is using btr to clear X86_PTE_RW.
969 * The functions involved are (2.6.16 source inspection):
970 * clear_bit
971 * ptep_set_wrprotect
972 * copy_one_pte
973 * copy_pte_range
974 * copy_pmd_range
975 * copy_pud_range
976 * copy_page_range
977 * dup_mmap
978 * dup_mm
979 * copy_mm
980 * copy_process
981 * do_fork
982 */
983 if ( pCpu->pCurInstr->opcode == OP_BTR
984 && !(offFault & 4)
985 /** @todo Validate that the bit index is X86_PTE_RW. */
986 )
987 {
988 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
989 return true;
990 }
991 return false;
992}
993
994
995/**
996 * Determine whether the page is likely to have been reused.
997 *
998 * @returns true if we consider the page as being reused for a different purpose.
999 * @returns false if we consider it to still be a paging page.
1000 * @param pVM VM Handle.
1001 * @param pPage The page in question.
1002 * @param pRegFrame Trap register frame.
1003 * @param pCpu The disassembly info for the faulting instruction.
1004 * @param pvFault The fault address.
1005 *
1006 * @remark The REP prefix check is left to the caller because of STOSD/W.
1007 */
1008DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
1009{
1010#ifndef IN_RC
1011 /** @todo could make this general, faulting close to rsp should be safe reuse heuristic. */
1012 if ( HWACCMHasPendingIrq(pVM)
1013 && (pRegFrame->rsp - pvFault) < 32)
1014 {
1015 /* Fault caused by stack writes while trying to inject an interrupt event. */
1016 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
1017 return true;
1018 }
1019#else
1020 NOREF(pVM); NOREF(pvFault);
1021#endif
1022
1023 switch (pCpu->pCurInstr->opcode)
1024 {
1025 /* call implies the actual push of the return address faulted */
1026 case OP_CALL:
1027 Log4(("pgmPoolMonitorIsReused: CALL\n"));
1028 return true;
1029 case OP_PUSH:
1030 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
1031 return true;
1032 case OP_PUSHF:
1033 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
1034 return true;
1035 case OP_PUSHA:
1036 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
1037 return true;
1038 case OP_FXSAVE:
1039 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
1040 return true;
1041 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
1042 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
1043 return true;
1044 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
1045 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
1046 return true;
1047 case OP_MOVSWD:
1048 case OP_STOSWD:
1049 if ( pCpu->prefix == (PREFIX_REP|PREFIX_REX)
1050 && pRegFrame->rcx >= 0x40
1051 )
1052 {
1053 Assert(pCpu->mode == CPUMODE_64BIT);
1054
1055 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
1056 return true;
1057 }
1058 return false;
1059 }
1060 if ( (pCpu->param1.flags & USE_REG_GEN32)
1061 && (pCpu->param1.base.reg_gen == USE_REG_ESP))
1062 {
1063 Log4(("pgmPoolMonitorIsReused: ESP\n"));
1064 return true;
1065 }
1066
1067 //if (pPage->fCR3Mix)
1068 // return false;
1069 return false;
1070}
1071
1072
1073/**
1074 * Flushes the page being accessed.
1075 *
1076 * @returns VBox status code suitable for scheduling.
1077 * @param pVM The VM handle.
1078 * @param pPool The pool.
1079 * @param pPage The pool page (head).
1080 * @param pCpu The disassembly of the write instruction.
1081 * @param pRegFrame The trap register frame.
1082 * @param GCPhysFault The fault address as guest physical address.
1083 * @param pvFault The fault address.
1084 */
1085static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1086 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1087{
1088 /*
1089 * First, do the flushing.
1090 */
1091 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
1092
1093 /*
1094 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
1095 */
1096 uint32_t cbWritten;
1097 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
1098 if (RT_SUCCESS(rc2))
1099 pRegFrame->rip += pCpu->opsize;
1100 else if (rc2 == VERR_EM_INTERPRETER)
1101 {
1102#ifdef IN_RC
1103 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
1104 {
1105 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
1106 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
1107 rc = VINF_SUCCESS;
1108 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
1109 }
1110 else
1111#endif
1112 {
1113 rc = VINF_EM_RAW_EMULATE_INSTR;
1114 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1115 }
1116 }
1117 else
1118 rc = rc2;
1119
1120 /* See use in pgmPoolAccessHandlerSimple(). */
1121 PGM_INVL_GUEST_TLBS();
1122
1123 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
1124 return rc;
1125
1126}
1127
1128
1129/**
1130 * Handles the STOSD write accesses.
1131 *
1132 * @returns VBox status code suitable for scheduling.
1133 * @param pVM The VM handle.
1134 * @param pPool The pool.
1135 * @param pPage The pool page (head).
1136 * @param pCpu The disassembly of the write instruction.
1137 * @param pRegFrame The trap register frame.
1138 * @param GCPhysFault The fault address as guest physical address.
1139 * @param pvFault The fault address.
1140 */
1141DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1142 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1143{
1144 Assert(pCpu->mode == CPUMODE_32BIT);
1145
1146 Log3(("pgmPoolAccessHandlerSTOSD\n"));
1147
1148 /*
1149 * Increment the modification counter and insert it into the list
1150 * of modified pages the first time.
1151 */
1152 if (!pPage->cModifications++)
1153 pgmPoolMonitorModifiedInsert(pPool, pPage);
1154
1155 /*
1156 * Execute REP STOSD.
1157 *
1158 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
1159 * write situation, meaning that it's safe to write here.
1160 */
1161#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1162 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1163#endif
1164 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
1165 while (pRegFrame->ecx)
1166 {
1167#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1168 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1169 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1170 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1171#else
1172 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1173#endif
1174#ifdef IN_RC
1175 *(uint32_t *)pu32 = pRegFrame->eax;
1176#else
1177 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
1178#endif
1179 pu32 += 4;
1180 GCPhysFault += 4;
1181 pRegFrame->edi += 4;
1182 pRegFrame->ecx--;
1183 }
1184 pRegFrame->rip += pCpu->opsize;
1185
1186 /* See use in pgmPoolAccessHandlerSimple(). */
1187 PGM_INVL_GUEST_TLBS();
1188
1189 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
1190 return VINF_SUCCESS;
1191}
1192
1193
1194/**
1195 * Handles the simple write accesses.
1196 *
1197 * @returns VBox status code suitable for scheduling.
1198 * @param pVM The VM handle.
1199 * @param pPool The pool.
1200 * @param pPage The pool page (head).
1201 * @param pCpu The disassembly of the write instruction.
1202 * @param pRegFrame The trap register frame.
1203 * @param GCPhysFault The fault address as guest physical address.
1204 * @param pvFault The fault address.
1205 */
1206DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1207 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1208{
1209 Log3(("pgmPoolAccessHandlerSimple\n"));
1210 /*
1211 * Increment the modification counter and insert it into the list
1212 * of modified pages the first time.
1213 */
1214 if (!pPage->cModifications++)
1215 pgmPoolMonitorModifiedInsert(pPool, pPage);
1216
1217 /*
1218 * Clear all the pages. ASSUMES that pvFault is readable.
1219 */
1220#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1221 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1222 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1223 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
1224 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1225#else
1226 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
1227#endif
1228
1229 /*
1230 * Interpret the instruction.
1231 */
1232 uint32_t cb;
1233 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
1234 if (RT_SUCCESS(rc))
1235 pRegFrame->rip += pCpu->opsize;
1236 else if (rc == VERR_EM_INTERPRETER)
1237 {
1238 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1239 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
1240 rc = VINF_EM_RAW_EMULATE_INSTR;
1241 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1242 }
1243
1244 /*
1245 * Quick hack, with logging enabled we're getting stale
1246 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
1247 * Flushing here is BAD and expensive; I think EMInterpretDisasOne will
1248 * have to be fixed to support this. But that'll have to wait till next week.
1249 *
1250 * An alternative is to keep track of the changed PTEs together with the
1251 * GCPhys from the guest PT. This may prove expensive though.
1252 *
1253 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
1254 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1255 */
1256 PGM_INVL_GUEST_TLBS();
1257
1258 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1259 return rc;
1260}
1261
1262/**
1263 * \#PF Handler callback for PT write accesses.
1264 *
1265 * @returns VBox status code (appropriate for GC return).
1266 * @param pVM VM Handle.
1267 * @param uErrorCode CPU Error code.
1268 * @param pRegFrame Trap register frame.
1269 * NULL on DMA and other non-CPU access.
1270 * @param pvFault The fault address (cr2).
1271 * @param GCPhysFault The GC physical address corresponding to pvFault.
1272 * @param pvUser User argument.
1273 */
1274DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1275{
1276 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1277 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1278 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1279 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1280
1281 /*
1282 * We should ALWAYS have the list head as user parameter. This
1283 * is because we use that page to record the changes.
1284 */
1285 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1286
1287 /*
1288 * Disassemble the faulting instruction.
1289 */
1290 DISCPUSTATE Cpu;
1291 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
1292 AssertRCReturn(rc, rc);
1293
1294 /*
1295 * Check if it's worth dealing with.
1296 */
1297 bool fReused = false;
1298 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1299#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1300 || pgmPoolIsActiveRootPage(pVM, pPage)
1301#else
1302 || pPage->fCR3Mix
1303#endif
1304 )
1305 && !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
1306 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
1307 {
1308 /*
1309 * Simple instructions, no REP prefix.
1310 */
1311 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
1312 {
1313 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1314 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1315 return rc;
1316 }
1317
1318 /*
1319 * Windows is frequently doing small memset() operations (netio test 4k+).
1320 * We have to deal with these or we'll kill the cache and performance.
1321 */
1322 if ( Cpu.pCurInstr->opcode == OP_STOSWD
1323 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
1324 && pRegFrame->ecx <= 0x20
1325 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1326 && !((uintptr_t)pvFault & 3)
1327 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1328 && Cpu.mode == CPUMODE_32BIT
1329 && Cpu.opmode == CPUMODE_32BIT
1330 && Cpu.addrmode == CPUMODE_32BIT
1331 && Cpu.prefix == PREFIX_REP
1332 && !pRegFrame->eflags.Bits.u1DF
1333 )
1334 {
1335 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1336 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1337 return rc;
1338 }
1339
1340 /* REP prefix, don't bother. */
1341 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1342 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1343 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
1344 }
1345
1346 /*
1347 * Not worth it, so flush it.
1348 *
1349 * If we considered it to be reused, don't go back to ring-3
1350 * to emulate failed instructions since we usually cannot
1351 * interpret them. This may be a bit risky, in which case
1352 * the reuse detection must be fixed.
1353 */
1354 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1355 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1356 rc = VINF_SUCCESS;
1357 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1358 return rc;
1359}
1360
1361# endif /* !IN_RING3 */
1362#endif /* PGMPOOL_WITH_MONITORING */
1363
1364#ifdef PGMPOOL_WITH_CACHE
1365
1366/**
1367 * Inserts a page into the GCPhys hash table.
1368 *
1369 * @param pPool The pool.
1370 * @param pPage The page.
1371 */
1372DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1373{
1374 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1375 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1376 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1377 pPage->iNext = pPool->aiHash[iHash];
1378 pPool->aiHash[iHash] = pPage->idx;
1379}
1380
1381
1382/**
1383 * Removes a page from the GCPhys hash table.
1384 *
1385 * @param pPool The pool.
1386 * @param pPage The page.
1387 */
1388DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1389{
1390 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1391 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1392 if (pPool->aiHash[iHash] == pPage->idx)
1393 pPool->aiHash[iHash] = pPage->iNext;
1394 else
1395 {
1396 uint16_t iPrev = pPool->aiHash[iHash];
1397 for (;;)
1398 {
1399 const int16_t i = pPool->aPages[iPrev].iNext;
1400 if (i == pPage->idx)
1401 {
1402 pPool->aPages[iPrev].iNext = pPage->iNext;
1403 break;
1404 }
1405 if (i == NIL_PGMPOOL_IDX)
1406 {
1407 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1408 break;
1409 }
1410 iPrev = i;
1411 }
1412 }
1413 pPage->iNext = NIL_PGMPOOL_IDX;
1414}
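/* Illustrative sketch (not part of the original source): the two functions
 * above form a chained hash table whose buckets and chain links are uint16_t
 * page indexes rather than pointers, so the structure stays valid no matter
 * where the pool is mapped in R3/R0/RC. A minimal stand-alone model with
 * hypothetical ENTRY/HASH names: */
#if 0
#include <stdint.h>
#define NIL_IDX   UINT16_MAX
#define HASH_SIZE 64
#define HASH(Key) ((unsigned)((Key) >> 12) % HASH_SIZE)

typedef struct ENTRY { uint64_t Key; uint16_t iNext; } ENTRY;

static void hashInsert(uint16_t *paiHash, ENTRY *paEntries, uint16_t idx)
{
    unsigned iHash = HASH(paEntries[idx].Key);
    paEntries[idx].iNext = paiHash[iHash];   /* push onto the bucket chain */
    paiHash[iHash] = idx;
}

static void hashRemove(uint16_t *paiHash, ENTRY *paEntries, uint16_t idx)
{
    unsigned  iHash = HASH(paEntries[idx].Key);
    uint16_t *piCur = &paiHash[iHash];
    while (*piCur != NIL_IDX)                /* unlink idx from its chain */
    {
        if (*piCur == idx)
        {
            *piCur = paEntries[idx].iNext;
            break;
        }
        piCur = &paEntries[*piCur].iNext;
    }
    paEntries[idx].iNext = NIL_IDX;
}
#endif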
1415
1416
1417/**
1418 * Frees up one cache page.
1419 *
1420 * @returns VBox status code.
1421 * @retval VINF_SUCCESS on success.
1422 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
1423 * @param pPool The pool.
1424 * @param iUser The user index.
1425 */
1426static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1427{
1428#ifndef IN_RC
1429 const PVM pVM = pPool->CTX_SUFF(pVM);
1430#endif
1431 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1432 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1433
1434 /*
1435 * Select one page from the tail of the age list.
1436 */
1437 uint16_t iToFree = pPool->iAgeTail;
1438 if (iToFree == iUser)
1439 iToFree = pPool->aPages[iToFree].iAgePrev;
1440/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1441 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1442 {
1443 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1444 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1445 {
1446 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1447 continue;
1448 iToFree = i;
1449 break;
1450 }
1451 }
1452*/
1453
1454 Assert(iToFree != iUser);
1455 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1456
1457 PPGMPOOLPAGE pPage = &pPool->aPages[iToFree];
1458
1459 /*
1460 * Reject any attempts at flushing the currently active shadow CR3 mapping
1461 */
1462 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
1463 {
1464 /* Refresh the cr3 mapping by putting it at the head of the age list. */
1465 LogFlow(("pgmPoolCacheFreeOne refuse CR3 mapping\n"));
1466 pgmPoolCacheUsed(pPool, pPage);
1467 return pgmPoolCacheFreeOne(pPool, iUser);
1468 }
1469
1470 int rc = pgmPoolFlushPage(pPool, pPage);
1471 if (rc == VINF_SUCCESS)
1472 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1473 return rc;
1474}
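/* Illustrative sketch (not part of the original source): the age list is a
 * plain LRU - used pages move to the head, eviction starts at the tail and
 * steps past entries that must not be freed (the requesting user above, the
 * active CR3 further up). A hedged stand-alone model of picking a victim: */
#if 0
#include <stdint.h>
#define NIL_IDX UINT16_MAX
typedef struct AGENODE { uint16_t iAgePrev, iAgeNext; } AGENODE;

static uint16_t pickVictim(AGENODE *paNodes, uint16_t iTail, uint16_t iSkip)
{
    uint16_t i = iTail;
    while (i != NIL_IDX && i == iSkip)  /* step past the protected entry */
        i = paNodes[i].iAgePrev;
    return i;                           /* NIL_IDX if nothing is evictable */
}
#endif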
1475
1476
1477/**
1478 * Checks if a kind mismatch is really a page being reused
1480 * or if it's just a normal remapping.
1480 *
1481 * @returns true if reused and the cached page (enmKind1) should be flushed
1482 * @returns false if not reused.
1483 * @param enmKind1 The kind of the cached page.
1484 * @param enmKind2 The kind of the requested page.
1485 */
1486static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1487{
1488 switch (enmKind1)
1489 {
1490 /*
1491 * Never reuse them. There is no remapping in non-paging mode.
1492 */
1493 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1494 case PGMPOOLKIND_32BIT_PD_PHYS:
1495 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1496 case PGMPOOLKIND_PAE_PD_PHYS:
1497 case PGMPOOLKIND_PAE_PDPT_PHYS:
1498 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1499 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1500 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1501 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1502 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1503#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1504 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1505 return false;
1506#else
1507 return true;
1508#endif
1509
1510 /*
1511 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1512 */
1513 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1514 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1515 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1516 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1517 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1518 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1519 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1520 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1521 case PGMPOOLKIND_32BIT_PD:
1522 switch (enmKind2)
1523 {
1524 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1525 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1526 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1527 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1528 case PGMPOOLKIND_64BIT_PML4:
1529 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1530 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1531 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1532 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1533 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1534 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1535 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1536 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1537 return true;
1538 default:
1539 return false;
1540 }
1541
1542 /*
1543 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1544 */
1545 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1546 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1547 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1548 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1549 case PGMPOOLKIND_64BIT_PML4:
1550 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1551 switch (enmKind2)
1552 {
1553 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1554 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1555 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1556 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1557 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1558 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1559 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1560 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1561 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1562 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1563 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1564 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1565 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1566 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1567 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1568 return true;
1569 default:
1570 return false;
1571 }
1572
1573 /*
1574 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1575 */
1576#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1577 case PGMPOOLKIND_ROOT_32BIT_PD:
1578 case PGMPOOLKIND_ROOT_PAE_PD:
1579 case PGMPOOLKIND_ROOT_PDPT:
1580#endif
1581 case PGMPOOLKIND_ROOT_NESTED:
1582 return false;
1583
1584 default:
1585 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1586 }
1587}
1588
1589
1590/**
1591 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1592 *
1593 * @returns VBox status code.
1594 * @retval VINF_PGM_CACHED_PAGE on success.
1595 * @retval VERR_FILE_NOT_FOUND if not found.
1596 * @param pPool The pool.
1597 * @param GCPhys The GC physical address of the page we're gonna shadow.
1598 * @param enmKind The kind of mapping.
1599 * @param iUser The shadow page pool index of the user table.
1600 * @param iUserTable The index into the user table (shadowed).
1601 * @param ppPage Where to store the pointer to the page.
1602 */
1603static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1604{
1605#ifndef IN_RC
1606 const PVM pVM = pPool->CTX_SUFF(pVM);
1607#endif
1608 /*
1609 * Look up the GCPhys in the hash.
1610 */
1611 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1612 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
1613 if (i != NIL_PGMPOOL_IDX)
1614 {
1615 do
1616 {
1617 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1618 Log3(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1619 if (pPage->GCPhys == GCPhys)
1620 {
1621 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
1622 {
1623 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1624 if (RT_SUCCESS(rc))
1625 {
1626 *ppPage = pPage;
1627 STAM_COUNTER_INC(&pPool->StatCacheHits);
1628 return VINF_PGM_CACHED_PAGE;
1629 }
1630 return rc;
1631 }
1632
1633 /*
1634 * The kind is different. In some cases we should now flush the page
1635 * as it has been reused, but in most cases this is normal remapping
1636 * of PDs as PT or big pages using the GCPhys field in a slightly
1637 * different way than the other kinds.
1638 */
1639 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1640 {
1641 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1642 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
1643 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1644 break;
1645 }
1646 }
1647
1648 /* next */
1649 i = pPage->iNext;
1650 } while (i != NIL_PGMPOOL_IDX);
1651 }
1652
1653 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
1654 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1655 return VERR_FILE_NOT_FOUND;
1656}
1657
1658
1659/**
1660 * Inserts a page into the cache.
1661 *
1662 * @param pPool The pool.
1663 * @param pPage The cached page.
1664 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1665 */
1666static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1667{
1668 /*
1669 * Insert into the GCPhys hash if the page is fit for that.
1670 */
1671 Assert(!pPage->fCached);
1672 if (fCanBeCached)
1673 {
1674 pPage->fCached = true;
1675 pgmPoolHashInsert(pPool, pPage);
1676 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1677 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1678 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1679 }
1680 else
1681 {
1682 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1683 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1684 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1685 }
1686
1687 /*
1688 * Insert at the head of the age list.
1689 */
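 /* The head of the age list is the most recently used end; the cache evicts from the tail. */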
1690 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1691 pPage->iAgeNext = pPool->iAgeHead;
1692 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1693 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1694 else
1695 pPool->iAgeTail = pPage->idx;
1696 pPool->iAgeHead = pPage->idx;
1697}
1698
1699
1700/**
1701 * Flushes a cached page.
1702 *
1703 * @param pPool The pool.
1704 * @param pPage The cached page.
1705 */
1706static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1707{
1708 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1709
1710 /*
1711 * Remove the page from the hash.
1712 */
1713 if (pPage->fCached)
1714 {
1715 pPage->fCached = false;
1716 pgmPoolHashRemove(pPool, pPage);
1717 }
1718 else
1719 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1720
1721 /*
1722 * Remove it from the age list.
1723 */
1724 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1725 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1726 else
1727 pPool->iAgeTail = pPage->iAgePrev;
1728 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1729 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1730 else
1731 pPool->iAgeHead = pPage->iAgeNext;
1732 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1733 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1734}
1735
1736#endif /* PGMPOOL_WITH_CACHE */
1737#ifdef PGMPOOL_WITH_MONITORING
1738
1739/**
1740 * Looks for pages sharing the monitor.
1741 *
1742 * @returns Pointer to the head page.
1743 * @returns NULL if not found.
1744 * @param pPool The pool.
1745 * @param pNewPage The page which is going to be monitored.
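 *
 * @remarks Several shadow pages may monitor the same guest page; they are
 * chained via iMonitoredPrev/iMonitoredNext and only the chain head
 * owns the physical access handler registration.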
1746 */
1747static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1748{
1749#ifdef PGMPOOL_WITH_CACHE
1750 /*
1751 * Look up the GCPhys in the hash.
1752 */
1753 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1754 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1755 if (i == NIL_PGMPOOL_IDX)
1756 return NULL;
1757 do
1758 {
1759 PPGMPOOLPAGE pPage = &pPool->aPages[i];
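 /* Pool pages may record a sub-page GCPhys (e.g. the four PAE PDs shadowing one 32-bit PD), so match with page granularity; the unsigned wrap-around rejects lower addresses. */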
1760 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1761 && pPage != pNewPage)
1762 {
1763 switch (pPage->enmKind)
1764 {
1765 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1766 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1767 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1768 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1769 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1770 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1771 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1772 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1773 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1774 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1775 case PGMPOOLKIND_64BIT_PML4:
1776#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1777 case PGMPOOLKIND_32BIT_PD:
1778 case PGMPOOLKIND_PAE_PDPT:
1779#else
1780 case PGMPOOLKIND_ROOT_32BIT_PD:
1781 case PGMPOOLKIND_ROOT_PAE_PD:
1782 case PGMPOOLKIND_ROOT_PDPT:
1783#endif
1784 {
1785 /* find the head */
1786 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1787 {
1788 Assert(pPage->iMonitoredPrev != pPage->idx);
1789 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1790 }
1791 return pPage;
1792 }
1793
1794 /* ignore, no monitoring. */
1795 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1796 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1797 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1798 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1799 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1800 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1801 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1802 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1803 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1804 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1805 case PGMPOOLKIND_ROOT_NESTED:
1806 case PGMPOOLKIND_PAE_PD_PHYS:
1807 case PGMPOOLKIND_PAE_PDPT_PHYS:
1808 case PGMPOOLKIND_32BIT_PD_PHYS:
1809#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1810 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1811#endif
1812 break;
1813 default:
1814 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1815 }
1816 }
1817
1818 /* next */
1819 i = pPage->iNext;
1820 } while (i != NIL_PGMPOOL_IDX);
1821#endif
1822 return NULL;
1823}
1824
1825
1826/**
1827 * Enables write monitoring of a guest page.
1828 *
1829 * @returns VBox status code.
1830 * @retval VINF_SUCCESS on success.
1831 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1832 * @param pPool The pool.
1833 * @param pPage The cached page.
1834 */
1835static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1836{
1837 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1838
1839 /*
1840 * Filter out the relevant kinds.
1841 */
1842 switch (pPage->enmKind)
1843 {
1844 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1845 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1846 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1847 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1848 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1849 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1850 case PGMPOOLKIND_64BIT_PML4:
1851#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1852 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1853 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1854 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1855 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1856 case PGMPOOLKIND_32BIT_PD:
1857 case PGMPOOLKIND_PAE_PDPT:
1858#else
1859 case PGMPOOLKIND_ROOT_PDPT:
1860#endif
1861 break;
1862
1863 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1864 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1865 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1866 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1867 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1868 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1869 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1870 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1871 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1872 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1873 case PGMPOOLKIND_ROOT_NESTED:
1874 /* Nothing to monitor here. */
1875 return VINF_SUCCESS;
1876
1877#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1878 case PGMPOOLKIND_32BIT_PD_PHYS:
1879 case PGMPOOLKIND_PAE_PDPT_PHYS:
1880 case PGMPOOLKIND_PAE_PD_PHYS:
1881 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1882 /* Nothing to monitor here. */
1883 return VINF_SUCCESS;
1884#else
1885 case PGMPOOLKIND_ROOT_32BIT_PD:
1886 case PGMPOOLKIND_ROOT_PAE_PD:
1887#endif
1888#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1889 break;
1890#else
1891 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1892#endif
1893 default:
1894 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1895 }
1896
1897 /*
1898 * Install handler.
1899 */
1900 int rc;
1901 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1902 if (pPageHead)
1903 {
1904 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1905 Assert(pPageHead->iMonitoredPrev != pPage->idx);
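 /* Link the new page in right after the chain head; the head keeps the physical handler registration. */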
1906 pPage->iMonitoredPrev = pPageHead->idx;
1907 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1908 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1909 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1910 pPageHead->iMonitoredNext = pPage->idx;
1911 rc = VINF_SUCCESS;
1912 }
1913 else
1914 {
1915 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1916 PVM pVM = pPool->CTX_SUFF(pVM);
1917 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1918 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1919 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1920 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1921 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1922 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1923 pPool->pszAccessHandler);
1924 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1925 * the heap size should suffice. */
1926 AssertFatalRC(rc);
1927 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1928 rc = VERR_PGM_POOL_CLEARED;
1929 }
1930 pPage->fMonitored = true;
1931 return rc;
1932}
1933
1934
1935/**
1936 * Disables write monitoring of a guest page.
1937 *
1938 * @returns VBox status code.
1939 * @retval VINF_SUCCESS on success.
1940 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1941 * @param pPool The pool.
1942 * @param pPage The cached page.
1943 */
1944static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1945{
1946 /*
1947 * Filter out the relevant kinds.
1948 */
1949 switch (pPage->enmKind)
1950 {
1951 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1952 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1953 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1954 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1955 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1956 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1957 case PGMPOOLKIND_64BIT_PML4:
1958#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1959 case PGMPOOLKIND_32BIT_PD:
1960 case PGMPOOLKIND_PAE_PDPT:
1961 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1962 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1963 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1964 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1965#else
1966 case PGMPOOLKIND_ROOT_PDPT:
1967#endif
1968 break;
1969
1970 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1971 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1972 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1973 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1974 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1975 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1976 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1977 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1978 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1979 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1980 case PGMPOOLKIND_ROOT_NESTED:
1981 case PGMPOOLKIND_PAE_PD_PHYS:
1982 case PGMPOOLKIND_PAE_PDPT_PHYS:
1983 case PGMPOOLKIND_32BIT_PD_PHYS:
1984 /* Nothing to monitor here. */
1985 return VINF_SUCCESS;
1986
1987#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1988 case PGMPOOLKIND_ROOT_32BIT_PD:
1989 case PGMPOOLKIND_ROOT_PAE_PD:
1990#endif
1991#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1992 break;
1993#endif
1994#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1995 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1996#endif
1997 default:
1998 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1999 }
2000
2001 /*
2002 * Remove the page from the monitored list, or uninstall the handler if it's the last page.
2003 */
2004 const PVM pVM = pPool->CTX_SUFF(pVM);
2005 int rc;
2006 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2007 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2008 {
2009 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2010 {
2011 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2012 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2013#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2014 pNewHead->fCR3Mix = pPage->fCR3Mix;
2015#endif
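 /* This page is the chain head and owns the handler registration; transfer it to the new head instead of deregistering. */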
2016 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2017 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2018 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2019 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2020 pPool->pszAccessHandler);
2021 AssertFatalRCSuccess(rc);
2022 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2023 }
2024 else
2025 {
2026 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2027 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2028 {
2029 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2030 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2031 }
2032 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2033 rc = VINF_SUCCESS;
2034 }
2035 }
2036 else
2037 {
2038 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2039 AssertFatalRC(rc);
2040 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2041 rc = VERR_PGM_POOL_CLEARED;
2042 }
2043 pPage->fMonitored = false;
2044
2045 /*
2046 * Remove it from the list of modified pages (if in it).
2047 */
2048 pgmPoolMonitorModifiedRemove(pPool, pPage);
2049
2050 return rc;
2051}
2052
2053# if defined(PGMPOOL_WITH_MIXED_PT_CR3) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2054
2055/**
2056 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
2057 *
2058 * @param pPool The Pool.
2059 * @param pPage A page in the chain.
2060 * @param fCR3Mix The new fCR3Mix value.
2061 */
2062static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
2063{
2064 /* current */
2065 pPage->fCR3Mix = fCR3Mix;
2066
2067 /* before */
2068 int16_t idx = pPage->iMonitoredPrev;
2069 while (idx != NIL_PGMPOOL_IDX)
2070 {
2071 pPool->aPages[idx].fCR3Mix = fCR3Mix;
2072 idx = pPool->aPages[idx].iMonitoredPrev;
2073 }
2074
2075 /* after */
2076 idx = pPage->iMonitoredNext;
2077 while (idx != NIL_PGMPOOL_IDX)
2078 {
2079 pPool->aPages[idx].fCR3Mix = fCR3Mix;
2080 idx = pPool->aPages[idx].iMonitoredNext;
2081 }
2082}
2083
2084
2085/**
2086 * Installs or modifies monitoring of a CR3 page (special).
2087 *
2088 * We're pretending the CR3 page is shadowed by the pool so we can use the
2089 * generic mechanisms for detecting chained monitoring. (This also gives us a
2090 * taste of what code changes are required to really pool CR3 shadow pages.)
2091 *
2092 * @returns VBox status code.
2093 * @param pPool The pool.
2094 * @param idxRoot The CR3 (root) page index.
2095 * @param GCPhysCR3 The (new) CR3 value.
2096 */
2097int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
2098{
2099 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
2100 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
2101 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d} GCPhysCR3=%RGp\n",
2102 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
2103
2104 /*
2105 * The unlikely case where it already matches.
2106 */
2107 if (pPage->GCPhys == GCPhysCR3)
2108 {
2109 Assert(pPage->fMonitored);
2110 return VINF_SUCCESS;
2111 }
2112
2113 /*
2114 * Flush the current monitoring and remove it from the hash.
2115 */
2116 int rc = VINF_SUCCESS;
2117 if (pPage->fMonitored)
2118 {
2119 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
2120 rc = pgmPoolMonitorFlush(pPool, pPage);
2121 if (rc == VERR_PGM_POOL_CLEARED)
2122 rc = VINF_SUCCESS;
2123 else
2124 AssertFatalRC(rc);
2125 pgmPoolHashRemove(pPool, pPage);
2126 }
2127
2128 /*
2129 * Monitor the page at the new location and insert it into the hash.
2130 */
2131 pPage->GCPhys = GCPhysCR3;
2132 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
2133 if (rc2 != VERR_PGM_POOL_CLEARED)
2134 {
2135 AssertFatalRC(rc2);
2136 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
2137 rc = rc2;
2138 }
2139 pgmPoolHashInsert(pPool, pPage);
2140 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
2141 return rc;
2142}
2143
2144
2145/**
2146 * Removes the monitoring of a CR3 page (special).
2147 *
2148 * @returns VBox status code.
2149 * @param pPool The pool.
2150 * @param idxRoot The CR3 (root) page index.
2151 */
2152int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
2153{
2154 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
2155 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
2156 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d}\n",
2157 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
2158
2159 if (!pPage->fMonitored)
2160 return VINF_SUCCESS;
2161
2162 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
2163 int rc = pgmPoolMonitorFlush(pPool, pPage);
2164 if (rc != VERR_PGM_POOL_CLEARED)
2165 AssertFatalRC(rc);
2166 else
2167 rc = VINF_SUCCESS;
2168 pgmPoolHashRemove(pPool, pPage);
2169 Assert(!pPage->fMonitored);
2170 pPage->GCPhys = NIL_RTGCPHYS;
2171 return rc;
2172}
2173
2174# endif /* PGMPOOL_WITH_MIXED_PT_CR3 && !VBOX_WITH_PGMPOOL_PAGING_ONLY*/
2175
2176/**
2177 * Inserts the page into the list of modified pages.
2178 *
2179 * @param pPool The pool.
2180 * @param pPage The page.
2181 */
2182void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2183{
2184 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2185 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2186 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2187 && pPool->iModifiedHead != pPage->idx,
2188 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2189 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2190 pPool->iModifiedHead, pPool->cModifiedPages));
2191
2192 pPage->iModifiedNext = pPool->iModifiedHead;
2193 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2194 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2195 pPool->iModifiedHead = pPage->idx;
2196 pPool->cModifiedPages++;
2197#ifdef VBOX_WITH_STATISTICS
2198 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2199 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2200#endif
2201}
2202
2203
2204/**
2205 * Removes the page from the list of modified pages and resets the
2206 * modification counter.
2207 *
2208 * @param pPool The pool.
2209 * @param pPage The page which is believed to be in the list of modified pages.
2210 */
2211static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2212{
2213 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2214 if (pPool->iModifiedHead == pPage->idx)
2215 {
2216 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2217 pPool->iModifiedHead = pPage->iModifiedNext;
2218 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2219 {
2220 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2221 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2222 }
2223 pPool->cModifiedPages--;
2224 }
2225 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2226 {
2227 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2228 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2229 {
2230 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2231 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2232 }
2233 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2234 pPool->cModifiedPages--;
2235 }
2236 else
2237 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2238 pPage->cModifications = 0;
2239}
2240
2241
2242/**
2243 * Zaps the list of modified pages, resetting their modification counters in the process.
2244 *
2245 * @param pVM The VM handle.
2246 */
2247void pgmPoolMonitorModifiedClearAll(PVM pVM)
2248{
2249 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2250 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2251
2252 unsigned cPages = 0; NOREF(cPages);
2253 uint16_t idx = pPool->iModifiedHead;
2254 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2255 while (idx != NIL_PGMPOOL_IDX)
2256 {
2257 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2258 idx = pPage->iModifiedNext;
2259 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2260 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2261 pPage->cModifications = 0;
2262 Assert(++cPages);
2263 }
2264 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2265 pPool->cModifiedPages = 0;
2266}
2267
2268
2269#ifdef IN_RING3
2270/**
2271 * Clear all shadow pages and clear all modification counters.
2272 *
2273 * @param pVM The VM handle.
2274 * @remark Should only be used when monitoring is available, thus placed in
2275 * the PGMPOOL_WITH_MONITORING #ifdef.
2276 */
2277void pgmPoolClearAll(PVM pVM)
2278{
2279 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2280 STAM_PROFILE_START(&pPool->StatClearAll, c);
2281 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2282
2283 /*
2284 * Iterate all the pages until we've encountered all those in use.
2285 * This is a simple but not quite optimal solution.
2286 */
2287 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2288 unsigned cLeft = pPool->cUsedPages;
2289 unsigned iPage = pPool->cCurPages;
2290 while (--iPage >= PGMPOOL_IDX_FIRST)
2291 {
2292 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2293 if (pPage->GCPhys != NIL_RTGCPHYS)
2294 {
2295 switch (pPage->enmKind)
2296 {
2297 /*
2298 * We only care about shadow page tables.
2299 */
2300 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2301 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2302 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2303 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2304 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2305 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2306 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2307 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2308 {
2309#ifdef PGMPOOL_WITH_USER_TRACKING
2310 if (pPage->cPresent)
2311#endif
2312 {
2313 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2314 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2315 ASMMemZeroPage(pvShw);
2316 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2317#ifdef PGMPOOL_WITH_USER_TRACKING
2318 pPage->cPresent = 0;
2319 pPage->iFirstPresent = ~0;
2320#endif
2321 }
2322 }
2323 /* fall thru */
2324
2325 default:
2326 Assert(!pPage->cModifications || ++cModifiedPages);
2327 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2328 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2329 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2330 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2331 pPage->cModifications = 0;
2332 break;
2333
2334 }
2335 if (!--cLeft)
2336 break;
2337 }
2338 }
2339
2340 /* sweep the special pages too. */
2341 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2342 {
2343 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2344 if (pPage->GCPhys != NIL_RTGCPHYS)
2345 {
2346 Assert(!pPage->cModifications || ++cModifiedPages);
2347 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2348 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2349 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2350 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2351 pPage->cModifications = 0;
2352 }
2353 }
2354
2355#ifndef DEBUG_michael
2356 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2357#endif
2358 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2359 pPool->cModifiedPages = 0;
2360
2361#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2362 /*
2363 * Clear all the GCPhys links and rebuild the phys ext free list.
2364 */
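 /* The reference info lives in the top bits of each ram range page's HCPhys field; masking with MM_RAM_FLAGS_NO_REFS_MASK drops all references at once. */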
2365 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2366 pRam;
2367 pRam = pRam->CTX_SUFF(pNext))
2368 {
2369 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2370 while (iPage-- > 0)
2371 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2372 }
2373
2374 pPool->iPhysExtFreeHead = 0;
2375 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2376 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2377 for (unsigned i = 0; i < cMaxPhysExts; i++)
2378 {
2379 paPhysExts[i].iNext = i + 1;
2380 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2381 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2382 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2383 }
2384 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2385#endif
2386
2387
2388 pPool->cPresent = 0;
2389 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2390}
2391#endif /* IN_RING3 */
2392
2393
2394/**
2395 * Handle SyncCR3 pool tasks.
2396 *
2397 * @returns VBox status code.
2398 * @retval VINF_SUCCESS if successfully handled.
2399 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2400 * @param pVM The VM handle.
2401 * @remark Should only be used when monitoring is available, thus placed in
2402 * the PGMPOOL_WITH_MONITORING #ifdef.
2403 */
2404int pgmPoolSyncCR3(PVM pVM)
2405{
2406 /*
2407 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2408 * Occasionally we will have to clear all the shadow page tables because we wanted
2409 * to monitor a page which was mapped by too many shadow page tables. This operation
2410 * is sometimes referred to as a 'lightweight flush'.
2411 */
2412 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2413 pgmPoolMonitorModifiedClearAll(pVM);
2414 else
2415 {
2416# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2417 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2418 pgmPoolClearAll(pVM);
2419# else /* !IN_RING3 */
2420 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2421 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2422 return VINF_PGM_SYNC_CR3;
2423# endif /* !IN_RING3 */
2424 }
2425 return VINF_SUCCESS;
2426}
2427
2428#endif /* PGMPOOL_WITH_MONITORING */
2429#ifdef PGMPOOL_WITH_USER_TRACKING
2430
2431/**
2432 * Frees up at least one user entry.
2433 *
2434 * @returns VBox status code.
2435 * @retval VINF_SUCCESS if at least one user entry was freed.
2436 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2437 * @param pPool The pool.
2438 * @param iUser The user index.
2439 */
2440static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2441{
2442 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2443#ifdef PGMPOOL_WITH_CACHE
2444 /*
2445 * Just free cached pages in a braindead fashion.
2446 */
2447 /** @todo walk the age list backwards and free the first with usage. */
2448 int rc = VINF_SUCCESS;
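 /* Keep evicting cached pages until at least one user record is back on the free list. */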
2449 do
2450 {
2451 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2452 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2453 rc = rc2;
2454 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2455 return rc;
2456#else
2457 /*
2458 * Lazy approach.
2459 */
2460 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
2461 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
2462 pgmPoolFlushAllInt(pPool);
2463 return VERR_PGM_POOL_FLUSHED;
2464#endif
2465}
2466
2467
2468/**
2469 * Inserts a page into the cache.
2470 *
2471 * This will create a user node for the page, insert it into the GCPhys
2472 * hash, and insert it into the age list.
2473 *
2474 * @returns VBox status code.
2475 * @retval VINF_SUCCESS if successfully added.
2476 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2477 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
2478 * @param pPool The pool.
2479 * @param pPage The cached page.
2480 * @param GCPhys The GC physical address of the page we're going to shadow.
2481 * @param iUser The user index.
2482 * @param iUserTable The user table index.
2483 */
2484DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2485{
2486 int rc = VINF_SUCCESS;
2487 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2488
2489 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2490
2491#ifdef VBOX_STRICT
2492 /*
2493 * Check that the entry doesn't already exist.
2494 */
2495 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2496 {
2497 uint16_t i = pPage->iUserHead;
2498 do
2499 {
2500 Assert(i < pPool->cMaxUsers);
2501 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2502 i = paUsers[i].iNext;
2503 } while (i != NIL_PGMPOOL_USER_INDEX);
2504 }
2505#endif
2506
2507 /*
2508 * Find a free user node.
2509 */
2510 uint16_t i = pPool->iUserFreeHead;
2511 if (i == NIL_PGMPOOL_USER_INDEX)
2512 {
2513 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2514 if (RT_FAILURE(rc))
2515 return rc;
2516 i = pPool->iUserFreeHead;
2517 }
2518
2519 /*
2520 * Unlink the user node from the free list,
2521 * initialize and insert it into the user list.
2522 */
2523 pPool->iUserFreeHead = paUsers[i].iNext;
2524 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2525 paUsers[i].iUser = iUser;
2526 paUsers[i].iUserTable = iUserTable;
2527 pPage->iUserHead = i;
2528
2529 /*
2530 * Insert into cache and enable monitoring of the guest page if enabled.
2531 *
2532 * Until we implement caching of all levels, including the CR3 one, we'll
2533 * have to make sure we don't try to monitor & cache any recursive reuse of
2534 * a monitored CR3 page. Because all Windows versions do this, we'll have to
2535 * be able to do combined access monitoring, CR3 + PT and
2536 * PD + PT (guest PAE).
2537 *
2538 * Update:
2539 * We're now cooperating with the CR3 monitor if an uncacheable page is found.
2540 */
2541#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2542# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2543 const bool fCanBeMonitored = true;
2544# else
2545 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2546 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2547 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2548# endif
2549# ifdef PGMPOOL_WITH_CACHE
2550 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2551# endif
2552 if (fCanBeMonitored)
2553 {
2554# ifdef PGMPOOL_WITH_MONITORING
2555 rc = pgmPoolMonitorInsert(pPool, pPage);
2556 if (rc == VERR_PGM_POOL_CLEARED)
2557 {
2558 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2559# ifndef PGMPOOL_WITH_CACHE
2560 pgmPoolMonitorFlush(pPool, pPage);
2561 rc = VERR_PGM_POOL_FLUSHED;
2562# endif
2563 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2564 paUsers[i].iNext = pPool->iUserFreeHead;
2565 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2566 pPool->iUserFreeHead = i;
2567 }
2568 # endif /* PGMPOOL_WITH_MONITORING */
2569 }
2570 #endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2571 return rc;
2572}
2573
2574
2575# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2576/**
2577 * Adds a user reference to a page.
2578 *
2579 * This will move the page to the head of the age list.
2580 *
2581 * @returns VBox status code.
2582 * @retval VINF_SUCCESS if successfully added.
2583 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2584 * @param pPool The pool.
2585 * @param pPage The cached page.
2586 * @param iUser The user index.
2587 * @param iUserTable The user table.
2588 */
2589static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2590{
2591 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2592
2593 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2594# ifdef VBOX_STRICT
2595 /*
2596 * Check that the entry doesn't already exist.
2597 */
2598 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2599 {
2600 uint16_t i = pPage->iUserHead;
2601 do
2602 {
2603 Assert(i < pPool->cMaxUsers);
2604 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2605 i = paUsers[i].iNext;
2606 } while (i != NIL_PGMPOOL_USER_INDEX);
2607 }
2608# endif
2609
2610 /*
2611 * Allocate a user node.
2612 */
2613 uint16_t i = pPool->iUserFreeHead;
2614 if (i == NIL_PGMPOOL_USER_INDEX)
2615 {
2616 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2617 if (RT_FAILURE(rc))
2618 return rc;
2619 i = pPool->iUserFreeHead;
2620 }
2621 pPool->iUserFreeHead = paUsers[i].iNext;
2622
2623 /*
2624 * Initialize the user node and insert it.
2625 */
2626 paUsers[i].iNext = pPage->iUserHead;
2627 paUsers[i].iUser = iUser;
2628 paUsers[i].iUserTable = iUserTable;
2629 pPage->iUserHead = i;
2630
2631# ifdef PGMPOOL_WITH_CACHE
2632 /*
2633 * Tell the cache to update its replacement stats for this page.
2634 */
2635 pgmPoolCacheUsed(pPool, pPage);
2636# endif
2637 return VINF_SUCCESS;
2638}
2639# endif /* PGMPOOL_WITH_CACHE */
2640
2641
2642/**
2643 * Frees a user record associated with a page.
2644 *
2645 * This does not clear the entry in the user table, it simply returns the
2646 * user record to the chain of free records.
2647 *
2648 * @param pPool The pool.
2649 * @param pPage The shadow page.
2650 * @param iUser The shadow page pool index of the user table.
2651 * @param iUserTable The index into the user table (shadowed).
2652 */
2653static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2654{
2655 /*
2656 * Unlink and free the specified user entry.
2657 */
2658 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2659
2660 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2661 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2662 uint16_t i = pPage->iUserHead;
2663 if ( i != NIL_PGMPOOL_USER_INDEX
2664 && paUsers[i].iUser == iUser
2665 && paUsers[i].iUserTable == iUserTable)
2666 {
2667 pPage->iUserHead = paUsers[i].iNext;
2668
2669 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2670 paUsers[i].iNext = pPool->iUserFreeHead;
2671 pPool->iUserFreeHead = i;
2672 return;
2673 }
2674
2675 /* General: Linear search. */
2676 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2677 while (i != NIL_PGMPOOL_USER_INDEX)
2678 {
2679 if ( paUsers[i].iUser == iUser
2680 && paUsers[i].iUserTable == iUserTable)
2681 {
2682 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2683 paUsers[iPrev].iNext = paUsers[i].iNext;
2684 else
2685 pPage->iUserHead = paUsers[i].iNext;
2686
2687 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2688 paUsers[i].iNext = pPool->iUserFreeHead;
2689 pPool->iUserFreeHead = i;
2690 return;
2691 }
2692 iPrev = i;
2693 i = paUsers[i].iNext;
2694 }
2695
2696 /* Fatal: didn't find it */
2697 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2698 iUser, iUserTable, pPage->GCPhys));
2699}
2700
2701
2702/**
2703 * Gets the entry size of a shadow table.
2704 *
2705 * @param enmKind The kind of page.
2706 *
2707 * @returns The size of the entry in bytes. That is, 4 or 8.
2708 * @returns If the kind is not for a table, a fatal assertion is
2709 * raised.
2710 */
2711DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2712{
2713 switch (enmKind)
2714 {
2715 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2716 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2717 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2718#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2719 case PGMPOOLKIND_32BIT_PD:
2720 case PGMPOOLKIND_32BIT_PD_PHYS:
2721#else
2722 case PGMPOOLKIND_ROOT_32BIT_PD:
2723#endif
2724 return 4;
2725
2726 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2727 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2728 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2729 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2730 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2731 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2732 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2733 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2734 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2735 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2736 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2737 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2738 case PGMPOOLKIND_64BIT_PML4:
2739#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2740 case PGMPOOLKIND_ROOT_PAE_PD:
2741 case PGMPOOLKIND_ROOT_PDPT:
2742#endif
2743 case PGMPOOLKIND_PAE_PDPT:
2744 case PGMPOOLKIND_ROOT_NESTED:
2745 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2746 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2747 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2748 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2749 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2750 case PGMPOOLKIND_PAE_PD_PHYS:
2751 case PGMPOOLKIND_PAE_PDPT_PHYS:
2752 return 8;
2753
2754 default:
2755 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2756 }
2757}
2758
2759
2760/**
2761 * Gets the entry size of a guest table.
2762 *
2763 * @param enmKind The kind of page.
2764 *
2765 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2766 * @returns If the kind is not for a table, an assertion is raised and 0 is
2767 * returned.
2768 */
2769DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2770{
2771 switch (enmKind)
2772 {
2773 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2774 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2775#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2776 case PGMPOOLKIND_32BIT_PD:
2777#else
2778 case PGMPOOLKIND_ROOT_32BIT_PD:
2779#endif
2780 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2781 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2782 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2783 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2784 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2785 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2786 return 4;
2787
2788 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2789 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2790 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2791 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2792 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2793 case PGMPOOLKIND_64BIT_PML4:
2794#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2795 case PGMPOOLKIND_PAE_PDPT:
2796#else
2797 case PGMPOOLKIND_ROOT_PAE_PD:
2798 case PGMPOOLKIND_ROOT_PDPT:
2799#endif
2800 return 8;
2801
2802 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2803 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2804 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2805 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2806 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2807 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2808 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2809 case PGMPOOLKIND_ROOT_NESTED:
2810 case PGMPOOLKIND_PAE_PD_PHYS:
2811 case PGMPOOLKIND_PAE_PDPT_PHYS:
2812 case PGMPOOLKIND_32BIT_PD_PHYS:
2813 /** @todo can we return 0? (nobody is calling this...) */
2814 AssertFailed();
2815 return 0;
2816
2817 default:
2818 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2819 }
2820}
2821
2822#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2823
2824/**
2825 * Scans one shadow page table for mappings of a physical page.
2826 *
2827 * @param pVM The VM handle.
2828 * @param pPhysPage The guest page in question.
2829 * @param iShw The shadow page table.
2830 * @param cRefs The number of references made in that PT.
2831 */
2832static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2833{
2834 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2835 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2836
2837 /*
2838 * Assert sanity.
2839 */
2840 Assert(cRefs == 1);
2841 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2842 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2843
2844 /*
2845 * Then, clear the actual mappings to the page in the shadow PT.
2846 */
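 /* An entry maps the page iff it is present and its address bits equal the page's HCPhys; scanning starts at iFirstPresent to skip the known-empty leading entries. */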
2847 switch (pPage->enmKind)
2848 {
2849 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2850 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2851 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2852 {
2853 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2854 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2855 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2856 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2857 {
2858 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2859 pPT->a[i].u = 0;
2860 cRefs--;
2861 if (!cRefs)
2862 return;
2863 }
2864#ifdef LOG_ENABLED
2865 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2866 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2867 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2868 {
2869 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2870 pPT->a[i].u = 0;
2871 }
2872#endif
2873 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2874 break;
2875 }
2876
2877 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2878 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2879 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2880 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2881 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2882 {
2883 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2884 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2885 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2886 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2887 {
2888 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2889 pPT->a[i].u = 0;
2890 cRefs--;
2891 if (!cRefs)
2892 return;
2893 }
2894#ifdef LOG_ENABLED
2895 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2896 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2897 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2898 {
2899 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2900 pPT->a[i].u = 0;
2901 }
2902#endif
2903 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
2904 break;
2905 }
2906
2907 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2908 {
2909 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2910 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2911 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2912 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2913 {
2914 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2915 pPT->a[i].u = 0;
2916 cRefs--;
2917 if (!cRefs)
2918 return;
2919 }
2920#ifdef LOG_ENABLED
2921 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2922 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2923 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2924 {
2925 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2926 pPT->a[i].u = 0;
2927 }
2928#endif
2929 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2930 break;
2931 }
2932
2933 default:
2934 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2935 }
2936}
2937
2938
2939/**
2940 * Scans one shadow page table for mappings of a physical page.
2941 *
2942 * @param pVM The VM handle.
2943 * @param pPhysPage The guest page in question.
2944 * @param iShw The shadow page table.
2945 * @param cRefs The number of references made in that PT.
2946 */
2947void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2948{
2949 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2950 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2951 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2952 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2953 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2954 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2955}
2956
2957
2958/**
2959 * Flushes a list of shadow page tables mapping the same physical page.
2960 *
2961 * @param pVM The VM handle.
2962 * @param pPhysPage The guest page in question.
2963 * @param iPhysExt The physical cross reference extent list to flush.
2964 */
2965void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2966{
2967 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2968 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2969 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%u\n", pPhysPage->HCPhys, iPhysExt));
2970
2971 const uint16_t iPhysExtStart = iPhysExt;
2972 PPGMPOOLPHYSEXT pPhysExt;
2973 do
2974 {
2975 Assert(iPhysExt < pPool->cMaxPhysExts);
2976 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2977 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2978 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2979 {
2980 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2981 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2982 }
2983
2984 /* next */
2985 iPhysExt = pPhysExt->iNext;
2986 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2987
2988 /* insert the list into the free list and clear the ram range entry. */
2989 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2990 pPool->iPhysExtFreeHead = iPhysExtStart;
2991 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2992
2993 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2994}
2995
2996#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2997
2998/**
2999 * Scans all shadow page tables for mappings of a physical page.
3000 *
3001 * This may be slow, but it's most likely more efficient than cleaning
3002 * out the entire page pool / cache.
3003 *
3004 * @returns VBox status code.
3005 * @retval VINF_SUCCESS if all references have been successfully cleared.
3006 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3007 * a page pool cleaning.
3008 *
3009 * @param pVM The VM handle.
3010 * @param pPhysPage The guest page in question.
3011 */
3012int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3013{
3014 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3015 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3016 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
3017 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
3018
3019#if 1
3020 /*
3021 * There is a limit to what makes sense.
3022 */
3023 if (pPool->cPresent > 1024)
3024 {
3025 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3026 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3027 return VINF_PGM_GCPHYS_ALIASED;
3028 }
3029#endif
3030
3031 /*
3032 * Iterate all the pages until we've encountered all those in use.
3033 * This is a simple but not quite optimal solution.
3034 */
3035 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3036 const uint32_t u32 = u64;
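 /* u64 and u32 are the PAE and 32-bit forms of "present entry pointing at this page" which the kind-specific loops below match against. */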
3037 unsigned cLeft = pPool->cUsedPages;
3038 unsigned iPage = pPool->cCurPages;
3039 while (--iPage >= PGMPOOL_IDX_FIRST)
3040 {
3041 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3042 if (pPage->GCPhys != NIL_RTGCPHYS)
3043 {
3044 switch (pPage->enmKind)
3045 {
3046 /*
3047 * We only care about shadow page tables.
3048 */
3049 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3050 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3051 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3052 {
3053 unsigned cPresent = pPage->cPresent;
3054 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3055 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3056 if (pPT->a[i].n.u1Present)
3057 {
3058 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3059 {
3060 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3061 pPT->a[i].u = 0;
3062 }
3063 if (!--cPresent)
3064 break;
3065 }
3066 break;
3067 }
3068
3069 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3070 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3071 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3072 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3073 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3074 {
3075 unsigned cPresent = pPage->cPresent;
3076 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3077 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3078 if (pPT->a[i].n.u1Present)
3079 {
3080 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3081 {
3082 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3083 pPT->a[i].u = 0;
3084 }
3085 if (!--cPresent)
3086 break;
3087 }
3088 break;
3089 }
3090 }
3091 if (!--cLeft)
3092 break;
3093 }
3094 }
3095
3096 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3097 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3098 return VINF_SUCCESS;
3099}
3100
3101
3102/**
3103 * Clears the user entry in a user table.
3104 *
3105 * This is used to remove all references to a page when flushing it.
3106 */
3107static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3108{
3109 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3110 Assert(pUser->iUser < pPool->cCurPages);
3111 uint32_t iUserTable = pUser->iUserTable;
3112
3113 /*
3114 * Map the user page.
3115 */
3116 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3117#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3118 if (pUserPage->enmKind == PGMPOOLKIND_ROOT_PAE_PD)
3119 {
3120 /* Must translate the fake 2048-entry PD to a 512-entry one since the R0 mapping is not linear. */
3121 Assert(pUser->iUser == PGMPOOL_IDX_PAE_PD);
3122 uint32_t iPdpt = iUserTable / X86_PG_PAE_ENTRIES;
3123 iUserTable %= X86_PG_PAE_ENTRIES;
3124 pUserPage = &pPool->aPages[PGMPOOL_IDX_PAE_PD_0 + iPdpt];
3125 Assert(pUserPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD);
3126 }
3127#endif
3128 union
3129 {
3130 uint64_t *pau64;
3131 uint32_t *pau32;
3132 } u;
3133 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
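 /* A single mapping serves both entry widths; pUserPage->enmKind decides below whether a 32-bit or 64-bit entry gets cleared. */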
3134
3135 /* Safety precaution in case we change the paging for other modes too in the future. */
3136 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
3137
3138#ifdef VBOX_STRICT
3139 /*
3140 * Some sanity checks.
3141 */
3142 switch (pUserPage->enmKind)
3143 {
3144# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3145 case PGMPOOLKIND_32BIT_PD:
3146 case PGMPOOLKIND_32BIT_PD_PHYS:
3147 Assert(iUserTable < X86_PG_ENTRIES);
3148 break;
3149# else
3150 case PGMPOOLKIND_ROOT_32BIT_PD:
3151 Assert(iUserTable < X86_PG_ENTRIES);
3152 Assert(!(u.pau32[iUserTable] & PGM_PDFLAGS_MAPPING));
3153 break;
3154# endif
3155# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3156 case PGMPOOLKIND_ROOT_PAE_PD:
3157 Assert(iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
3158 AssertMsg(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING), ("%llx %d\n", u.pau64[iUserTable], iUserTable));
3159 break;
3160# endif
3161# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3162 case PGMPOOLKIND_PAE_PDPT:
3163 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3164 case PGMPOOLKIND_PAE_PDPT_PHYS:
3165# else
3166 case PGMPOOLKIND_ROOT_PDPT:
3167# endif
3168 Assert(iUserTable < 4);
3169 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3170 break;
3171 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3172 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3173 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3174 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3175 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3176 case PGMPOOLKIND_PAE_PD_PHYS:
3177 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3178 break;
3179 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3180 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3181 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3182 break;
3183 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3184 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3185 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3186 break;
3187 case PGMPOOLKIND_64BIT_PML4:
3188 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3189 /* GCPhys >> PAGE_SHIFT is the index here */
3190 break;
3191 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3192 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3193 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3194 break;
3195
3196 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3197 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3198 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3199 break;
3200
3201 case PGMPOOLKIND_ROOT_NESTED:
3202 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3203 break;
3204
3205 default:
3206 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3207 break;
3208 }
3209#endif /* VBOX_STRICT */
3210
3211 /*
3212 * Clear the entry in the user page.
3213 */
3214 switch (pUserPage->enmKind)
3215 {
3216 /* 32-bit entries */
3217#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3218 case PGMPOOLKIND_32BIT_PD:
3219 case PGMPOOLKIND_32BIT_PD_PHYS:
3220#else
3221 case PGMPOOLKIND_ROOT_32BIT_PD:
3222#endif
3223 u.pau32[iUserTable] = 0;
3224 break;
3225
3226 /* 64-bit entries */
3227 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3228 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3229 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3230 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3231 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3232 case PGMPOOLKIND_PAE_PD_PHYS:
3233 case PGMPOOLKIND_PAE_PDPT_PHYS:
3234 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3235 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3236 case PGMPOOLKIND_64BIT_PML4:
3237 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3238 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3239# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3240 case PGMPOOLKIND_ROOT_PAE_PD:
3241#endif
3242#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3243 case PGMPOOLKIND_PAE_PDPT:
3244 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3245#else
3246 case PGMPOOLKIND_ROOT_PDPT:
3247#endif
3248 case PGMPOOLKIND_ROOT_NESTED:
3249 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3250 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3251 u.pau64[iUserTable] = 0;
3252 break;
3253
3254 default:
3255 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3256 }
3257}
3258
3259
3260/**
3261 * Clears all users of a page.
3262 */
3263static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3264{
3265 /*
3266 * Free all the user records.
3267 */
3268 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3269
3270 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3271 uint16_t i = pPage->iUserHead;
3272 while (i != NIL_PGMPOOL_USER_INDEX)
3273 {
3274 /* Clear the entry in the user table. */
3275 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3276
3277 /* Free it. */
3278 const uint16_t iNext = paUsers[i].iNext;
3279 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3280 paUsers[i].iNext = pPool->iUserFreeHead;
3281 pPool->iUserFreeHead = i;
3282
3283 /* Next. */
3284 i = iNext;
3285 }
3286 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3287}
3288
3289#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3290
3291/**
3292 * Allocates a new physical cross reference extent.
3293 *
3294 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3295 * @param pVM The VM handle.
3296 * @param piPhysExt Where to store the phys ext index.
3297 */
3298PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3299{
3300 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3301 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3302 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3303 {
3304 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3305 return NULL;
3306 }
3307 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3308 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3309 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3310 *piPhysExt = iPhysExt;
3311 return pPhysExt;
3312}
3313
3314
3315/**
3316 * Frees a physical cross reference extent.
3317 *
3318 * @param pVM The VM handle.
3319 * @param iPhysExt The extent to free.
3320 */
3321void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3322{
3323 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3324 Assert(iPhysExt < pPool->cMaxPhysExts);
3325 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3326 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3327 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3328 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3329 pPool->iPhysExtFreeHead = iPhysExt;
3330}
3331
3332
3333/**
3334 * Frees a whole list of physical cross reference extents.
3335 *
3336 * @param pVM The VM handle.
3337 * @param iPhysExt The index of the head of the extent list to free.
3338 */
3339void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3340{
3341 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3342
3343 const uint16_t iPhysExtStart = iPhysExt;
3344 PPGMPOOLPHYSEXT pPhysExt;
3345 do
3346 {
3347 Assert(iPhysExt < pPool->cMaxPhysExts);
3348 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3349 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3350 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3351
3352 /* next */
3353 iPhysExt = pPhysExt->iNext;
3354 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3355
3356 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3357 pPool->iPhysExtFreeHead = iPhysExtStart;
3358}
3359
3360
3361/**
3362 * Insert a reference into a list of physical cross reference extents.
3363 *
3364 * @returns The new ram range flags (top 16-bits).
3365 *
3366 * @param pVM The VM handle.
3367 * @param iPhysExt The physical extent index of the list head.
3368 * @param iShwPT The shadow page table index.
3369 *
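 * @remarks The low bits of the returned value hold the extent index (or
 * MM_RAM_FLAGS_IDX_OVERFLOWED) and the top bits hold the
 * MM_RAM_FLAGS_CREFS_PHYSEXT marker.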
3370 */
3371static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3372{
3373 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3374 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3375
3376 /* special common case. */
3377 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3378 {
3379 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3380 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3381 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
3382 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3383 }
3384
3385 /* general treatment. */
3386 const uint16_t iPhysExtStart = iPhysExt;
3387 unsigned cMax = 15;
3388 for (;;)
3389 {
3390 Assert(iPhysExt < pPool->cMaxPhysExts);
3391 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3392 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3393 {
3394 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3395 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3396 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3397 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3398 }
3399 if (!--cMax)
3400 {
3401 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3402 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3403 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
3404 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3405 }
 /* advance; if the end of the list is reached, fall through and append
 a new extent (without this step the append code below is unreachable). */
 iPhysExt = paPhysExts[iPhysExt].iNext;
 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
 break;
3406 }
3407
3408 /* add another extent to the list. */
3409 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3410 if (!pNew)
3411 {
3412 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3413 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3414 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3415 }
3416 pNew->iNext = iPhysExtStart;
3417 pNew->aidx[0] = iShwPT;
3418 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3419 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3420}
3421
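/*
 * A worked note on the u16 value returned above (derived from the shift
 * arithmetic used in this file; the concrete field widths are an assumption):
 * the bits covered by MM_RAM_FLAGS_IDX_MASK carry an index and the bits above
 * them a reference count, where the special count MM_RAM_FLAGS_CREFS_PHYSEXT
 * marks the index as a phys ext list head rather than a single pool page
 * index. Decoding therefore mirrors pgmPoolTrackPhysExtAddref below:
 *
 *     uint16_t iIdx  = u16 & MM_RAM_FLAGS_IDX_MASK;
 *     uint16_t cRefs = u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT);
 */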
3422
3423/**
3424 * Add a reference to a guest physical page where extents are in use.
3425 *
3426 * @returns The new ram range flags (top 16-bits).
3427 *
3428 * @param pVM The VM handle.
3429 * @param u16 The ram range flags (top 16-bits).
3430 * @param iShwPT The shadow page table index.
3431 */
3432uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3433{
3434 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
3435 {
3436 /*
3437 * Convert to extent list.
3438 */
3439 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
3440 uint16_t iPhysExt;
3441 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3442 if (pPhysExt)
3443 {
3444 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
3445 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3446 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
3447 pPhysExt->aidx[1] = iShwPT;
3448 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3449 }
3450 else
3451 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3452 }
3453 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
3454 {
3455 /*
3456 * Insert into the extent list.
3457 */
3458 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
3459 }
3460 else
3461 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3462 return u16;
3463}
3464
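/**
 * Illustrative sketch (hypothetical caller, not part of the build): updating
 * the tracking word of a ram range page when another shadow PT starts
 * referencing it. The extraction of the tracking bits from HCPhys follows the
 * shifts used by the deref code in this file and is an assumed layout.
 * @code
 *     uint16_t u16 = (uint16_t)(pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT); // current tracking bits (assumed layout)
 *     u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);               // may convert to an extent list
 *     // ... write u16 back into the flags part of pPhysPage->HCPhys ...
 * @endcode
 */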
3465
3466/**
3467 * Clear references to guest physical memory.
3468 *
3469 * @param pPool The pool.
3470 * @param pPage The page.
3471 * @param pPhysPage Pointer to the aPages entry in the ram range.
3472 */
3473void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3474{
3475 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
3476 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3477
3478 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
3479 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
3480 {
3481 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3482 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3483 do
3484 {
3485 Assert(iPhysExt < pPool->cMaxPhysExts);
3486
3487 /*
3488 * Look for the shadow page and check if it's all freed.
3489 */
3490 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3491 {
3492 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3493 {
3494 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3495
3496 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3497 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3498 {
3499 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3500 return;
3501 }
3502
3503 /* we can free the node. */
3504 PVM pVM = pPool->CTX_SUFF(pVM);
3505 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3506 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3507 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3508 {
3509 /* lonely node */
3510 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3511 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
3512 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3513 }
3514 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3515 {
3516 /* head */
3517 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
3518 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
3519 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
3520 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
3521 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3522 }
3523 else
3524 {
3525 /* in list */
3526 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3527 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3528 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3529 }
3530 iPhysExt = iPhysExtNext;
3531 return;
3532 }
3533 }
3534
3535 /* next */
3536 iPhysExtPrev = iPhysExt;
3537 iPhysExt = paPhysExts[iPhysExt].iNext;
3538 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3539
3540 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3541 }
3542 else /* nothing to do */
3543 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
3544}
3545
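/*
 * The three unlink cases handled above, sketched (X marks the extent holding
 * the reference being dropped, F is the flags word in the ram range entry):
 *
 *     lonely:  F -> [X]               => clear F's reference bits entirely
 *     head:    F -> [X] -> [B] ...    => repoint F at [B], free [X]
 *     in list: F -> [A] -> [X] -> [B] => [A].iNext = [B], free [X]
 */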
3546
3547/**
3548 * Clear references to guest physical memory.
3549 *
3550 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical
3551 * address is assumed to be correct, so the linear search fallback can be skipped
3552 * and we can assert at an earlier point.
3553 *
3554 * @param pPool The pool.
3555 * @param pPage The page.
3556 * @param HCPhys The host physical address corresponding to the guest page.
3557 * @param GCPhys The guest physical address corresponding to HCPhys.
3558 */
3559static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3560{
3561 /*
3562 * Walk range list.
3563 */
3564 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3565 while (pRam)
3566 {
3567 RTGCPHYS off = GCPhys - pRam->GCPhys;
3568 if (off < pRam->cb)
3569 {
3570 /* does it match? */
3571 const unsigned iPage = off >> PAGE_SHIFT;
3572 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3573#ifdef LOG_ENABLED
3574 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3575 Log(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3576#endif
3577 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3578 {
3579 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3580 return;
3581 }
3582 break;
3583 }
3584 pRam = pRam->CTX_SUFF(pNext);
3585 }
3586 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3587}
3588
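/*
 * A brief note on the range check above: "off = GCPhys - pRam->GCPhys" relies
 * on unsigned wrap-around, so the single "off < pRam->cb" comparison rejects
 * addresses below the range as well. Worked example (hypothetical numbers):
 *
 *     pRam->GCPhys = 0x100000, pRam->cb = 0x10000
 *     GCPhys = 0x104000  -> off = 0x004000 < cb           -> inside
 *     GCPhys = 0x0ff000  -> off wraps to ~0xff...fff000   -> rejected
 */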
3589
3590/**
3591 * Clear references to guest physical memory.
3592 *
3593 * @param pPool The pool.
3594 * @param pPage The page.
3595 * @param HCPhys The host physical address corresponding to the guest page.
3596 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3597 */
3598static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3599{
3600 /*
3601 * Walk range list.
3602 */
3603 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3604 while (pRam)
3605 {
3606 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3607 if (off < pRam->cb)
3608 {
3609 /* does it match? */
3610 const unsigned iPage = off >> PAGE_SHIFT;
3611 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3612 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3613 {
3614 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3615 return;
3616 }
3617 break;
3618 }
3619 pRam = pRam->CTX_SUFF(pNext);
3620 }
3621
3622 /*
3623 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3624 */
3625 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3626 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3627 while (pRam)
3628 {
3629 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3630 while (iPage-- > 0)
3631 {
3632 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3633 {
3634 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3635 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3636 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3637 return;
3638 }
3639 }
3640 pRam = pRam->CTX_SUFF(pNext);
3641 }
3642
3643 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3644}
3645
3646
3647/**
3648 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3649 *
3650 * @param pPool The pool.
3651 * @param pPage The page.
3652 * @param pShwPT The shadow page table (mapping of the page).
3653 * @param pGstPT The guest page table.
3654 */
3655DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3656{
3657 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3658 if (pShwPT->a[i].n.u1Present)
3659 {
3660 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3661 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3662 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3663 if (!--pPage->cPresent)
3664 break;
3665 }
3666}
3667
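/*
 * Note on the loop bounds above: pPage->iFirstPresent skips the leading
 * not-present entries and the "if (!--pPage->cPresent) break;" stops as soon
 * as the last present entry has been dereferenced. Illustrative:
 *
 *     iFirstPresent = 5, cPresent = 2  => only entries 5..n are scanned, and
 *     the loop exits right after the second present PTE has been handled.
 */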
3668
3669/**
3670 * Clear references to guest physical memory in a PAE / 32-bit page table.
3671 *
3672 * @param pPool The pool.
3673 * @param pPage The page.
3674 * @param pShwPT The shadow page table (mapping of the page).
3675 * @param pGstPT The guest page table (just a half one).
3676 */
3677DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3678{
3679 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3680 if (pShwPT->a[i].n.u1Present)
3681 {
3682 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3683 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3684 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3685 }
3686}
3687
3688
3689/**
3690 * Clear references to guest physical memory in a PAE / PAE page table.
3691 *
3692 * @param pPool The pool.
3693 * @param pPage The page.
3694 * @param pShwPT The shadow page table (mapping of the page).
3695 * @param pGstPT The guest page table.
3696 */
3697DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3698{
3699 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3700 if (pShwPT->a[i].n.u1Present)
3701 {
3702 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3703 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3704 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3705 }
3706}
3707
3708
3709/**
3710 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3711 *
3712 * @param pPool The pool.
3713 * @param pPage The page.
3714 * @param pShwPT The shadow page table (mapping of the page).
3715 */
3716DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3717{
3718 RTGCPHYS GCPhys = pPage->GCPhys;
3719 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3720 if (pShwPT->a[i].n.u1Present)
3721 {
3722 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3723 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3724 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3725 }
3726}
3727
3728
3729/**
3730 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3731 *
3732 * @param pPool The pool.
3733 * @param pPage The page.
3734 * @param pShwPT The shadow page table (mapping of the page).
3735 */
3736DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3737{
3738 RTGCPHYS GCPhys = pPage->GCPhys;
3739 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3740 if (pShwPT->a[i].n.u1Present)
3741 {
3742 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3743 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3744 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3745 }
3746}
3747
3748#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3749
3750
3751#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3752/**
3753 * Clear references to shadowed pages in a 32-bit page directory.
3754 *
3755 * @param pPool The pool.
3756 * @param pPage The page.
3757 * @param pShwPD The shadow page directory (mapping of the page).
3758 */
3759DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
3760{
3761 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3762 {
3763 if ( pShwPD->a[i].n.u1Present
3764 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3765 )
3766 {
3767 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
3768 if (pSubPage)
3769 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3770 else
3771 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
3772 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3773 }
3774 }
3775}
3776#endif
3777
3778/**
3779 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
3780 *
3781 * @param pPool The pool.
3782 * @param pPage The page.
3783 * @param pShwPD The shadow page directory (mapping of the page).
3784 */
3785DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3786{
3787 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3788 {
3789 if ( pShwPD->a[i].n.u1Present
3790#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3791 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3792#endif
3793 )
3794 {
3795 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3796 if (pSubPage)
3797 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3798 else
3799 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3800 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3801 }
3802 }
3803}
3804
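/*
 * Design note (with a minimal sketch): the pgmPoolTrackDerefPD, -PDPT and
 * -PML4 helpers all do the same reverse lookup. A shadow directory entry only
 * stores the HC physical address of the child page table, so the pool's AVL
 * tree, keyed by PGMPOOLPAGE::Core.Key (the page's HC physical address), is
 * used to get back to the pool page before dropping the user record:
 *
 *     PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhysChild);
 *     if (pSubPage)
 *         pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, iEntry);
 *
 * HCPhysChild and iEntry are stand-ins for the masked entry value and the
 * loop index used in the real helpers.
 */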
3805
3806/**
3807 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3808 *
3809 * @param pPool The pool.
3810 * @param pPage The page.
3811 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3812 */
3813DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3814{
3815 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3816 {
3817 if ( pShwPDPT->a[i].n.u1Present
3818#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3819 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
3820#endif
3821 )
3822 {
3823 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3824 if (pSubPage)
3825 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3826 else
3827 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3828 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3829 }
3830 }
3831}
3832
3833
3834/**
3835 * Clear references to shadowed pages in a 64-bit level 4 page table.
3836 *
3837 * @param pPool The pool.
3838 * @param pPage The page.
3839 * @param pShwPML4 The shadow page map level 4 table (mapping of the page).
3840 */
3841DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3842{
3843 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3844 {
3845 if (pShwPML4->a[i].n.u1Present)
3846 {
3847 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3848 if (pSubPage)
3849 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3850 else
3851 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3852 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3853 }
3854 }
3855}
3856
3857
3858/**
3859 * Clear references to shadowed pages in an EPT page table.
3860 *
3861 * @param pPool The pool.
3862 * @param pPage The page.
3863 * @param pShwPT The shadow page table (mapping of the page).
3864 */
3865DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3866{
3867 RTGCPHYS GCPhys = pPage->GCPhys;
3868 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3869 if (pShwPT->a[i].n.u1Present)
3870 {
3871 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
3872 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3873 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3874 }
3875}
3876
3877
3878/**
3879 * Clear references to shadowed pages in an EPT page directory.
3880 *
3881 * @param pPool The pool.
3882 * @param pPage The page.
3883 * @param pShwPD The shadow page directory (mapping of the page).
3884 */
3885DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3886{
3887 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3888 {
3889 if (pShwPD->a[i].n.u1Present)
3890 {
3891 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3892 if (pSubPage)
3893 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3894 else
3895 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3896 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3897 }
3898 }
3899}
3900
3901
3902/**
3903 * Clear references to shadowed pages in an EPT page directory pointer table.
3904 *
3905 * @param pPool The pool.
3906 * @param pPage The page.
3907 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3908 */
3909DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3910{
3911 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3912 {
3913 if (pShwPDPT->a[i].n.u1Present)
3914 {
3915 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3916 if (pSubPage)
3917 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3918 else
3919 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3920 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3921 }
3922 }
3923}
3924
3925
3926/**
3927 * Clears all references made by this page.
3928 *
3929 * This includes other shadow pages and GC physical addresses.
3930 *
3931 * @param pPool The pool.
3932 * @param pPage The page.
3933 */
3934static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3935{
3936 /*
3937 * Map the shadow page and take action according to the page kind.
3938 */
3939 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3940 switch (pPage->enmKind)
3941 {
3942#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3943 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3944 {
3945 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3946 void *pvGst;
3947 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3948 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3949 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3950 break;
3951 }
3952
3953 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3954 {
3955 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3956 void *pvGst;
3957 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3958 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3959 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3960 break;
3961 }
3962
3963 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3964 {
3965 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3966 void *pvGst;
3967 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3968 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3969 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3970 break;
3971 }
3972
3973 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3974 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3975 {
3976 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3977 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3978 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3979 break;
3980 }
3981
3982 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3983 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3984 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3985 {
3986 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3987 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3988 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3989 break;
3990 }
3991
3992#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3993 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3994 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3995 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3996 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3997 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3998 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3999 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4000 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4001 break;
4002#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4003
4004 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4005 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4006 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4007 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4008 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4009 case PGMPOOLKIND_PAE_PD_PHYS:
4010 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4011 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4012 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4013 break;
4014
4015#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4016 case PGMPOOLKIND_32BIT_PD:
4017 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4018 break;
4019
4020 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4021 case PGMPOOLKIND_PAE_PDPT:
4022 case PGMPOOLKIND_PAE_PDPT_PHYS:
4023#endif
4024 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4025 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4026 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4027 break;
4028
4029 case PGMPOOLKIND_64BIT_PML4:
4030 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4031 break;
4032
4033 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4034 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4035 break;
4036
4037 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4038 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4039 break;
4040
4041 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4042 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4043 break;
4044
4045 default:
4046 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4047 }
4048
4049 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4050 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4051 ASMMemZeroPage(pvShw);
4052 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4053 pPage->fZeroed = true;
4054}
4055
4056#endif /* PGMPOOL_WITH_USER_TRACKING */
4057
4058/**
4059 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
4060 *
4061 * @param pPool The pool.
4062 */
4063static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
4064{
4065#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4066 /* Start a subset so we won't run out of mapping space. */
4067 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4068 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4069#endif
4070
4071 /*
4072 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST-1.
4073 */
4074 Assert(NIL_PGMPOOL_IDX == 0);
4075 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
4076 {
4077 /*
4078 * Get the page address.
4079 */
4080 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4081 union
4082 {
4083 uint64_t *pau64;
4084 uint32_t *pau32;
4085 } u;
4086
4087 /*
4088 * Mark stuff not present.
4089 */
4090 switch (pPage->enmKind)
4091 {
4092#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4093 case PGMPOOLKIND_ROOT_32BIT_PD:
4094 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4095 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
4096 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
4097 u.pau32[iPage] = 0;
4098 break;
4099
4100 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4101 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4102 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
4103 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
4104 u.pau64[iPage] = 0;
4105 break;
4106
4107 case PGMPOOLKIND_ROOT_PDPT:
4108 /* Not root of shadowed pages currently, ignore it. */
4109 break;
4110#endif
4111
4112 case PGMPOOLKIND_ROOT_NESTED:
4113 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4114 ASMMemZero32(u.pau64, PAGE_SIZE);
4115 break;
4116 }
4117 }
4118
4119 /*
4120 * Paranoia (to be removed), flag a global CR3 sync.
4121 */
4122 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
4123
4124#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4125 /* Pop the subset. */
4126 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4127#endif
4128}
4129
4130
4131/**
4132 * Flushes the entire cache.
4133 *
4134 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
4135 * this and will execute the flush.
4136 *
4137 * @param pPool The pool.
4138 */
4139static void pgmPoolFlushAllInt(PPGMPOOL pPool)
4140{
4141 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
4142 LogFlow(("pgmPoolFlushAllInt:\n"));
4143
4144 /*
4145 * If there are no pages in the pool, there is nothing to do.
4146 */
4147 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4148 {
4149 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4150 return;
4151 }
4152
4153 /*
4154 * Nuke the free list and reinsert all pages into it.
4155 */
4156 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4157 {
4158 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4159
4160#ifdef IN_RING3
4161 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMR3, pPage->pvPageR3));
4162#endif
4163#ifdef PGMPOOL_WITH_MONITORING
4164 if (pPage->fMonitored)
4165 pgmPoolMonitorFlush(pPool, pPage);
4166 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4167 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4168 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4169 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4170 pPage->cModifications = 0;
4171#endif
4172 pPage->GCPhys = NIL_RTGCPHYS;
4173 pPage->enmKind = PGMPOOLKIND_FREE;
4174 Assert(pPage->idx == i);
4175 pPage->iNext = i + 1;
4176 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4177 pPage->fSeenNonGlobal = false;
4178 pPage->fMonitored = false;
4179 pPage->fCached = false;
4180 pPage->fReusedFlushPending = false;
4181#ifdef PGMPOOL_WITH_USER_TRACKING
4182 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4183#else
4184 pPage->fCR3Mix = false;
4185#endif
4186#ifdef PGMPOOL_WITH_CACHE
4187 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4188 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4189#endif
4190 }
4191 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4192 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4193 pPool->cUsedPages = 0;
4194
4195#ifdef PGMPOOL_WITH_USER_TRACKING
4196 /*
4197 * Zap and reinitialize the user records.
4198 */
4199 pPool->cPresent = 0;
4200 pPool->iUserFreeHead = 0;
4201 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4202 const unsigned cMaxUsers = pPool->cMaxUsers;
4203 for (unsigned i = 0; i < cMaxUsers; i++)
4204 {
4205 paUsers[i].iNext = i + 1;
4206 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4207 paUsers[i].iUserTable = 0xfffffffe;
4208 }
4209 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4210#endif
4211
4212#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4213 /*
4214 * Clear all the GCPhys links and rebuild the phys ext free list.
4215 */
4216 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4217 pRam;
4218 pRam = pRam->CTX_SUFF(pNext))
4219 {
4220 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4221 while (iPage-- > 0)
4222 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
4223 }
4224
4225 pPool->iPhysExtFreeHead = 0;
4226 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4227 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4228 for (unsigned i = 0; i < cMaxPhysExts; i++)
4229 {
4230 paPhysExts[i].iNext = i + 1;
4231 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4232 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4233 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4234 }
4235 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4236#endif
4237
4238#ifdef PGMPOOL_WITH_MONITORING
4239 /*
4240 * Just zap the modified list.
4241 */
4242 pPool->cModifiedPages = 0;
4243 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4244#endif
4245
4246#ifdef PGMPOOL_WITH_CACHE
4247 /*
4248 * Clear the GCPhys hash and the age list.
4249 */
4250 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4251 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4252 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4253 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4254#endif
4255
4256 /*
4257 * Flush all the special root pages.
4258 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4259 */
4260 pgmPoolFlushAllSpecialRoots(pPool);
4261 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4262 {
4263 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4264 pPage->iNext = NIL_PGMPOOL_IDX;
4265#ifdef PGMPOOL_WITH_MONITORING
4266 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4267 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4268 pPage->cModifications = 0;
4269 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4270 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4271 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4272 if (pPage->fMonitored)
4273 {
4274 PVM pVM = pPool->CTX_SUFF(pVM);
4275 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4276 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4277 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4278 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4279 pPool->pszAccessHandler);
4280 AssertFatalRCSuccess(rc);
4281# ifdef PGMPOOL_WITH_CACHE
4282 pgmPoolHashInsert(pPool, pPage);
4283# endif
4284 }
4285#endif
4286#ifdef PGMPOOL_WITH_USER_TRACKING
4287 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4288#endif
4289#ifdef PGMPOOL_WITH_CACHE
4290 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4291 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4292#endif
4293 }
4294
4295 /*
4296 * Finally, assert the FF.
4297 */
4298 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
4299
4300 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4301}
4302
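/*
 * Sanity sketch (illustrative, not compiled): after pgmPoolFlushAllInt the
 * free list must contain every page from PGMPOOL_IDX_FIRST up to cCurPages-1,
 * which a debug-only walk could verify like this:
 *
 *     unsigned cFree = 0;
 *     for (uint16_t i = pPool->iFreeHead; i != NIL_PGMPOOL_IDX; i = pPool->aPages[i].iNext)
 *         cFree++;
 *     Assert(cFree == pPool->cCurPages - PGMPOOL_IDX_FIRST);
 */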
4303
4304/**
4305 * Flushes a pool page.
4306 *
4307 * This moves the page to the free list after removing all user references to it.
4308 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
4309 *
4310 * @returns VBox status code.
4311 * @retval VINF_SUCCESS on success.
4312 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
4313 * @param pPool The pool.
4314 * @param HCPhys The HC physical address of the shadow page.
4315 */
4316int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4317{
4318 int rc = VINF_SUCCESS;
4319 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4320 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4321 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4322
4323 /*
4324 * Quietly reject any attempts at flushing any of the special root pages.
4325 */
4326 if (pPage->idx < PGMPOOL_IDX_FIRST)
4327 {
4328 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4329 return VINF_SUCCESS;
4330 }
4331
4332 /*
4333 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4334 */
4335 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
4336 {
4337#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4338 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4339 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4340 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4341 || pPage->enmKind == PGMPOOLKIND_32BIT_PD,
4342 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d)\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4343#else
4344 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4,
4345 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d)\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4346#endif
4347 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4348 return VINF_SUCCESS;
4349 }
4350
4351#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4352 /* Start a subset so we won't run out of mapping space. */
4353 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4354 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4355#endif
4356
4357 /*
4358 * Mark the page as being in need of an ASMMemZeroPage().
4359 */
4360 pPage->fZeroed = false;
4361
4362#ifdef PGMPOOL_WITH_USER_TRACKING
4363 /*
4364 * Clear the page.
4365 */
4366 pgmPoolTrackClearPageUsers(pPool, pPage);
4367 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4368 pgmPoolTrackDeref(pPool, pPage);
4369 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4370#endif
4371
4372#ifdef PGMPOOL_WITH_CACHE
4373 /*
4374 * Flush it from the cache.
4375 */
4376 pgmPoolCacheFlushPage(pPool, pPage);
4377#endif /* PGMPOOL_WITH_CACHE */
4378
4379#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4380 /* Heavy stuff done. */
4381 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4382#endif
4383
4384#ifdef PGMPOOL_WITH_MONITORING
4385 /*
4386 * Deregister the monitoring.
4387 */
4388 if (pPage->fMonitored)
4389 rc = pgmPoolMonitorFlush(pPool, pPage);
4390#endif
4391
4392 /*
4393 * Free the page.
4394 */
4395 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4396 pPage->iNext = pPool->iFreeHead;
4397 pPool->iFreeHead = pPage->idx;
4398 pPage->enmKind = PGMPOOLKIND_FREE;
4399 pPage->GCPhys = NIL_RTGCPHYS;
4400 pPage->fReusedFlushPending = false;
4401
4402 pPool->cUsedPages--;
4403 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4404 return rc;
4405}
4406
4407
4408/**
4409 * Frees a usage of a pool page.
4410 *
4411 * The caller is responsible for updating the user table so that it no longer
4412 * references the shadow page.
4413 *
4414 * @param pPool The pool.
4415 * @param pPage The shadow page.
4416 * @param iUser The shadow page pool index of the user table.
4417 * @param iUserTable The index into the user table (shadowed).
4418 */
4419void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4420{
4421 STAM_PROFILE_START(&pPool->StatFree, a);
4422 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4423 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4424 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4425#ifdef PGMPOOL_WITH_USER_TRACKING
4426 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4427#endif
4428#ifdef PGMPOOL_WITH_CACHE
4429 if (!pPage->fCached)
4430#endif
4431 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
4432 STAM_PROFILE_STOP(&pPool->StatFree, a);
4433}
4434
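/**
 * Illustrative pairing (hypothetical caller, not part of the build): a user
 * that replaces a shadow PDE drops its usage of the old page table and clears
 * the entry itself, per the doc comment above.
 * @code
 *     pgmPoolFreeByPage(pPool, pOldShwPT, pUserPage->idx, iPdeIndex); // drop the old reference
 *     pUserPD->a[iPdeIndex].u = 0;                                    // and clear the PDE itself
 * @endcode
 */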
4435
4436/**
4437 * Makes one or more pages free, growing the pool if possible.
4438 *
4439 * @returns VBox status code.
4440 * @retval VINF_SUCCESS on success.
4441 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4442 *
4443 * @param pPool The pool.
4444 * @param iUser The user of the page.
4445 */
4446static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
4447{
4448 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4449
4450 /*
4451 * If the pool isn't full grown yet, expand it.
4452 */
4453 if (pPool->cCurPages < pPool->cMaxPages)
4454 {
4455 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4456#ifdef IN_RING3
4457 int rc = PGMR3PoolGrow(pPool->pVMR3);
4458#else
4459 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
4460#endif
4461 if (RT_FAILURE(rc))
4462 return rc;
4463 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4464 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4465 return VINF_SUCCESS;
4466 }
4467
4468#ifdef PGMPOOL_WITH_CACHE
4469 /*
4470 * Free one cached page.
4471 */
4472 return pgmPoolCacheFreeOne(pPool, iUser);
4473#else
4474 /*
4475 * Flush the pool.
4476 *
4477 * If we have tracking enabled, it should be possible to come up with
4478 * a cheap replacement strategy...
4479 */
4480 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
4481 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
4482 pgmPoolFlushAllInt(pPool);
4483 return VERR_PGM_POOL_FLUSHED;
4484#endif
4485}
4486
4487
4488/**
4489 * Allocates a page from the pool.
4490 *
4491 * This page may actually be a cached page and not in need of any processing
4492 * on the caller's part.
4493 *
4494 * @returns VBox status code.
4495 * @retval VINF_SUCCESS if a NEW page was allocated.
4496 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4497 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4498 * @param pVM The VM handle.
4499 * @param GCPhys The GC physical address of the page we're going to shadow.
4500 * For 4MB and 2MB PD entries, it's the first address the
4501 * shadow PT is covering.
4502 * @param enmKind The kind of mapping.
4503 * @param iUser The shadow page pool index of the user table.
4504 * @param iUserTable The index into the user table (shadowed).
4505 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4506 */
4507int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
4508{
4509 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4510 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4511 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4512 *ppPage = NULL;
4513 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4514 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4515 * Assert(!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4516
4517#ifdef PGMPOOL_WITH_CACHE
4518 if (pPool->fCacheEnabled)
4519 {
4520 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
4521 if (RT_SUCCESS(rc2))
4522 {
4523 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4524 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4525 return rc2;
4526 }
4527 }
4528#endif
4529
4530 /*
4531 * Allocate a new one.
4532 */
4533 int rc = VINF_SUCCESS;
4534 uint16_t iNew = pPool->iFreeHead;
4535 if (iNew == NIL_PGMPOOL_IDX)
4536 {
4537 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
4538 if (RT_FAILURE(rc))
4539 {
4540 if (rc != VERR_PGM_POOL_CLEARED)
4541 {
4542 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4543 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4544 return rc;
4545 }
4546 Log(("pgmPoolMakeMoreFreePages failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
4547 rc = VERR_PGM_POOL_FLUSHED;
4548 }
4549 iNew = pPool->iFreeHead;
4550 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4551 }
4552
4553 /* unlink the free head */
4554 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4555 pPool->iFreeHead = pPage->iNext;
4556 pPage->iNext = NIL_PGMPOOL_IDX;
4557
4558 /*
4559 * Initialize it.
4560 */
4561 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4562 pPage->enmKind = enmKind;
4563 pPage->GCPhys = GCPhys;
4564 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4565 pPage->fMonitored = false;
4566 pPage->fCached = false;
4567 pPage->fReusedFlushPending = false;
4568#ifdef PGMPOOL_WITH_MONITORING
4569 pPage->cModifications = 0;
4570 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4571 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4572#else
4573 pPage->fCR3Mix = false;
4574#endif
4575#ifdef PGMPOOL_WITH_USER_TRACKING
4576 pPage->cPresent = 0;
4577 pPage->iFirstPresent = ~0;
4578
4579 /*
4580 * Insert into the tracking and cache. If this fails, free the page.
4581 */
4582 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4583 if (RT_FAILURE(rc3))
4584 {
4585 if (rc3 != VERR_PGM_POOL_CLEARED)
4586 {
4587 pPool->cUsedPages--;
4588 pPage->enmKind = PGMPOOLKIND_FREE;
4589 pPage->GCPhys = NIL_RTGCPHYS;
4590 pPage->iNext = pPool->iFreeHead;
4591 pPool->iFreeHead = pPage->idx;
4592 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4593 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4594 return rc3;
4595 }
4596 Log(("pgmPoolTrackInsert failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4597 rc = VERR_PGM_POOL_FLUSHED;
4598 }
4599#endif /* PGMPOOL_WITH_USER_TRACKING */
4600
4601 /*
4602 * Commit the allocation, clear the page and return.
4603 */
4604#ifdef VBOX_WITH_STATISTICS
4605 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4606 pPool->cUsedPagesHigh = pPool->cUsedPages;
4607#endif
4608
4609 if (!pPage->fZeroed)
4610 {
4611 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4612 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4613 ASMMemZeroPage(pv);
4614 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4615 }
4616
4617 *ppPage = pPage;
4618 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4619 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4620 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4621 return rc;
4622}
4623
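/**
 * Illustrative sketch (hypothetical caller, not part of the build): a typical
 * shadow page table allocation, distinguishing the cached-page case.
 * @code
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAlloc(pVM, GCPhysPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                           pUserPage->idx, iPdeIndex, &pShwPage);
 *     if (rc == VINF_SUCCESS || rc == VINF_PGM_CACHED_PAGE)
 *     {
 *         // hook up the user: the PDE gets pShwPage->Core.Key (the HC physical address)
 *     }
 *     else // e.g. VERR_PGM_POOL_FLUSHED: VM_FF_PGM_SYNC_CR3 has been raised
 *     {
 *         // back out and let the CR3 sync rebuild the shadow tables
 *     }
 * @endcode
 */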
4624
4625/**
4626 * Frees a usage of a pool page.
4627 *
4628 * @param pVM The VM handle.
4629 * @param HCPhys The HC physical address of the shadow page.
4630 * @param iUser The shadow page pool index of the user table.
4631 * @param iUserTable The index into the user table (shadowed).
4632 */
4633void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4634{
4635 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4636 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4637 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4638}
4639
4640
4641/**
4642 * Gets an in-use page in the pool by its physical address.
4643 *
4644 * @returns Pointer to the page.
4645 * @param pVM The VM handle.
4646 * @param HCPhys The HC physical address of the shadow page.
4647 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4648 */
4649PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4650{
4651 /** @todo profile this! */
4652 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4653 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4654 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%RHp -> %p:{.idx=%d .GCPhys=%RGp .enmKind=%s}\n",
4655 HCPhys, pPage, pPage->idx, pPage->GCPhys, pgmPoolPoolKindToStr(pPage->enmKind)));
4656 return pPage;
4657}
4658
4659
4660/**
4661 * Flushes the entire cache.
4662 *
4663 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
4664 * this and will execute the flush.
4665 *
4666 * @param pVM The VM handle.
4667 */
4668void pgmPoolFlushAll(PVM pVM)
4669{
4670 LogFlow(("pgmPoolFlushAll:\n"));
4671 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4672}
4673
4674#ifdef LOG_ENABLED
4675static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
4676{
4677 switch (enmKind)
4678 {
4679 case PGMPOOLKIND_INVALID:
4680 return "PGMPOOLKIND_INVALID";
4681 case PGMPOOLKIND_FREE:
4682 return "PGMPOOLKIND_FREE";
4683 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4684 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4685 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4686 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4687 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4688 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4689 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4690 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4691 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4692 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4693 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4694 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
4695 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4696 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
4697 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4698 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
4699 case PGMPOOLKIND_32BIT_PD:
4700 return "PGMPOOLKIND_32BIT_PD";
4701 case PGMPOOLKIND_32BIT_PD_PHYS:
4702 return "PGMPOOLKIND_32BIT_PD_PHYS";
4703 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4704 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
4705 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4706 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
4707 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4708 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
4709 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4710 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
4711 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4712 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
4713 case PGMPOOLKIND_PAE_PD_PHYS:
4714 return "PGMPOOLKIND_PAE_PD_PHYS";
4715 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4716 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
4717 case PGMPOOLKIND_PAE_PDPT:
4718 return "PGMPOOLKIND_PAE_PDPT";
4719 case PGMPOOLKIND_PAE_PDPT_PHYS:
4720 return "PGMPOOLKIND_PAE_PDPT_PHYS";
4721 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4722 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
4723 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4724 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
4725 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4726 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
4727 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4728 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
4729 case PGMPOOLKIND_64BIT_PML4:
4730 return "PGMPOOLKIND_64BIT_PML4";
4731 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4732 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
4733 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4734 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
4735 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4736 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
4737#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4738 case PGMPOOLKIND_ROOT_32BIT_PD:
4739 return "PGMPOOLKIND_ROOT_32BIT_PD";
4740 case PGMPOOLKIND_ROOT_PAE_PD:
4741 return "PGMPOOLKIND_ROOT_PAE_PD";
4742 case PGMPOOLKIND_ROOT_PDPT:
4743 return "PGMPOOLKIND_ROOT_PDPT";
4744#endif
4745 case PGMPOOLKIND_ROOT_NESTED:
4746 return "PGMPOOLKIND_ROOT_NESTED";
4747 }
4748 return "Unknown kind!";
4749}
4750#endif