/* $Id: PGMAllPool.cpp 17393 2009-03-05 12:39:31Z vboxsync $ */
/** @file
 * PGM Shadow Page Pool.
 */

/*
 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
 * Clara, CA 95054 USA or visit http://www.sun.com if you need
 * additional information or have any questions.
 */


/*******************************************************************************
*   Header Files                                                               *
*******************************************************************************/
#define LOG_GROUP LOG_GROUP_PGM_POOL
#include <VBox/pgm.h>
#include <VBox/mm.h>
#include <VBox/em.h>
#include <VBox/cpum.h>
#ifdef IN_RC
# include <VBox/patm.h>
#endif
#include "PGMInternal.h"
#include <VBox/vm.h>
#include <VBox/disopcode.h>
#include <VBox/hwacc_vmx.h>

#include <VBox/log.h>
#include <VBox/err.h>
#include <iprt/asm.h>
#include <iprt/string.h>


/*******************************************************************************
*   Internal Functions                                                         *
*******************************************************************************/
__BEGIN_DECLS
static void pgmPoolFlushAllInt(PPGMPOOL pPool);
#ifdef PGMPOOL_WITH_USER_TRACKING
DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#endif
#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
#endif
#ifdef PGMPOOL_WITH_CACHE
static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
#endif
#ifdef PGMPOOL_WITH_MONITORING
static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#endif
#ifndef IN_RING3
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
#endif
#ifdef LOG_ENABLED
static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
#endif
__END_DECLS


/**
 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
 *
 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
 * @param   enmKind     The page kind.
 */
DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            return true;
        default:
            return false;
    }
}
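
/*
 * Illustrative sketch (not part of the build): a caller could combine
 * pgmPoolIsBigPage() with the exact kind to derive the guest mapping size.
 * The helper name below is hypothetical and only shows the intended use.
 *
 *     static size_t pgmPoolSketchGuestMappingSize(PGMPOOLKIND enmKind)
 *     {
 *         if (!pgmPoolIsBigPage(enmKind))
 *             return PAGE_SIZE;                        // 4 KB guest page.
 *         return enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_2MB
 *              ? (size_t)2 * _1M                       // PAE 2 MB page.
 *              : (size_t)4 * _1M;                      // 32-bit 4 MB page.
 *     }
 */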


#if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
/**
 * Maps a pool page into the current context.
 *
 * @returns Pointer to the mapping.
 * @param   pPGM    Pointer to the PGM instance data.
 * @param   pPage   The page to map.
 */
void *pgmPoolMapPageFallback(PPGM pPGM, PPGMPOOLPAGE pPage)
{
    /* General pages are taken care of by the inlined part; it
       only ends up here in case of failure. */
    AssertReleaseReturn(pPage->idx < PGMPOOL_IDX_FIRST, NULL);

/** @todo make sure HCPhys is valid for *all* indexes. */
    /* special pages. */
# ifdef IN_RC
    switch (pPage->idx)
    {
#  ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
        case PGMPOOL_IDX_PD:
        case PGMPOOL_IDX_PDPT:
        case PGMPOOL_IDX_AMD64_CR3:
            return pPGM->pShwRootRC;
#  else
        case PGMPOOL_IDX_PD:
            return pPGM->pShw32BitPdRC;
        case PGMPOOL_IDX_PAE_PD:
        case PGMPOOL_IDX_PAE_PD_0:
            return pPGM->apShwPaePDsRC[0];
        case PGMPOOL_IDX_PAE_PD_1:
            return pPGM->apShwPaePDsRC[1];
        case PGMPOOL_IDX_PAE_PD_2:
            return pPGM->apShwPaePDsRC[2];
        case PGMPOOL_IDX_PAE_PD_3:
            return pPGM->apShwPaePDsRC[3];
        case PGMPOOL_IDX_PDPT:
            return pPGM->pShwPaePdptRC;
#  endif
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }

# else /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
    RTHCPHYS HCPhys;
    switch (pPage->idx)
    {
#  ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
        case PGMPOOL_IDX_PD:
        case PGMPOOL_IDX_PDPT:
        case PGMPOOL_IDX_AMD64_CR3:
            HCPhys = pPGM->HCPhysShwCR3;
            break;

        case PGMPOOL_IDX_NESTED_ROOT:
            HCPhys = pPGM->HCPhysShwNestedRoot;
            break;
#  else
        case PGMPOOL_IDX_PD:
            HCPhys = pPGM->HCPhysShw32BitPD;
            break;
        case PGMPOOL_IDX_PAE_PD_0:
            HCPhys = pPGM->aHCPhysPaePDs[0];
            break;
        case PGMPOOL_IDX_PAE_PD_1:
            HCPhys = pPGM->aHCPhysPaePDs[1];
            break;
        case PGMPOOL_IDX_PAE_PD_2:
            HCPhys = pPGM->aHCPhysPaePDs[2];
            break;
        case PGMPOOL_IDX_PAE_PD_3:
            HCPhys = pPGM->aHCPhysPaePDs[3];
            break;
        case PGMPOOL_IDX_PDPT:
            HCPhys = pPGM->HCPhysShwPaePdpt;
            break;
        case PGMPOOL_IDX_NESTED_ROOT:
            HCPhys = pPGM->HCPhysShwNestedRoot;
            break;
        case PGMPOOL_IDX_PAE_PD:
            AssertReleaseMsgFailed(("PGMPOOL_IDX_PAE_PD is not usable in VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 context\n"));
            return NULL;
#  endif
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }
    AssertMsg(HCPhys && HCPhys != NIL_RTHCPHYS && !(PAGE_OFFSET_MASK & HCPhys), ("%RHp\n", HCPhys));

    void *pv;
    pgmR0DynMapHCPageInlined(pPGM, HCPhys, &pv);
    return pv;
# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
}
#endif /* IN_RC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */


#ifdef PGMPOOL_WITH_MONITORING
/**
 * Determine the size of a write instruction.
 * @returns number of bytes written.
 * @param   pDis    The disassembler state.
 */
static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
{
    /*
     * This is very crude and possibly wrong for some opcodes,
     * but since it's not really supposed to be called we can
     * probably live with that.
     */
    return DISGetParamSize(pDis, &pDis->param1);
}
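
/*
 * Informal example: for "mov dword [edi], eax" the first parameter is a
 * 32-bit memory operand, so DISGetParamSize() returns 4; for "mov word
 * [edi], ax" it returns 2. The result is only used by the monitor code
 * below to detect writes that straddle two shadow entries.
 */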


/**
 * Flushes a chain of pages sharing the same access monitor.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pPool   The pool.
 * @param   pPage   A page in the chain.
 */
int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));

    /*
     * Find the list head.
     */
    uint16_t idx = pPage->idx;
    if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
    {
        while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
        {
            idx = pPage->iMonitoredPrev;
            Assert(idx != pPage->idx);
            pPage = &pPool->aPages[idx];
        }
    }

    /*
     * Iterate the list flushing each shadow page.
     */
    int rc = VINF_SUCCESS;
    for (;;)
    {
        idx = pPage->iMonitoredNext;
        Assert(idx != pPage->idx);
        if (pPage->idx >= PGMPOOL_IDX_FIRST)
        {
            int rc2 = pgmPoolFlushPage(pPool, pPage);
            if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
                rc = VINF_PGM_SYNC_CR3;
        }
        /* next */
        if (idx == NIL_PGMPOOL_IDX)
            break;
        pPage = &pPool->aPages[idx];
    }
    return rc;
}
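
/*
 * Informal usage note: the two-phase walk above (rewind to the list head,
 * then flush forward) lets the caller pass any page of a monitor chain.
 * The next index is sampled before calling pgmPoolFlushPage() because
 * flushing unlinks the page from the chain.
 */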


/**
 * Wrapper for getting the current context pointer to the entry being modified.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         VM Handle.
 * @param   pvDst       Destination address.
 * @param   pvSrc       Source guest virtual address.
 * @param   GCPhysSrc   The source guest physical address.
 * @param   cb          Size of data to read.
 */
DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
{
#if defined(IN_RING3)
    memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
    return VINF_SUCCESS;
#else
    /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
    return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
#endif
}
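
/*
 * Worked example (informal): with cb = sizeof(X86PTEPAE) = 8 and a fault
 * address of 0x1234, the mask ~(cb - 1) rounds the source down to 0x1230,
 * the start of the entry being modified, so a whole, aligned entry is read
 * even when the guest write itself was narrower or misaligned.
 */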

/**
 * Process shadow entries before they are changed by the guest.
 *
 * For PT entries we will clear them. For PD entries, we'll simply check
 * for mapping conflicts and set the SyncCR3 FF if found.
 *
 * @param   pPool       The pool.
 * @param   pPage       The head page.
 * @param   GCPhysFault The guest physical fault address.
 * @param   pvAddress   In R0 and GC this is the guest context fault address (flat).
 *                      In R3 this is the host context 'fault' address.
 * @param   pCpu        The disassembler state for figuring out the write size.
 *                      This need not be specified if the caller knows we won't do cross entry accesses.
 */
void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, PDISCPUSTATE pCpu)
{
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
    const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
    const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;

    LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%s cbWrite=%d\n", pvAddress, GCPhysFault, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
    for (;;)
    {
        union
        {
            void       *pv;
            PX86PT      pPT;
            PX86PTPAE   pPTPae;
            PX86PD      pPD;
            PX86PDPAE   pPDPae;
            PX86PDPT    pPDPT;
            PX86PML4    pPML4;
        } uShw;

        switch (pPage->enmKind)
        {
            case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
            {
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                const unsigned iShw = off / sizeof(X86PTE);
                LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
                if (uShw.pPT->a[iShw].n.u1Present)
                {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                    X86PTE GstPte;

                    int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
                    AssertRC(rc);
                    Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                               GstPte.u & X86_PTE_PG_MASK);
# endif
                    uShw.pPT->a[iShw].u = 0;
                }
                break;
            }

            /* page/2 sized */
            case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
            {
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
                {
                    const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
                    LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
                    if (uShw.pPTPae->a[iShw].n.u1Present)
                    {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                        X86PTE GstPte;
                        int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
                        AssertRC(rc);

                        Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
                        pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                                   uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                                   GstPte.u & X86_PTE_PG_MASK);
# endif
                        uShw.pPTPae->a[iShw].u = 0;
                    }
                }
                break;
            }

# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
            case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
            case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
            case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
            case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
            {
                unsigned iGst     = off / sizeof(X86PDE);
                unsigned iShwPdpt = iGst / 256;
                unsigned iShw     = (iGst % 256) * 2;
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);

                LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x idx = %d page idx=%d\n", iGst, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
                if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
                {
                    for (unsigned i = 0; i < 2; i++)
                    {
# ifndef IN_RING0
                        if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                        {
                            Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                            VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
                            break;
                        }
                        else
# endif /* !IN_RING0 */
                        if (uShw.pPDPae->a[iShw+i].n.u1Present)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
                            pgmPoolFree(pPool->CTX_SUFF(pVM),
                                        uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
                                        pPage->idx,
                                        iShw + i);
                            uShw.pPDPae->a[iShw+i].u = 0;
                        }

                        /* paranoia / a bit assumptive. */
                        if (    pCpu
                            &&  (off & 3)
                            &&  (off & 3) + cbWrite > 4)
                        {
                            const unsigned iShw2 = iShw + 2 + i;
                            if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
                            {
# ifndef IN_RING0
                                if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                                {
                                    Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                                    VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
                                    break;
                                }
                                else
# endif /* !IN_RING0 */
                                if (uShw.pPDPae->a[iShw2].n.u1Present)
                                {
                                    LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
                                    pgmPoolFree(pPool->CTX_SUFF(pVM),
                                                uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
                                                pPage->idx,
                                                iShw2);
                                    uShw.pPDPae->a[iShw2].u = 0;
                                }
                            }
                        }
                    }
                }
                break;
            }
# endif


            case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
            {
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                const unsigned iShw = off / sizeof(X86PTEPAE);
                if (uShw.pPTPae->a[iShw].n.u1Present)
                {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                    X86PTEPAE GstPte;
                    int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
                    AssertRC(rc);

                    Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                               GstPte.u & X86_PTE_PAE_PG_MASK);
# endif
                    uShw.pPTPae->a[iShw].u = 0;
                }

                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + cbWrite > sizeof(X86PTEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
                    AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));

                    if (uShw.pPTPae->a[iShw2].n.u1Present)
                    {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                        X86PTEPAE GstPte;
#  ifdef IN_RING3
                        int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
#  else
                        int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
#  endif
                        AssertRC(rc);
                        Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
                        pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                                   uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
                                                   GstPte.u & X86_PTE_PAE_PG_MASK);
# endif
                        uShw.pPTPae->a[iShw2].u = 0;
                    }
                }
                break;
            }

# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
            case PGMPOOLKIND_32BIT_PD:
# else
            case PGMPOOLKIND_ROOT_32BIT_PD:
# endif
            {
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                const unsigned iShw = off / sizeof(X86PTE);         // ASSUMING 32-bit guest paging!

# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
                LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
# endif
# ifndef IN_RING0
                if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                    break;
                }
# endif /* !IN_RING0 */
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
#  ifndef IN_RING0
                else
#  endif /* !IN_RING0 */
                {
                    if (uShw.pPD->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
                                    pPage->idx,
                                    iShw);
                        uShw.pPD->a[iShw].u = 0;
                    }
                }
# endif
                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 3)
                    &&  (off & 3) + cbWrite > sizeof(X86PTE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
                    if (    iShw2 != iShw
                        &&  iShw2 < RT_ELEMENTS(uShw.pPD->a))
                    {
# ifndef IN_RING0
                        if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                        {
                            Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                            STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
                            VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                        }
# endif /* !IN_RING0 */
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
#  ifndef IN_RING0
                        else
#  endif /* !IN_RING0 */
                        {
                            if (uShw.pPD->a[iShw2].n.u1Present)
                            {
                                LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
                                pgmPoolFree(pPool->CTX_SUFF(pVM),
                                            uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
                                            pPage->idx,
                                            iShw2);
                                uShw.pPD->a[iShw2].u = 0;
                            }
                        }
# endif
                    }
                }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                if (    uShw.pPD->a[iShw].n.u1Present
                    &&  !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
# ifdef IN_RC      /* TLB load - we're pushing things a bit... */
                    ASMProbeReadByte(pvAddress);
# endif
                    pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
                    uShw.pPD->a[iShw].u = 0;
                }
#endif
                break;
            }

# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
            case PGMPOOLKIND_ROOT_PAE_PD:
            {
                unsigned iGst     = off / sizeof(X86PDE);           // ASSUMING 32-bit guest paging!
                unsigned iShwPdpt = iGst / 256;
                unsigned iShw     = (iGst % 256) * 2;
                Assert(pPage->idx == PGMPOOL_IDX_PAE_PD);
                PPGMPOOLPAGE pPage2 = pPage + 1 + iShwPdpt;
                Assert(pPage2->idx == PGMPOOL_IDX_PAE_PD_0 + iShwPdpt);
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage2);
                for (unsigned i = 0; i < 2; i++, iShw++)
                {
                    if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw));
                    }
                    /* paranoia / a bit assumptive. */
                    else if (   pCpu
                             && (off & 3)
                             && (off & 3) + cbWrite > 4)
                    {
                        const unsigned iShw2 = iShw + 2;
                        if (    iShw2 < RT_ELEMENTS(uShw.pPDPae->a) /** @todo was completely wrong, it's better now after #1865 but still wrong from cross PD. */
                            &&  (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                        {
                            Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                            VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
                        }
                    }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                    if (    uShw.pPDPae->a[iShw].n.u1Present
                        &&  !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: iShwPdpt=%#x iShw=%#x: %RX64 -> freeing it!\n", iShwPdpt, iShw, uShw.pPDPae->a[iShw].u));
# ifdef IN_RC      /* TLB load - we're pushing things a bit... */
                        ASMProbeReadByte(pvAddress);
# endif
                        pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + iShwPdpt * X86_PG_PAE_ENTRIES);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
#endif
                }
                break;
            }
# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */

            case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
            {
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                const unsigned iShw = off / sizeof(X86PDEPAE);
#ifndef IN_RING0
                if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                    break;
                }
#endif /* !IN_RING0 */
#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
                /*
                 * Causes trouble when the guest uses a PDE to refer to the whole page table level
                 * structure. (Invalidate here; faults later on when it tries to change the page
                 * table entries -> recheck; probably only applies to the RC case.)
                 */
# ifndef IN_RING0
                else
# endif /* !IN_RING0 */
                {
                    if (uShw.pPDPae->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
                                    pPage->idx,
                                    iShw);
# else
                                    /* Note: hardcoded PAE implementation dependency */
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
# endif
                        uShw.pPDPae->a[iShw].u = 0;
                    }
                }
#endif
                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + cbWrite > sizeof(X86PDEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
                    AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));

#ifndef IN_RING0
                    if (    iShw2 != iShw
                        &&  uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
#endif /* !IN_RING0 */
#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
# ifndef IN_RING0
                    else
# endif /* !IN_RING0 */
                    if (uShw.pPDPae->a[iShw2].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
                                    pPage->idx,
                                    iShw2);
# else
                                    /* Note: hardcoded PAE implementation dependency */
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
# endif
                        uShw.pPDPae->a[iShw2].u = 0;
                    }
#endif
                }
                break;
            }

# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
            case PGMPOOLKIND_PAE_PDPT:
# else
            case PGMPOOLKIND_ROOT_PDPT:
# endif
            {
                /*
                 * Hopefully this doesn't happen very often:
                 * - touching unused parts of the page
                 * - messing with the bits of pd pointers without changing the physical address
                 */
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
                /* PDPT roots are not page aligned; 32 byte only! */
                const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
# else
                const unsigned offPdpt = off;
# endif
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                const unsigned iShw = offPdpt / sizeof(X86PDPE);
                if (iShw < X86_PG_PAE_PDPE_ENTRIES)          /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
                {
# ifndef IN_RING0
                    if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
                        break;
                    }
# endif /* !IN_RING0 */
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
#  ifndef IN_RING0
                    else
#  endif /* !IN_RING0 */
                    if (uShw.pPDPT->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
                                    pPage->idx,
                                    iShw);
                        uShw.pPDPT->a[iShw].u = 0;
                    }
# endif

                    /* paranoia / a bit assumptive. */
                    if (    pCpu
                        &&  (offPdpt & 7)
                        &&  (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
                        if (    iShw2 != iShw
                            &&  iShw2 < X86_PG_PAE_PDPE_ENTRIES)
                        {
# ifndef IN_RING0
                            if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
                            {
                                Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                                STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
                                VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                                LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                            }
# endif /* !IN_RING0 */
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
#  ifndef IN_RING0
                            else
#  endif /* !IN_RING0 */
                            if (uShw.pPDPT->a[iShw2].n.u1Present)
                            {
                                LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
                                pgmPoolFree(pPool->CTX_SUFF(pVM),
                                            uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
                                            pPage->idx,
                                            iShw2);
                                uShw.pPDPT->a[iShw2].u = 0;
                            }
# endif
                        }
                    }
                }
                break;
            }

#ifndef IN_RC
            case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
            {
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                const unsigned iShw = off / sizeof(X86PDEPAE);
                Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
                if (uShw.pPDPae->a[iShw].n.u1Present)
                {
                    LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
                    pgmPoolFree(pPool->CTX_SUFF(pVM),
                                uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
                                pPage->idx,
                                iShw);
                    uShw.pPDPae->a[iShw].u = 0;
                }
                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + cbWrite > sizeof(X86PDEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
                    AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));

                    Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
                    if (uShw.pPDPae->a[iShw2].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
                                    pPage->idx,
                                    iShw2);
                        uShw.pPDPae->a[iShw2].u = 0;
                    }
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
            {
                /*
                 * Hopefully this doesn't happen very often:
                 * - messing with the bits of pd pointers without changing the physical address
                 */
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
                if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
# endif
                {
                    uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                    const unsigned iShw = off / sizeof(X86PDPE);
                    if (uShw.pPDPT->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
                        uShw.pPDPT->a[iShw].u = 0;
                    }
                    /* paranoia / a bit assumptive. */
                    if (    pCpu
                        &&  (off & 7)
                        &&  (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
                        if (uShw.pPDPT->a[iShw2].n.u1Present)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
                            pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
                            uShw.pPDPT->a[iShw2].u = 0;
                        }
                    }
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PML4:
            {
                /*
                 * Hopefully this doesn't happen very often:
                 * - messing with the bits of pd pointers without changing the physical address
                 */
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
                if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
# endif
                {
                    uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                    const unsigned iShw = off / sizeof(X86PDPE);
                    if (uShw.pPML4->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
                        uShw.pPML4->a[iShw].u = 0;
                    }
                    /* paranoia / a bit assumptive. */
                    if (    pCpu
                        &&  (off & 7)
                        &&  (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
                        if (uShw.pPML4->a[iShw2].n.u1Present)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
                            pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
                            uShw.pPML4->a[iShw2].u = 0;
                        }
                    }
                }
                break;
            }
#endif /* !IN_RC */

            default:
                AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
        }

        /* next */
        if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
            return;
        pPage = &pPool->aPages[pPage->iMonitoredNext];
    }
}
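
/*
 * Worked example (informal) of the cross-entry check used above: a 4-byte
 * guest write at off = 0x7FE inside a PAE page table has (off & 7) = 6 and
 * (off & 7) + cbWrite = 10 > sizeof(X86PTEPAE), so the write straddles two
 * 8-byte entries; iShw = 0x7FE / 8 = 0xFF, and iShw2 = (0x7FE + 4 - 1) / 8
 * = 0x100, so the second entry (index 0x100) is invalidated as well after
 * the range check.
 */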

# ifndef IN_RING3
/**
 * Checks if an access could be a fork operation in progress.
 *
 * That is, the guest is setting up the parent process for Copy-On-Write.
 *
 * @returns true if it's likely that we're forking, otherwise false.
 * @param   pPool       The pool.
 * @param   pCpu        The disassembled instruction.
 * @param   offFault    The access offset.
 */
DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
{
    /*
     * i386 linux is using btr to clear X86_PTE_RW.
     * The functions involved are (2.6.16 source inspection):
     *     clear_bit
     *     ptep_set_wrprotect
     *     copy_one_pte
     *     copy_pte_range
     *     copy_pmd_range
     *     copy_pud_range
     *     copy_page_range
     *     dup_mmap
     *     dup_mm
     *     copy_mm
     *     copy_process
     *     do_fork
     */
    if (    pCpu->pCurInstr->opcode == OP_BTR
        &&  !(offFault & 4)
        /** @todo Validate that the bit index is X86_PTE_RW. */
            )
    {
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
        return true;
    }
    return false;
}
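
/*
 * Informal note on the checks above: for the R/W bit (bit 1) of a PTE the
 * btr write always hits the low dword of the (possibly 8-byte) entry, hence
 * the (offFault & 4) == 0 test. A typical guest sequence (from the Linux
 * clear_bit()/ptep_set_wrprotect() path, shown here only as illustration):
 *
 *     lock btr [edx], 1       ; clear X86_PTE_RW in the PTE at [edx]
 */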


/**
 * Determine whether the page is likely to have been reused.
 *
 * @returns true if we consider the page as being reused for a different purpose.
 * @returns false if we consider it to still be a paging page.
 * @param   pVM         VM Handle.
 * @param   pPage       The page in question.
 * @param   pRegFrame   Trap register frame.
 * @param   pCpu        The disassembly info for the faulting instruction.
 * @param   pvFault     The fault address.
 *
 * @remark  The REP prefix check is left to the caller because of STOSD/W.
 */
DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
{
#ifndef IN_RC
    /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
    if (    HWACCMHasPendingIrq(pVM)
        &&  (pRegFrame->rsp - pvFault) < 32)
    {
        /* Fault caused by stack writes while trying to inject an interrupt event. */
        Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
        return true;
    }
#else
    NOREF(pVM); NOREF(pvFault);
#endif

    switch (pCpu->pCurInstr->opcode)
    {
        /* call implies the actual push of the return address faulted */
        case OP_CALL:
            Log4(("pgmPoolMonitorIsReused: CALL\n"));
            return true;
        case OP_PUSH:
            Log4(("pgmPoolMonitorIsReused: PUSH\n"));
            return true;
        case OP_PUSHF:
            Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
            return true;
        case OP_PUSHA:
            Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
            return true;
        case OP_FXSAVE:
            Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
            return true;
        case OP_MOVNTI: /* solaris - block_zero_no_xmm */
            Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
            return true;
        case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
            Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
            return true;
        case OP_MOVSWD:
        case OP_STOSWD:
            if (    pCpu->prefix == (PREFIX_REP|PREFIX_REX)
                &&  pRegFrame->rcx >= 0x40
               )
            {
                Assert(pCpu->mode == CPUMODE_64BIT);

                Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
                return true;
            }
            return false;
    }
    if (    (pCpu->param1.flags & USE_REG_GEN32)
        &&  (pCpu->param1.base.reg_gen == USE_REG_ESP))
    {
        Log4(("pgmPoolMonitorIsReused: ESP\n"));
        return true;
    }

    //if (pPage->fCR3Mix)
    //    return false;
    return false;
}
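
/*
 * Worked example (informal) for the stack heuristic above: with an
 * interrupt pending and a fault at pvFault = rsp - 8 (e.g. a push during
 * event injection), pRegFrame->rsp - pvFault = 8 < 32, so the page is
 * treated as reused stack memory rather than a live page table.
 */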


/**
 * Flushes the page being accessed.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                     PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * First, do the flushing.
     */
    int rc = pgmPoolMonitorChainFlush(pPool, pPage);

    /*
     * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
     */
    uint32_t cbWritten;
    int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
    if (RT_SUCCESS(rc2))
        pRegFrame->rip += pCpu->opsize;
    else if (rc2 == VERR_EM_INTERPRETER)
    {
#ifdef IN_RC
        if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
        {
            LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
                     pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
            rc = VINF_SUCCESS;
            STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
        }
        else
#endif
        {
            rc = VINF_EM_RAW_EMULATE_INSTR;
            STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
        }
    }
    else
        rc = rc2;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
    return rc;
}


/**
 * Handles the STOSD write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                          PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    Assert(pCpu->mode == CPUMODE_32BIT);

    Log3(("pgmPoolAccessHandlerSTOSD\n"));

    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Execute REP STOSD.
     *
     * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
     * write situation, meaning that it's safe to write here.
     */
#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
    PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
#endif
    RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
    while (pRegFrame->ecx)
    {
#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
        uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
        pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
        PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
#else
        pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
#endif
#ifdef IN_RC
        *(uint32_t *)pu32 = pRegFrame->eax;
#else
        PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
#endif
        pu32           += 4;
        GCPhysFault    += 4;
        pRegFrame->edi += 4;
        pRegFrame->ecx--;
    }
    pRegFrame->rip += pCpu->opsize;

#ifdef IN_RC
    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();
#endif

    LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
    return VINF_SUCCESS;
}
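
/*
 * Informal example of what the loop above handles: a 32-bit guest running
 *
 *     mov ecx, 16
 *     xor eax, eax
 *     rep stosd               ; zero 64 bytes at es:edi
 *
 * over a monitored page table. Each 4-byte store is first pushed through
 * pgmPoolMonitorChainChanging() so the shadow entries are invalidated, then
 * committed (directly in RC, via PGMPhysSimpleWriteGCPhys() elsewhere).
 */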


/**
 * Handles the simple write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                           PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    Log3(("pgmPoolAccessHandlerSimple\n"));
    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Clear all the pages. ASSUMES that pvFault is readable.
     */
#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
    PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
    uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
    pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
    PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
#else
    pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
#endif

    /*
     * Interpret the instruction.
     */
    uint32_t cb;
    int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
    if (RT_SUCCESS(rc))
        pRegFrame->rip += pCpu->opsize;
    else if (rc == VERR_EM_INTERPRETER)
    {
        LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
                 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
        rc = VINF_EM_RAW_EMULATE_INSTR;
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
    }

#ifdef IN_RC
    /*
     * Quick hack, with logging enabled we're getting stale
     * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
     * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
     * have to be fixed to support this. But that'll have to wait till next week.
     *
     * An alternative is to keep track of the changed PTEs together with the
     * GCPhys from the guest PT. This may prove expensive though.
     *
     * At the moment, it's VITAL that it's done AFTER the instruction interpreting
     * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
     */
    PGM_INVL_GUEST_TLBS();
#endif

    LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
    return rc;
}

/**
 * \#PF Handler callback for PT write accesses.
 *
 * @returns VBox status code (appropriate for GC return).
 * @param   pVM         VM Handle.
 * @param   uErrorCode  CPU Error code.
 * @param   pRegFrame   Trap register frame.
 *                      NULL on DMA and other non CPU access.
 * @param   pvFault     The fault address (cr2).
 * @param   GCPhysFault The GC physical address corresponding to pvFault.
 * @param   pvUser      User argument.
 */
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
{
    STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
    PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
    PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
    LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));

    /*
     * We should ALWAYS have the list head as user parameter. This
     * is because we use that page to record the changes.
     */
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);

    /*
     * Disassemble the faulting instruction.
     */
    DISCPUSTATE Cpu;
    int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
    AssertRCReturn(rc, rc);

    /*
     * Check if it's worth dealing with.
     */
    bool fReused = false;
    if (    (   pPage->cModifications < 48   /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
             || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
#else
             || pPage->fCR3Mix
#endif
            )
        &&  !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
        &&  !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
    {
        /*
         * Simple instructions, no REP prefix.
         */
        if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
        {
            rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
            return rc;
        }

        /*
         * Windows is frequently doing small memset() operations (netio test 4k+).
         * We have to deal with these or we'll kill the cache and performance.
         */
        if (    Cpu.pCurInstr->opcode == OP_STOSWD
            &&  CPUMGetGuestCPL(pVM, pRegFrame) == 0
            &&  pRegFrame->ecx <= 0x20
            &&  pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
            &&  !((uintptr_t)pvFault & 3)
            &&  (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
            &&  Cpu.mode == CPUMODE_32BIT
            &&  Cpu.opmode == CPUMODE_32BIT
            &&  Cpu.addrmode == CPUMODE_32BIT
            &&  Cpu.prefix == PREFIX_REP
            &&  !pRegFrame->eflags.Bits.u1DF
            )
        {
            rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
            return rc;
        }

        /* REP prefix, don't bother. */
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
        Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
              pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
    }

    /*
     * Not worth it, so flush it.
     *
     * If we considered it to be reused, don't go back to ring-3
     * to emulate failed instructions since we usually cannot
     * interpret them. This may be a bit risky, in which case
     * the reuse detection must be fixed.
     */
    rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
    if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
        rc = VINF_SUCCESS;
    STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
    return rc;
}

# endif /* !IN_RING3 */
#endif /* PGMPOOL_WITH_MONITORING */
1318
1319#ifdef PGMPOOL_WITH_CACHE
1320
1321/**
1322 * Inserts a page into the GCPhys hash table.
1323 *
1324 * @param pPool The pool.
1325 * @param pPage The page.
1326 */
1327DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1328{
1329 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1330 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1331 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1332 pPage->iNext = pPool->aiHash[iHash];
1333 pPool->aiHash[iHash] = pPage->idx;
1334}
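
/*
 * Illustrative sketch (not part of the build): the hash table is an array
 * of list heads with separate chaining through PGMPOOLPAGE::iNext, so a
 * lookup follows the same pattern as pgmPoolCacheAlloc() below:
 *
 *     unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
 *     while (i != NIL_PGMPOOL_IDX && pPool->aPages[i].GCPhys != GCPhys)
 *         i = pPool->aPages[i].iNext;
 *     // i now indexes the page, or is NIL_PGMPOOL_IDX if absent.
 */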


/**
 * Removes a page from the GCPhys hash table.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
    uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
    if (pPool->aiHash[iHash] == pPage->idx)
        pPool->aiHash[iHash] = pPage->iNext;
    else
    {
        uint16_t iPrev = pPool->aiHash[iHash];
        for (;;)
        {
            const int16_t i = pPool->aPages[iPrev].iNext;
            if (i == pPage->idx)
            {
                pPool->aPages[iPrev].iNext = pPage->iNext;
                break;
            }
            if (i == NIL_PGMPOOL_IDX)
            {
                AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
                break;
            }
            iPrev = i;
        }
    }
    pPage->iNext = NIL_PGMPOOL_IDX;
}


/**
 * Frees up one cache page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
 * @param   pPool   The pool.
 * @param   iUser   The user index.
 */
static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
{
#ifndef IN_RC
    const PVM pVM = pPool->CTX_SUFF(pVM);
#endif
    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
    STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);

    /*
     * Select one page from the tail of the age list.
     */
    uint16_t iToFree = pPool->iAgeTail;
    if (iToFree == iUser)
        iToFree = pPool->aPages[iToFree].iAgePrev;
/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
    if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
    {
        uint16_t i = pPool->aPages[iToFree].iAgePrev;
        for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
        {
            if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
                continue;
            iToFree = i;
            break;
        }
    }
*/

    Assert(iToFree != iUser);
    AssertRelease(iToFree != NIL_PGMPOOL_IDX);

    PPGMPOOLPAGE pPage = &pPool->aPages[iToFree];

    /*
     * Reject any attempts at flushing the currently active shadow CR3 mapping.
     */
#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
    if (pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
#else
    if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
#endif
    {
        /* Refresh the cr3 mapping by putting it at the head of the age list. */
        LogFlow(("pgmPoolCacheFreeOne refuse CR3 mapping\n"));
        pgmPoolCacheUsed(pPool, pPage);
        return pgmPoolCacheFreeOne(pPool, iUser);
    }

    int rc = pgmPoolFlushPage(pPool, pPage);
    if (rc == VINF_SUCCESS)
        PGM_INVL_GUEST_TLBS(); /* see PT handler. */
    return rc;
}
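
/*
 * Informal note: this is plain LRU eviction. iAgeHead is the most recently
 * used page and iAgeTail the least recently used one, so the search starts
 * at the tail, skips the caller's own user page, and retries (via the
 * recursive call above) when the tail turns out to be the locked/active
 * shadow CR3 page.
 */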


/**
 * Checks if a kind mismatch is really a page being reused
 * or if it's just normal remappings.
 *
 * @returns true if reused and the cached page (enmKind1) should be flushed
 * @returns false if not reused.
 * @param   enmKind1    The kind of the cached page.
 * @param   enmKind2    The kind of the requested page.
 */
static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
{
    switch (enmKind1)
    {
        /*
         * Never reuse them. There is no remapping in non-paging mode.
         */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_32BIT_PD_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PD_PHYS:
        case PGMPOOLKIND_PAE_PDPT_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
        case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
            return false;
#else
            return true;
#endif

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         */
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
        case PGMPOOLKIND_32BIT_PD:
        case PGMPOOLKIND_PAE_PDPT:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
                case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
                case PGMPOOLKIND_64BIT_PML4:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         */
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * These cannot be flushed, and it's common to reuse the PDs as PTs.
         */
#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
        case PGMPOOLKIND_ROOT_PDPT:
#endif
        case PGMPOOLKIND_ROOT_NESTED:
            return false;

        default:
            AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
    }
}
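
/*
 * Informal example: a cached PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT page that is
 * requested again as PGMPOOLKIND_PAE_PT_FOR_PAE_PT indicates the guest has
 * switched paging mode and reused the physical page, so the cached shadow
 * must be flushed (returns true). Requesting it as
 * PGMPOOLKIND_PAE_PT_FOR_32BIT_PT is just the normal 32-bit-under-PAE
 * remapping and the cached page can stay (returns false).
 */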


/**
 * Attempts to satisfy a pgmPoolAlloc request from the cache.
 *
 * @returns VBox status code.
 * @retval  VINF_PGM_CACHED_PAGE on success.
 * @retval  VERR_FILE_NOT_FOUND if not found.
 * @param   pPool       The pool.
 * @param   GCPhys      The GC physical address of the page we're gonna shadow.
 * @param   enmKind     The kind of mapping.
 * @param   iUser       The shadow page pool index of the user table.
 * @param   iUserTable  The index into the user table (shadowed).
 * @param   ppPage      Where to store the pointer to the page.
 */
static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
{
#ifndef IN_RC
    const PVM pVM = pPool->CTX_SUFF(pVM);
#endif
    /*
     * Look up the GCPhys in the hash.
     */
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
    if (i != NIL_PGMPOOL_IDX)
    {
        do
        {
            PPGMPOOLPAGE pPage = &pPool->aPages[i];
            Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
            if (pPage->GCPhys == GCPhys)
            {
                if ((PGMPOOLKIND)pPage->enmKind == enmKind)
                {
                    /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
                     * doesn't flush it in case there are no more free use records.
                     */
                    pgmPoolCacheUsed(pPool, pPage);

                    int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
                    if (RT_SUCCESS(rc))
                    {
                        Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
                        *ppPage = pPage;
                        STAM_COUNTER_INC(&pPool->StatCacheHits);
                        return VINF_PGM_CACHED_PAGE;
                    }
                    return rc;
                }

                /*
                 * The kind is different. In some cases we should now flush the page
                 * as it has been reused, but in most cases this is normal remapping
                 * of PDs as PT or big pages using the GCPhys field in a slightly
                 * different way than the other kinds.
                 */
                if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
                {
                    STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
                    pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
                    PGM_INVL_GUEST_TLBS(); /* see PT handler. */
                    break;
                }
            }

            /* next */
            i = pPage->iNext;
        } while (i != NIL_PGMPOOL_IDX);
    }

    Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
    STAM_COUNTER_INC(&pPool->StatCacheMisses);
    return VERR_FILE_NOT_FOUND;
}


/**
 * Inserts a page into the cache.
 *
 * @param   pPool           The pool.
 * @param   pPage           The cached page.
 * @param   fCanBeCached    Set if the page is fit for caching from the caller's point of view.
 */
static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
{
    /*
     * Insert into the GCPhys hash if the page is fit for that.
     */
    Assert(!pPage->fCached);
    if (fCanBeCached)
    {
        pPage->fCached = true;
        pgmPoolHashInsert(pPool, pPage);
        Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
              pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
        STAM_COUNTER_INC(&pPool->StatCacheCacheable);
    }
    else
    {
        Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
              pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
        STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
    }

    /*
     * Insert at the head of the age list.
     */
    pPage->iAgePrev = NIL_PGMPOOL_IDX;
    pPage->iAgeNext = pPool->iAgeHead;
    if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
        pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
    else
        pPool->iAgeTail = pPage->idx;
    pPool->iAgeHead = pPage->idx;
}
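
/*
 * Informal note: fCanBeCached only controls membership in the GCPhys hash;
 * every page goes on the age list regardless, so pgmPoolCacheFreeOne() can
 * evict uncacheable pages too. The list is doubly linked through
 * iAgeNext/iAgePrev with iAgeHead as the most recently used page and
 * iAgeTail as the eviction candidate.
 */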


/**
 * Flushes a cached page.
 *
 * @param   pPool   The pool.
 * @param   pPage   The cached page.
 */
static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));

    /*
     * Remove the page from the hash.
     */
    if (pPage->fCached)
    {
        pPage->fCached = false;
        pgmPoolHashRemove(pPool, pPage);
    }
    else
        Assert(pPage->iNext == NIL_PGMPOOL_IDX);

    /*
     * Remove it from the age list.
     */
    if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
    else
        pPool->iAgeTail = pPage->iAgePrev;
    if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
    else
        pPool->iAgeHead = pPage->iAgeNext;
    pPage->iAgeNext = NIL_PGMPOOL_IDX;
    pPage->iAgePrev = NIL_PGMPOOL_IDX;
}

#endif /* PGMPOOL_WITH_CACHE */
#ifdef PGMPOOL_WITH_MONITORING

/**
 * Looks for pages sharing the monitor.
 *
 * @returns Pointer to the head page.
 * @returns NULL if not found.
 * @param   pPool       The Pool
 * @param   pNewPage    The page which is going to be monitored.
 */
static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
{
#ifdef PGMPOOL_WITH_CACHE
    /*
     * Look up the GCPhys in the hash.
     */
    RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    if (i == NIL_PGMPOOL_IDX)
        return NULL;
    do
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[i];
        if (    pPage->GCPhys - GCPhys < PAGE_SIZE
            &&  pPage != pNewPage)
        {
            switch (pPage->enmKind)
            {
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
                case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
                case PGMPOOLKIND_64BIT_PML4:
#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
                case PGMPOOLKIND_32BIT_PD:
                case PGMPOOLKIND_PAE_PDPT:
#else
                case PGMPOOLKIND_ROOT_32BIT_PD:
                case PGMPOOLKIND_ROOT_PAE_PD:
                case PGMPOOLKIND_ROOT_PDPT:
#endif
                {
                    /* find the head */
                    while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
                    {
                        Assert(pPage->iMonitoredPrev != pPage->idx);
                        pPage = &pPool->aPages[pPage->iMonitoredPrev];
                    }
                    return pPage;
                }

                /* ignore, no monitoring. */
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                case PGMPOOLKIND_ROOT_NESTED:
                case PGMPOOLKIND_PAE_PD_PHYS:
                case PGMPOOLKIND_PAE_PDPT_PHYS:
                case PGMPOOLKIND_32BIT_PD_PHYS:
#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
                case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
#endif
                    break;
                default:
                    AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
            }
        }

        /* next */
        i = pPage->iNext;
1786 } while (i != NIL_PGMPOOL_IDX);
1787#endif
1788 return NULL;
1789}
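/*
 * Note: the "pPage->GCPhys - GCPhys < PAGE_SIZE" test above exploits
 * unsigned wrap-around to do a full range check in a single compare
 * (illustration only, not compiled):
 */
#if 0
static bool sketchIsInPage(RTGCPHYS GCPhysAddr, RTGCPHYS GCPhysPage /* page aligned */)
{
    /* When GCPhysAddr < GCPhysPage the subtraction wraps to a huge value, so
       this is equivalent to: GCPhysPage <= GCPhysAddr < GCPhysPage + PAGE_SIZE. */
    return GCPhysAddr - GCPhysPage < PAGE_SIZE;
}
#endif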
1790
1791
1792/**
1793 * Enables write monitoring of a guest page.
1794 *
1795 * @returns VBox status code.
1796 * @retval VINF_SUCCESS on success.
1797 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a lightweight pool flush.
1798 * @param pPool The pool.
1799 * @param pPage The cached page.
1800 */
1801static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1802{
1803 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1804
1805 /*
1806 * Filter out the relevant kinds.
1807 */
1808 switch (pPage->enmKind)
1809 {
1810 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1811 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1812 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1813 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1814 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1815 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1816 case PGMPOOLKIND_64BIT_PML4:
1817#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1818 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1819 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1820 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1821 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1822 case PGMPOOLKIND_32BIT_PD:
1823 case PGMPOOLKIND_PAE_PDPT:
1824#else
1825 case PGMPOOLKIND_ROOT_PDPT:
1826#endif
1827 break;
1828
1829 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1830 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1831 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1832 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1833 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1834 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1835 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1836 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1837 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1838 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1839 case PGMPOOLKIND_ROOT_NESTED:
1840 /* Nothing to monitor here. */
1841 return VINF_SUCCESS;
1842
1843#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1844 case PGMPOOLKIND_32BIT_PD_PHYS:
1845 case PGMPOOLKIND_PAE_PDPT_PHYS:
1846 case PGMPOOLKIND_PAE_PD_PHYS:
1847 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1848 /* Nothing to monitor here. */
1849 return VINF_SUCCESS;
1850#else
1851 case PGMPOOLKIND_ROOT_32BIT_PD:
1852 case PGMPOOLKIND_ROOT_PAE_PD:
1853#endif
1854#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1855 break;
1856#else
1857 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1858#endif
1859 default:
1860 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1861 }
1862
1863 /*
1864 * Install handler.
1865 */
1866 int rc;
1867 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1868 if (pPageHead)
1869 {
1870 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1871 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1872 pPage->iMonitoredPrev = pPageHead->idx;
1873 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1874 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1875 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1876 pPageHead->iMonitoredNext = pPage->idx;
1877 rc = VINF_SUCCESS;
1878 }
1879 else
1880 {
1881 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1882 PVM pVM = pPool->CTX_SUFF(pVM);
1883 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1884 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1885 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1886 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1887 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1888 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1889 pPool->pszAccessHandler);
1890 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1891 * the heap size should suffice. */
1892 AssertFatalRC(rc);
1893 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1894 rc = VERR_PGM_POOL_CLEARED;
1895 }
1896 pPage->fMonitored = true;
1897 return rc;
1898}
1899
1900
1901/**
1902 * Disables write monitoring of a guest page.
1903 *
1904 * @returns VBox status code.
1905 * @retval VINF_SUCCESS on success.
1906 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
1907 * @param pPool The pool.
1908 * @param pPage The cached page.
1909 */
1910static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1911{
1912 /*
1913 * Filter out the relevant kinds.
1914 */
1915 switch (pPage->enmKind)
1916 {
1917 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1918 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1919 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1920 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1921 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1922 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1923 case PGMPOOLKIND_64BIT_PML4:
1924#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1925 case PGMPOOLKIND_32BIT_PD:
1926 case PGMPOOLKIND_PAE_PDPT:
1927 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1928 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1929 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1930 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1931#else
1932 case PGMPOOLKIND_ROOT_PDPT:
1933#endif
1934 break;
1935
1936 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1937 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1938 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1939 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1940 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1941 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1942 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1943 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1944 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1945 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1946 case PGMPOOLKIND_ROOT_NESTED:
1947 case PGMPOOLKIND_PAE_PD_PHYS:
1948 case PGMPOOLKIND_PAE_PDPT_PHYS:
1949 case PGMPOOLKIND_32BIT_PD_PHYS:
1950 /* Nothing to monitor here. */
1951 return VINF_SUCCESS;
1952
1953#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1954 case PGMPOOLKIND_ROOT_32BIT_PD:
1955 case PGMPOOLKIND_ROOT_PAE_PD:
1956#endif
1957#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1958 break;
1959#endif
1960#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1961 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1962#endif
1963 default:
1964 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1965 }
1966
1967 /*
1968 * Remove the page from the monitored list or uninstall it if last.
1969 */
1970 const PVM pVM = pPool->CTX_SUFF(pVM);
1971 int rc;
1972 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1973 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1974 {
1975 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1976 {
1977 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1978 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1979#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1980 pNewHead->fCR3Mix = pPage->fCR3Mix;
1981#endif
1982 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1983 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1984 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1985 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
1986 pPool->pszAccessHandler);
1987 AssertFatalRCSuccess(rc);
1988 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1989 }
1990 else
1991 {
1992 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1993 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1994 {
1995 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1996 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1997 }
1998 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1999 rc = VINF_SUCCESS;
2000 }
2001 }
2002 else
2003 {
2004 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2005 AssertFatalRC(rc);
2006 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2007 rc = VERR_PGM_POOL_CLEARED;
2008 }
2009 pPage->fMonitored = false;
2010
2011 /*
2012 * Remove it from the list of modified pages (if in it).
2013 */
2014 pgmPoolMonitorModifiedRemove(pPool, pPage);
2015
2016 return rc;
2017}
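/*
 * Note: when the head of a monitor chain is removed above, the already
 * registered physical handler is kept and only its user arguments are
 * repointed at the new head via PGMHandlerPhysicalChangeCallbacks; this
 * avoids a deregister/register round trip on the same guest page.
 */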
2018
2019# if defined(PGMPOOL_WITH_MIXED_PT_CR3) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2020
2021/**
2022 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
2023 *
2024 * @param pPool The Pool.
2025 * @param pPage A page in the chain.
2026 * @param fCR3Mix The new fCR3Mix value.
2027 */
2028static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
2029{
2030 /* current */
2031 pPage->fCR3Mix = fCR3Mix;
2032
2033 /* before */
2034 int16_t idx = pPage->iMonitoredPrev;
2035 while (idx != NIL_PGMPOOL_IDX)
2036 {
2037 pPool->aPages[idx].fCR3Mix = fCR3Mix;
2038 idx = pPool->aPages[idx].iMonitoredPrev;
2039 }
2040
2041 /* after */
2042 idx = pPage->iMonitoredNext;
2043 while (idx != NIL_PGMPOOL_IDX)
2044 {
2045 pPool->aPages[idx].fCR3Mix = fCR3Mix;
2046 idx = pPool->aPages[idx].iMonitoredNext;
2047 }
2048}
2049
2050
2051/**
2052 * Installs or modifies monitoring of a CR3 page (special).
2053 *
2054 * We're pretending the CR3 page is shadowed by the pool so we can use the
2055 * generic mechanisms in detecting chained monitoring. (This also gives us a
2056 * tast of what code changes are required to really pool CR3 shadow pages.)
2057 *
2058 * @returns VBox status code.
2059 * @param pPool The pool.
2060 * @param idxRoot The CR3 (root) page index.
2061 * @param GCPhysCR3 The (new) CR3 value.
2062 */
2063int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
2064{
2065 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
2066 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
2067 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d} GCPhysCR3=%RGp\n",
2068 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
2069
2070 /*
2071 * The unlikely case where it already matches.
2072 */
2073 if (pPage->GCPhys == GCPhysCR3)
2074 {
2075 Assert(pPage->fMonitored);
2076 return VINF_SUCCESS;
2077 }
2078
2079 /*
2080 * Flush the current monitoring and remove it from the hash.
2081 */
2082 int rc = VINF_SUCCESS;
2083 if (pPage->fMonitored)
2084 {
2085 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
2086 rc = pgmPoolMonitorFlush(pPool, pPage);
2087 if (rc == VERR_PGM_POOL_CLEARED)
2088 rc = VINF_SUCCESS;
2089 else
2090 AssertFatalRC(rc);
2091 pgmPoolHashRemove(pPool, pPage);
2092 }
2093
2094 /*
2095 * Monitor the page at the new location and insert it into the hash.
2096 */
2097 pPage->GCPhys = GCPhysCR3;
2098 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
2099 if (rc2 != VERR_PGM_POOL_CLEARED)
2100 {
2101 AssertFatalRC(rc2);
2102 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
2103 rc = rc2;
2104 }
2105 pgmPoolHashInsert(pPool, pPage);
2106 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
2107 return rc;
2108}
2109
2110
2111/**
2112 * Removes the monitoring of a CR3 page (special).
2113 *
2114 * @returns VBox status code.
2115 * @param pPool The pool.
2116 * @param idxRoot The CR3 (root) page index.
2117 */
2118int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
2119{
2120 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
2121 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
2122 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d}\n",
2123 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
2124
2125 if (!pPage->fMonitored)
2126 return VINF_SUCCESS;
2127
2128 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
2129 int rc = pgmPoolMonitorFlush(pPool, pPage);
2130 if (rc != VERR_PGM_POOL_CLEARED)
2131 AssertFatalRC(rc);
2132 else
2133 rc = VINF_SUCCESS;
2134 pgmPoolHashRemove(pPool, pPage);
2135 Assert(!pPage->fMonitored);
2136 pPage->GCPhys = NIL_RTGCPHYS;
2137 return rc;
2138}
2139
2140# endif /* PGMPOOL_WITH_MIXED_PT_CR3 && !VBOX_WITH_PGMPOOL_PAGING_ONLY*/
2141
2142/**
2143 * Inserts the page into the list of modified pages.
2144 *
2145 * @param pPool The pool.
2146 * @param pPage The page.
2147 */
2148void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2149{
2150 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2151 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2152 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2153 && pPool->iModifiedHead != pPage->idx,
2154 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2155 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2156 pPool->iModifiedHead, pPool->cModifiedPages));
2157
2158 pPage->iModifiedNext = pPool->iModifiedHead;
2159 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2160 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2161 pPool->iModifiedHead = pPage->idx;
2162 pPool->cModifiedPages++;
2163#ifdef VBOX_WITH_STATISTICS
2164 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2165 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2166#endif
2167}
2168
2169
2170/**
2171 * Removes the page from the list of modified pages and resets the
2172 * modification counter.
2173 *
2174 * @param pPool The pool.
2175 * @param pPage The page which is believed to be in the list of modified pages.
2176 */
2177static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2178{
2179 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2180 if (pPool->iModifiedHead == pPage->idx)
2181 {
2182 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2183 pPool->iModifiedHead = pPage->iModifiedNext;
2184 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2185 {
2186 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2187 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2188 }
2189 pPool->cModifiedPages--;
2190 }
2191 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2192 {
2193 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2194 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2195 {
2196 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2197 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2198 }
2199 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2200 pPool->cModifiedPages--;
2201 }
2202 else
2203 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2204 pPage->cModifications = 0;
2205}
2206
2207
2208/**
2209 * Zaps the list of modified pages, resetting their modification counters in the process.
2210 *
2211 * @param pVM The VM handle.
2212 */
2213void pgmPoolMonitorModifiedClearAll(PVM pVM)
2214{
2215 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2216 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2217
2218 unsigned cPages = 0; NOREF(cPages);
2219 uint16_t idx = pPool->iModifiedHead;
2220 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2221 while (idx != NIL_PGMPOOL_IDX)
2222 {
2223 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2224 idx = pPage->iModifiedNext;
2225 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2226 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2227 pPage->cModifications = 0;
2228 Assert(++cPages);
2229 }
2230 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2231 pPool->cModifiedPages = 0;
2232}
2233
2234
2235#ifdef IN_RING3
2236/**
2237 * Clear all shadow pages and clear all modification counters.
2238 *
2239 * @param pVM The VM handle.
2240 * @remark Should only be used when monitoring is available, thus placed in
2241 * the PGMPOOL_WITH_MONITORING #ifdef.
2242 */
2243void pgmPoolClearAll(PVM pVM)
2244{
2245 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2246 STAM_PROFILE_START(&pPool->StatClearAll, c);
2247 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2248
2249 /*
2250 * Iterate all the pages until we've encountered all that are in use.
2251 * This is a simple but not quite optimal solution.
2252 */
2253 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2254 unsigned cLeft = pPool->cUsedPages;
2255 unsigned iPage = pPool->cCurPages;
2256 while (--iPage >= PGMPOOL_IDX_FIRST)
2257 {
2258 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2259 if (pPage->GCPhys != NIL_RTGCPHYS)
2260 {
2261 switch (pPage->enmKind)
2262 {
2263 /*
2264 * We only care about shadow page tables.
2265 */
2266 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2267 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2268 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2269 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2270 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2271 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2272 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2273 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2274 {
2275#ifdef PGMPOOL_WITH_USER_TRACKING
2276 if (pPage->cPresent)
2277#endif
2278 {
2279 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2280 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2281 ASMMemZeroPage(pvShw);
2282 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2283#ifdef PGMPOOL_WITH_USER_TRACKING
2284 pPage->cPresent = 0;
2285 pPage->iFirstPresent = ~0;
2286#endif
2287 }
2288 }
2289 /* fall thru */
2290
2291 default:
2292 Assert(!pPage->cModifications || ++cModifiedPages);
2293 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2294 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2295 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2296 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2297 pPage->cModifications = 0;
2298 break;
2299
2300 }
2301 if (!--cLeft)
2302 break;
2303 }
2304 }
2305
2306 /* sweep the special pages too. */
2307 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2308 {
2309 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2310 if (pPage->GCPhys != NIL_RTGCPHYS)
2311 {
2312 Assert(!pPage->cModifications || ++cModifiedPages);
2313 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2314 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2315 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2316 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2317 pPage->cModifications = 0;
2318 }
2319 }
2320
2321#ifndef DEBUG_michael
2322 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2323#endif
2324 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2325 pPool->cModifiedPages = 0;
2326
2327#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2328 /*
2329 * Clear all the GCPhys links and rebuild the phys ext free list.
2330 */
2331 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2332 pRam;
2333 pRam = pRam->CTX_SUFF(pNext))
2334 {
2335 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2336 while (iPage-- > 0)
2337 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
2338 }
2339
2340 pPool->iPhysExtFreeHead = 0;
2341 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2342 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2343 for (unsigned i = 0; i < cMaxPhysExts; i++)
2344 {
2345 paPhysExts[i].iNext = i + 1;
2346 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2347 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2348 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2349 }
2350 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2351#endif
2352
2353
2354 pPool->cPresent = 0;
2355 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2356}
2357#endif /* IN_RING3 */
2358
2359
2360/**
2361 * Handle SyncCR3 pool tasks
2362 *
2363 * @returns VBox status code.
2364 * @retval VINF_SUCCESS on success.
2365 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2366 * @param pVM The VM handle.
2367 * @remark Should only be used when monitoring is available, thus placed in
2368 * the PGMPOOL_WITH_MONITORING #ifdef.
2369 */
2370int pgmPoolSyncCR3(PVM pVM)
2371{
2372 LogFlow(("pgmPoolSyncCR3\n"));
2373 /*
2374 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2375 * Occasionally we will have to clear all the shadow page tables because we wanted
2376 * to monitor a page which was mapped by too many shadowed page tables. This operation
2377 * sometimes refered to as a 'lightweight flush'.
2378 */
2379 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2380 pgmPoolMonitorModifiedClearAll(pVM);
2381 else
2382 {
2383# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2384 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2385 pgmPoolClearAll(pVM);
2386# else /* !IN_RING3 */
2387 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2388 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2389 return VINF_PGM_SYNC_CR3;
2390# endif /* !IN_RING3 */
2391 }
2392 return VINF_SUCCESS;
2393}
2394
2395#endif /* PGMPOOL_WITH_MONITORING */
2396#ifdef PGMPOOL_WITH_USER_TRACKING
2397
2398/**
2399 * Frees up at least one user entry.
2400 *
2401 * @returns VBox status code.
2402 * @retval VINF_SUCCESS if successfully freed.
2403 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2404 * @param pPool The pool.
2405 * @param iUser The user index.
2406 */
2407static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2408{
2409 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2410#ifdef PGMPOOL_WITH_CACHE
2411 /*
2412 * Just free cached pages in a braindead fashion.
2413 */
2414 /** @todo walk the age list backwards and free the first with usage. */
2415 int rc = VINF_SUCCESS;
2416 do
2417 {
2418 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2419 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2420 rc = rc2;
2421 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2422 return rc;
2423#else
2424 /*
2425 * Lazy approach.
2426 */
2427 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
2428 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
2429 pgmPoolFlushAllInt(pPool);
2430 return VERR_PGM_POOL_FLUSHED;
2431#endif
2432}
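/*
 * Note: the cached-page path above keeps evicting pages (pgmPoolCacheFreeOne)
 * until freeing one of them returns at least one user record to the free
 * list, i.e. until pPool->iUserFreeHead is no longer NIL.
 */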
2433
2434
2435/**
2436 * Inserts a page into the cache.
2437 *
2438 * This will create a user node for the page, insert it into the GCPhys
2439 * hash, and insert it into the age list.
2440 *
2441 * @returns VBox status code.
2442 * @retval VINF_SUCCESS if successfully added.
2443 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2444 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a lightweight pool flush.
2445 * @param pPool The pool.
2446 * @param pPage The cached page.
2447 * @param GCPhys The GC physical address of the page we're going to shadow.
2448 * @param iUser The user index.
2449 * @param iUserTable The user table index.
2450 */
2451DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2452{
2453 int rc = VINF_SUCCESS;
2454 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2455
2456 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2457
2458#ifdef VBOX_STRICT
2459 /*
2460 * Check that the entry doesn't already exist.
2461 */
2462 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2463 {
2464 uint16_t i = pPage->iUserHead;
2465 do
2466 {
2467 Assert(i < pPool->cMaxUsers);
2468 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2469 i = paUsers[i].iNext;
2470 } while (i != NIL_PGMPOOL_USER_INDEX);
2471 }
2472#endif
2473
2474 /*
2475 * Find a free user node.
2476 */
2477 uint16_t i = pPool->iUserFreeHead;
2478 if (i == NIL_PGMPOOL_USER_INDEX)
2479 {
2480 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2481 if (RT_FAILURE(rc))
2482 return rc;
2483 i = pPool->iUserFreeHead;
2484 }
2485
2486 /*
2487 * Unlink the user node from the free list,
2488 * initialize and insert it into the user list.
2489 */
2490 pPool->iUserFreeHead = paUsers[i].iNext;
2491 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2492 paUsers[i].iUser = iUser;
2493 paUsers[i].iUserTable = iUserTable;
2494 pPage->iUserHead = i;
2495
2496 /*
2497 * Insert into cache and enable monitoring of the guest page if enabled.
2498 *
2499 * Until we implement caching of all levels, including the CR3 one, we'll
2500 * have to make sure we don't try monitor & cache any recursive reuse of
2501 * a monitored CR3 page. Because all Windows versions do this we'll
2502 * have to be able to do combined access monitoring, CR3 + PT and
2503 * PD + PT (guest PAE).
2504 *
2505 * Update:
2506 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2507 */
2508#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2509# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2510 const bool fCanBeMonitored = true;
2511# else
2512 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2513 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2514 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2515# endif
2516# ifdef PGMPOOL_WITH_CACHE
2517 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2518# endif
2519 if (fCanBeMonitored)
2520 {
2521# ifdef PGMPOOL_WITH_MONITORING
2522 rc = pgmPoolMonitorInsert(pPool, pPage);
2523 if (rc == VERR_PGM_POOL_CLEARED)
2524 {
2525 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2526# ifndef PGMPOOL_WITH_CACHE
2527 pgmPoolMonitorFlush(pPool, pPage);
2528 rc = VERR_PGM_POOL_FLUSHED;
2529# endif
2530 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2531 paUsers[i].iNext = pPool->iUserFreeHead;
2532 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2533 pPool->iUserFreeHead = i;
2534 }
2535 # endif /* PGMPOOL_WITH_MONITORING */
2536 }
2537 #endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2538 return rc;
2539}
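/*
 * Note: the user records allocated above come from a simple index-linked
 * free list (pPool->iUserFreeHead); allocation pops the head and freeing
 * pushes the record back. A sketch of the idiom with hypothetical simplified
 * types (illustration only, not compiled):
 */
#if 0
typedef struct SKETCHUSERNODE { uint16_t iNext; } SKETCHUSERNODE;
static uint16_t sketchFreeListPop(SKETCHUSERNODE *paNodes, uint16_t *piFreeHead)
{
    uint16_t i = *piFreeHead;
    if (i != NIL_PGMPOOL_USER_INDEX)
        *piFreeHead = paNodes[i].iNext;         /* unlink the head record */
    return i;                                   /* NIL_PGMPOOL_USER_INDEX when exhausted */
}
static void sketchFreeListPush(SKETCHUSERNODE *paNodes, uint16_t *piFreeHead, uint16_t i)
{
    paNodes[i].iNext = *piFreeHead;             /* old head becomes the successor */
    *piFreeHead = i;
}
#endif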
2540
2541
2542# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2543/**
2544 * Adds a user reference to a page.
2545 *
2546 * This will move the page to the head of the age list.
2547 *
2548 * @returns VBox status code.
2549 * @retval VINF_SUCCESS if successfully added.
2550 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2551 * @param pPool The pool.
2552 * @param pPage The cached page.
2553 * @param iUser The user index.
2554 * @param iUserTable The user table.
2555 */
2556static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2557{
2558 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2559
2560 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2561# ifdef VBOX_STRICT
2562 /*
2563 * Check that the entry doesn't already exist.
2564 */
2565 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2566 {
2567 uint16_t i = pPage->iUserHead;
2568 do
2569 {
2570 Assert(i < pPool->cMaxUsers);
2571 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2572 i = paUsers[i].iNext;
2573 } while (i != NIL_PGMPOOL_USER_INDEX);
2574 }
2575# endif
2576
2577 /*
2578 * Allocate a user node.
2579 */
2580 uint16_t i = pPool->iUserFreeHead;
2581 if (i == NIL_PGMPOOL_USER_INDEX)
2582 {
2583 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2584 if (RT_FAILURE(rc))
2585 return rc;
2586 i = pPool->iUserFreeHead;
2587 }
2588 pPool->iUserFreeHead = paUsers[i].iNext;
2589
2590 /*
2591 * Initialize the user node and insert it.
2592 */
2593 paUsers[i].iNext = pPage->iUserHead;
2594 paUsers[i].iUser = iUser;
2595 paUsers[i].iUserTable = iUserTable;
2596 pPage->iUserHead = i;
2597
2598# ifdef PGMPOOL_WITH_CACHE
2599 /*
2600 * Tell the cache to update its replacement stats for this page.
2601 */
2602 pgmPoolCacheUsed(pPool, pPage);
2603# endif
2604 return VINF_SUCCESS;
2605}
2606# endif /* PGMPOOL_WITH_CACHE */
2607
2608
2609/**
2610 * Frees a user record associated with a page.
2611 *
2612 * This does not clear the entry in the user table, it simply returns the
2613 * user record to the chain of free records.
2614 *
2615 * @param pPool The pool.
2616 * @param pPage The shadow page.
2617 * @param iUser The shadow page pool index of the user table.
2618 * @param iUserTable The index into the user table (shadowed).
2619 */
2620static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2621{
2622 /*
2623 * Unlink and free the specified user entry.
2624 */
2625 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2626
2627 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2628 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2629 uint16_t i = pPage->iUserHead;
2630 if ( i != NIL_PGMPOOL_USER_INDEX
2631 && paUsers[i].iUser == iUser
2632 && paUsers[i].iUserTable == iUserTable)
2633 {
2634 pPage->iUserHead = paUsers[i].iNext;
2635
2636 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2637 paUsers[i].iNext = pPool->iUserFreeHead;
2638 pPool->iUserFreeHead = i;
2639 return;
2640 }
2641
2642 /* General: Linear search. */
2643 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2644 while (i != NIL_PGMPOOL_USER_INDEX)
2645 {
2646 if ( paUsers[i].iUser == iUser
2647 && paUsers[i].iUserTable == iUserTable)
2648 {
2649 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2650 paUsers[iPrev].iNext = paUsers[i].iNext;
2651 else
2652 pPage->iUserHead = paUsers[i].iNext;
2653
2654 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2655 paUsers[i].iNext = pPool->iUserFreeHead;
2656 pPool->iUserFreeHead = i;
2657 return;
2658 }
2659 iPrev = i;
2660 i = paUsers[i].iNext;
2661 }
2662
2663 /* Fatal: didn't find it */
2664 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2665 iUser, iUserTable, pPage->GCPhys));
2666}
2667
2668
2669/**
2670 * Gets the entry size of a shadow table.
2671 *
2672 * @param enmKind The kind of page.
2673 *
2674 * @returns The size of the entry in bytes. That is, 4 or 8.
2675 * @returns If the kind is not for a table, a fatal assertion is raised.
2677 */
2678DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2679{
2680 switch (enmKind)
2681 {
2682 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2683 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2684 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2685#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2686 case PGMPOOLKIND_32BIT_PD:
2687 case PGMPOOLKIND_32BIT_PD_PHYS:
2688#else
2689 case PGMPOOLKIND_ROOT_32BIT_PD:
2690#endif
2691 return 4;
2692
2693 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2694 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2695 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2696 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2697 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2698 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2699 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2700 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2701 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2702 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2703 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2704 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2705 case PGMPOOLKIND_64BIT_PML4:
2706#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2707 case PGMPOOLKIND_ROOT_PAE_PD:
2708 case PGMPOOLKIND_ROOT_PDPT:
2709#endif
2710 case PGMPOOLKIND_PAE_PDPT:
2711 case PGMPOOLKIND_ROOT_NESTED:
2712 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2713 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2714 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2715 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2716 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2717 case PGMPOOLKIND_PAE_PD_PHYS:
2718 case PGMPOOLKIND_PAE_PDPT_PHYS:
2719 return 8;
2720
2721 default:
2722 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2723 }
2724}
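/*
 * Note: a typical use of the entry size is deriving how many entries a
 * shadow table page holds (illustration only, not compiled):
 */
#if 0
static unsigned sketchShadowEntriesPerPage(PGMPOOLKIND enmKind)
{
    /* 4096 / 4 = 1024 entries for legacy 32-bit tables, 4096 / 8 = 512 for PAE/AMD64/EPT ones. */
    return PAGE_SIZE / pgmPoolTrackGetShadowEntrySize(enmKind);
}
#endif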
2725
2726
2727/**
2728 * Gets the entry size of a guest table.
2729 *
2730 * @param enmKind The kind of page.
2731 *
2732 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2733 * @returns If the kind is not for a table, an assertion is raised and 0 is
2734 * returned.
2735 */
2736DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2737{
2738 switch (enmKind)
2739 {
2740 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2741 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2742#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2743 case PGMPOOLKIND_32BIT_PD:
2744#else
2745 case PGMPOOLKIND_ROOT_32BIT_PD:
2746#endif
2747 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2748 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2749 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2750 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2751 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2752 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2753 return 4;
2754
2755 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2756 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2757 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2758 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2759 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2760 case PGMPOOLKIND_64BIT_PML4:
2761#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2762 case PGMPOOLKIND_PAE_PDPT:
2763#else
2764 case PGMPOOLKIND_ROOT_PAE_PD:
2765 case PGMPOOLKIND_ROOT_PDPT:
2766#endif
2767 return 8;
2768
2769 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2770 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2771 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2772 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2773 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2774 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2775 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2776 case PGMPOOLKIND_ROOT_NESTED:
2777 case PGMPOOLKIND_PAE_PD_PHYS:
2778 case PGMPOOLKIND_PAE_PDPT_PHYS:
2779 case PGMPOOLKIND_32BIT_PD_PHYS:
2780 /** @todo can we return 0? (nobody is calling this...) */
2781 AssertFailed();
2782 return 0;
2783
2784 default:
2785 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2786 }
2787}
2788
2789#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2790
2791/**
2792 * Scans one shadow page table for mappings of a physical page.
2793 *
2794 * @param pVM The VM handle.
2795 * @param pPhysPage The guest page in question.
2796 * @param iShw The shadow page table.
2797 * @param cRefs The number of references made in that PT.
2798 */
2799static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2800{
2801 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
2802 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2803
2804 /*
2805 * Assert sanity.
2806 */
2807 Assert(cRefs == 1);
2808 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2809 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2810
2811 /*
2812 * Then, clear the actual mappings to the page in the shadow PT.
2813 */
2814 switch (pPage->enmKind)
2815 {
2816 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2817 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2818 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2819 {
2820 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2821 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2822 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2823 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2824 {
2825 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2826 pPT->a[i].u = 0;
2827 cRefs--;
2828 if (!cRefs)
2829 return;
2830 }
2831#ifdef LOG_ENABLED
2832 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2833 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2834 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2835 {
2836 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2837 pPT->a[i].u = 0;
2838 }
2839#endif
2840 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2841 break;
2842 }
2843
2844 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2845 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2846 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2847 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2848 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2849 {
2850 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2851 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2852 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2853 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2854 {
2855 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2856 pPT->a[i].u = 0;
2857 cRefs--;
2858 if (!cRefs)
2859 return;
2860 }
2861#ifdef LOG_ENABLED
2862 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2863 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2864 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2865 {
2866 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2867 pPT->a[i].u = 0;
2868 }
2869#endif
2870 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
2871 break;
2872 }
2873
2874 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2875 {
2876 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2877 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2878 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2879 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2880 {
2881 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2882 pPT->a[i].u = 0;
2883 cRefs--;
2884 if (!cRefs)
2885 return;
2886 }
2887#ifdef LOG_ENABLED
2888 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2889 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2890 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2891 {
2892 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2893 pPT->a[i].u = 0;
2894 }
2895#endif
2896 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2897 break;
2898 }
2899
2900 default:
2901 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2902 }
2903}
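/*
 * Note: the loops above match PTEs by or'ing the page's host physical
 * address with the present bit and masking everything else off; a present
 * mapping of the page then compares equal regardless of the access bits
 * (illustration only, not compiled; 64-bit PAE entries assumed):
 */
#if 0
static bool sketchPteMapsPage(uint64_t uPte, RTHCPHYS HCPhysPage)
{
    const uint64_t u64 = HCPhysPage | X86_PTE_P;
    return (uPte & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64;
}
#endif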
2904
2905
2906/**
2907 * Scans one shadow page table for mappings of a physical page.
2908 *
2909 * @param pVM The VM handle.
2910 * @param pPhysPage The guest page in question.
2911 * @param iShw The shadow page table.
2912 * @param cRefs The number of references made in that PT.
2913 */
2914void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2915{
2916 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2917 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
2918 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2919 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2920 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
2921 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2922}
2923
2924
2925/**
2926 * Flushes a list of shadow page tables mapping the same physical page.
2927 *
2928 * @param pVM The VM handle.
2929 * @param pPhysPage The guest page in question.
2930 * @param iPhysExt The physical cross reference extent list to flush.
2931 */
2932void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2933{
2934 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2935 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2936 LogFlow(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%R[pgmpage] iPhysExt=%d\n", pPhysPage, iPhysExt));
2937
2938 const uint16_t iPhysExtStart = iPhysExt;
2939 PPGMPOOLPHYSEXT pPhysExt;
2940 do
2941 {
2942 Assert(iPhysExt < pPool->cMaxPhysExts);
2943 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2944 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2945 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2946 {
2947 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2948 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2949 }
2950
2951 /* next */
2952 iPhysExt = pPhysExt->iNext;
2953 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2954
2955 /* insert the list into the free list and clear the ram range entry. */
2956 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2957 pPool->iPhysExtFreeHead = iPhysExtStart;
2958 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
2959
2960 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2961}
2962
2963#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2964
2965/**
2966 * Scans all shadow page tables for mappings of a physical page.
2967 *
2968 * This may be slow, but it's most likely more efficient than cleaning
2969 * out the entire page pool / cache.
2970 *
2971 * @returns VBox status code.
2972 * @retval VINF_SUCCESS if all references have been successfully cleared.
2973 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2974 * a page pool cleaning.
2975 *
2976 * @param pVM The VM handle.
2977 * @param pPhysPage The guest page in question.
2978 */
2979int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2980{
2981 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2982 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2983 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
2984 pPool->cUsedPages, pPool->cPresent, pPhysPage));
2985
2986#if 1
2987 /*
2988 * There is a limit to what makes sense.
2989 */
2990 if (pPool->cPresent > 1024)
2991 {
2992 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2993 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2994 return VINF_PGM_GCPHYS_ALIASED;
2995 }
2996#endif
2997
2998 /*
2999 * Iterate all the pages until we've encountered all that are in use.
3000 * This is a simple but not quite optimal solution.
3001 */
3002 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3003 const uint32_t u32 = u64;
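    /* u32 is the deliberately truncated 32-bit search key used for the
       legacy 32-bit page tables below; u64 serves the PAE ones. */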
3004 unsigned cLeft = pPool->cUsedPages;
3005 unsigned iPage = pPool->cCurPages;
3006 while (--iPage >= PGMPOOL_IDX_FIRST)
3007 {
3008 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3009 if (pPage->GCPhys != NIL_RTGCPHYS)
3010 {
3011 switch (pPage->enmKind)
3012 {
3013 /*
3014 * We only care about shadow page tables.
3015 */
3016 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3017 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3018 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3019 {
3020 unsigned cPresent = pPage->cPresent;
3021 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3022 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3023 if (pPT->a[i].n.u1Present)
3024 {
3025 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3026 {
3027 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3028 pPT->a[i].u = 0;
3029 }
3030 if (!--cPresent)
3031 break;
3032 }
3033 break;
3034 }
3035
3036 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3037 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3038 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3039 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3040 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3041 {
3042 unsigned cPresent = pPage->cPresent;
3043 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3044 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3045 if (pPT->a[i].n.u1Present)
3046 {
3047 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3048 {
3049 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3050 pPT->a[i].u = 0;
3051 }
3052 if (!--cPresent)
3053 break;
3054 }
3055 break;
3056 }
3057 }
3058 if (!--cLeft)
3059 break;
3060 }
3061 }
3062
3063 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3064 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3065 return VINF_SUCCESS;
3066}
3067
3068
3069/**
3070 * Clears the user entry in a user table.
3071 *
3072 * This is used to remove all references to a page when flushing it.
3073 */
3074static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3075{
3076 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3077 Assert(pUser->iUser < pPool->cCurPages);
3078 uint32_t iUserTable = pUser->iUserTable;
3079
3080 /*
3081 * Map the user page.
3082 */
3083 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3084#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3085 if (pUserPage->enmKind == PGMPOOLKIND_ROOT_PAE_PD)
3086 {
3087 /* Must translate the fake 2048 entry PD to a 512 PD one since the R0 mapping is not linear. */
3088 Assert(pUser->iUser == PGMPOOL_IDX_PAE_PD);
3089 uint32_t iPdpt = iUserTable / X86_PG_PAE_ENTRIES;
3090 iUserTable %= X86_PG_PAE_ENTRIES;
3091 pUserPage = &pPool->aPages[PGMPOOL_IDX_PAE_PD_0 + iPdpt];
3092 Assert(pUserPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD);
3093 }
3094#endif
3095 union
3096 {
3097 uint64_t *pau64;
3098 uint32_t *pau32;
3099 } u;
3100 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3101
3102 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3103
3104 /* Safety precaution in case we change the paging for other modes too in the future. */
3105#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3106 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3107#else
3108 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
3109#endif
3110
3111#ifdef VBOX_STRICT
3112 /*
3113 * Some sanity checks.
3114 */
3115 switch (pUserPage->enmKind)
3116 {
3117# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3118 case PGMPOOLKIND_32BIT_PD:
3119 case PGMPOOLKIND_32BIT_PD_PHYS:
3120 Assert(iUserTable < X86_PG_ENTRIES);
3121 break;
3122# else
3123 case PGMPOOLKIND_ROOT_32BIT_PD:
3124 Assert(iUserTable < X86_PG_ENTRIES);
3125 Assert(!(u.pau32[iUserTable] & PGM_PDFLAGS_MAPPING));
3126 break;
3127# endif
3128# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3129 case PGMPOOLKIND_ROOT_PAE_PD:
3130 Assert(iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
3131 AssertMsg(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING), ("%llx %d\n", u.pau64[iUserTable], iUserTable));
3132 break;
3133# endif
3134# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3135 case PGMPOOLKIND_PAE_PDPT:
3136 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3137 case PGMPOOLKIND_PAE_PDPT_PHYS:
3138# else
3139 case PGMPOOLKIND_ROOT_PDPT:
3140# endif
3141 Assert(iUserTable < 4);
3142 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3143 break;
3144 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3145 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3146 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3147 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3148 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3149 case PGMPOOLKIND_PAE_PD_PHYS:
3150 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3151 break;
3152 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3153 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3154 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3155 break;
3156 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3157 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3158 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3159 break;
3160 case PGMPOOLKIND_64BIT_PML4:
3161 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3162 /* GCPhys >> PAGE_SHIFT is the index here */
3163 break;
3164 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3165 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3166 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3167 break;
3168
3169 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3170 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3171 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3172 break;
3173
3174 case PGMPOOLKIND_ROOT_NESTED:
3175 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3176 break;
3177
3178 default:
3179 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3180 break;
3181 }
3182#endif /* VBOX_STRICT */
3183
3184 /*
3185 * Clear the entry in the user page.
3186 */
3187 switch (pUserPage->enmKind)
3188 {
3189 /* 32-bit entries */
3190#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3191 case PGMPOOLKIND_32BIT_PD:
3192 case PGMPOOLKIND_32BIT_PD_PHYS:
3193#else
3194 case PGMPOOLKIND_ROOT_32BIT_PD:
3195#endif
3196 u.pau32[iUserTable] = 0;
3197 break;
3198
3199 /* 64-bit entries */
3200 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3201 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3202 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3203 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3204 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3205#if defined(IN_RC) && defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3206 /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during cr3 load, so any
3207 * non-present PDPT entry will continue to cause page faults.
3208 */
3209 ASMReloadCR3();
3210#endif
3211 /* no break */
3212 case PGMPOOLKIND_PAE_PD_PHYS:
3213 case PGMPOOLKIND_PAE_PDPT_PHYS:
3214 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3215 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3216 case PGMPOOLKIND_64BIT_PML4:
3217 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3218 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3219#if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3220 case PGMPOOLKIND_ROOT_PAE_PD:
3221#endif
3222#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3223 case PGMPOOLKIND_PAE_PDPT:
3224 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3225#else
3226 case PGMPOOLKIND_ROOT_PDPT:
3227#endif
3228 case PGMPOOLKIND_ROOT_NESTED:
3229 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3230 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3231 u.pau64[iUserTable] = 0;
3232 break;
3233
3234 default:
3235 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3236 }
3237}
3238
3239
3240/**
3241 * Clears all users of a page.
3242 */
3243static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3244{
3245 /*
3246 * Free all the user records.
3247 */
3248 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3249
3250 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3251 uint16_t i = pPage->iUserHead;
3252 while (i != NIL_PGMPOOL_USER_INDEX)
3253 {
3254 /* Clear the entry in the user table. */
3255 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3256
3257 /* Free it. */
3258 const uint16_t iNext = paUsers[i].iNext;
3259 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3260 paUsers[i].iNext = pPool->iUserFreeHead;
3261 pPool->iUserFreeHead = i;
3262
3263 /* Next. */
3264 i = iNext;
3265 }
3266 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3267}
3268
3269#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3270
3271/**
3272 * Allocates a new physical cross reference extent.
3273 *
3274 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3275 * @param pVM The VM handle.
3276 * @param piPhysExt Where to store the phys ext index.
3277 */
3278PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3279{
3280 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3281 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3282 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3283 {
3284 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3285 return NULL;
3286 }
3287 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3288 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3289 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3290 *piPhysExt = iPhysExt;
3291 return pPhysExt;
3292}
3293
3294
3295/**
3296 * Frees a physical cross reference extent.
3297 *
3298 * @param pVM The VM handle.
3299 * @param iPhysExt The extent to free.
3300 */
3301void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3302{
3303 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3304 Assert(iPhysExt < pPool->cMaxPhysExts);
3305 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3306 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3307 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3308 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3309 pPool->iPhysExtFreeHead = iPhysExt;
3310}
3311
3312
3313/**
3314 * Frees a chain of physical cross reference extents.
3315 *
3316 * @param pVM The VM handle.
3317 * @param iPhysExt The index of the head extent in the chain to free.
3318 */
3319void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3320{
3321 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3322
3323 const uint16_t iPhysExtStart = iPhysExt;
3324 PPGMPOOLPHYSEXT pPhysExt;
3325 do
3326 {
3327 Assert(iPhysExt < pPool->cMaxPhysExts);
3328 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3329 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3330 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3331
3332 /* next */
3333 iPhysExt = pPhysExt->iNext;
3334 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3335
3336 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3337 pPool->iPhysExtFreeHead = iPhysExtStart;
3338}
3339
3340
3341/**
3342 * Insert a reference into a list of physical cross reference extents.
3343 *
3344 * @returns The new tracking data for PGMPAGE.
3345 *
3346 * @param pVM The VM handle.
3347 * @param iPhysExt The physical extent index of the list head.
3348 * @param iShwPT The shadow page table index.
3349 *
3350 */
3351static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3352{
3353 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3354 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3355
3356 /* special common case. */
3357 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3358 {
3359 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3360 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3361 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
3362 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3363 }
3364
3365 /* general treatment. */
3366 const uint16_t iPhysExtStart = iPhysExt;
3367 unsigned cMax = 15;
3368 for (;;)
3369 {
3370 Assert(iPhysExt < pPool->cMaxPhysExts);
3371 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3372 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3373 {
3374 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3375 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3376 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3377 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3378 }
3379 if (!--cMax)
3380 {
3381 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3382 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3383 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
3384 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3385        }
            /* advance to the next extent; stop when the end of the chain is reached */
            iPhysExt = paPhysExts[iPhysExt].iNext;
            if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
                break;
3386    }
3387
3388 /* add another extent to the list. */
3389 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3390 if (!pNew)
3391 {
3392 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3393 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3394 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3395 }
3396 pNew->iNext = iPhysExtStart;
3397 pNew->aidx[0] = iShwPT;
3398 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3399 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3400}
3401
3402
3403/**
3404 * Add a reference to a guest physical page where extents are in use.
3405 *
3406 * @returns The new tracking data for PGMPAGE.
3407 *
3408 * @param pVM The VM handle.
3409 * @param u16 The current tracking data (the top 16 bits of the ram range flags).
3410 * @param iShwPT The shadow page table index.
3411 */
3412uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3413{
3414 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3415 {
3416 /*
3417 * Convert to extent list.
3418 */
3419 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3420 uint16_t iPhysExt;
3421 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3422 if (pPhysExt)
3423 {
3424 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3425 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3426 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3427 pPhysExt->aidx[1] = iShwPT;
3428 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3429 }
3430 else
3431 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3432 }
3433 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3434 {
3435 /*
3436 * Insert into the extent list.
3437 */
3438 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3439 }
3440 else
3441 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3442 return u16;
3443}
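/*
 * Illustrative sketch (editorial addition): how the 16-bit tracking word is
 * packed and unpacked. PGMPOOL_TD_MAKE combines the reference count field
 * (here the PGMPOOL_TD_CREFS_PHYSEXT marker) with an extent index, and the
 * GET macros recover the two fields, exactly as done in the function above.
 */
#if 0 /* example only */
    uint16_t u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
    Assert(PGMPOOL_TD_GET_CREFS(u16) == PGMPOOL_TD_CREFS_PHYSEXT);
    Assert(PGMPOOL_TD_GET_IDX(u16)   == iPhysExt);
#endif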
3444
3445
3446/**
3447 * Clear references to guest physical memory.
3448 *
3449 * @param pPool The pool.
3450 * @param pPage The page.
3451 * @param pPhysPage Pointer to the aPages entry in the ram range.
3452 */
3453void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3454{
3455 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3456 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3457
3458 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3459 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3460 {
3461 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3462 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3463 do
3464 {
3465 Assert(iPhysExt < pPool->cMaxPhysExts);
3466
3467 /*
3468 * Look for the shadow page and check if it's all freed.
3469 */
3470 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3471 {
3472 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3473 {
3474 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3475
3476 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3477 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3478 {
3479 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3480 return;
3481 }
3482
3483 /* we can free the node. */
3484 PVM pVM = pPool->CTX_SUFF(pVM);
3485 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3486 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3487 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3488 {
3489 /* lonely node */
3490 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3491 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3492 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3493 }
3494 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3495 {
3496 /* head */
3497 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3498 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3499 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3500 }
3501 else
3502 {
3503 /* in list */
3504 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3505 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3506 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3507 }
3508 iPhysExt = iPhysExtNext;
3509 return;
3510 }
3511 }
3512
3513 /* next */
3514 iPhysExtPrev = iPhysExt;
3515 iPhysExt = paPhysExts[iPhysExt].iNext;
3516 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3517
3518 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3519 }
3520 else /* nothing to do */
3521 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3522}
3523
3524
3525/**
3526 * Clear references to guest physical memory.
3527 *
3528 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3529 * is assumed to be correct, so the linear search can be skipped and we can assert
3530 * at an earlier point.
3531 *
3532 * @param pPool The pool.
3533 * @param pPage The page.
3534 * @param HCPhys The host physical address corresponding to the guest page.
3535 * @param GCPhys The guest physical address corresponding to HCPhys.
3536 */
3537static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3538{
3539 /*
3540 * Walk range list.
3541 */
3542 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3543 while (pRam)
3544 {
3545 RTGCPHYS off = GCPhys - pRam->GCPhys;
3546 if (off < pRam->cb)
3547 {
3548 /* does it match? */
3549 const unsigned iPage = off >> PAGE_SHIFT;
3550 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3551#ifdef LOG_ENABLED
3552            RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3553            Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3554#endif
3555 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3556 {
3557 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3558 return;
3559 }
3560 break;
3561 }
3562 pRam = pRam->CTX_SUFF(pNext);
3563 }
3564 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3565}
3566
3567
3568/**
3569 * Clear references to guest physical memory.
3570 *
3571 * @param pPool The pool.
3572 * @param pPage The page.
3573 * @param HCPhys The host physical address corresponding to the guest page.
3574 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3575 */
3576static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3577{
3578 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
3579
3580 /*
3581 * Walk range list.
3582 */
3583 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3584 while (pRam)
3585 {
3586 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3587 if (off < pRam->cb)
3588 {
3589 /* does it match? */
3590 const unsigned iPage = off >> PAGE_SHIFT;
3591 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3592 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3593 {
3594 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3595 return;
3596 }
3597 break;
3598 }
3599 pRam = pRam->CTX_SUFF(pNext);
3600 }
3601
3602 /*
3603 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3604 */
3605 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3606 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3607 while (pRam)
3608 {
3609 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3610 while (iPage-- > 0)
3611 {
3612 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3613 {
3614 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3615 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3616 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3617 return;
3618 }
3619 }
3620 pRam = pRam->CTX_SUFF(pNext);
3621 }
3622
3623 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3624}
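/*
 * Illustrative sketch (editorial addition): the range checks above rely on
 * unsigned wrap-around. When the address lies below pRam->GCPhys the
 * subtraction wraps to a huge value that fails the single 'off < pRam->cb'
 * test, so one compare covers both bounds.
 */
#if 0 /* example only */
    RTGCPHYS off = GCPhysHint - pRam->GCPhys;     /* wraps if GCPhysHint < pRam->GCPhys */
    if (off < pRam->cb)                           /* inside the range? */
    {
        const unsigned iPage = off >> PAGE_SHIFT; /* index into pRam->aPages */
        PPGMPAGE pPhysPage = &pRam->aPages[iPage];
    }
#endif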
3625
3626
3627/**
3628 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3629 *
3630 * @param pPool The pool.
3631 * @param pPage The page.
3632 * @param pShwPT The shadow page table (mapping of the page).
3633 * @param pGstPT The guest page table.
3634 */
3635DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3636{
3637 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3638 if (pShwPT->a[i].n.u1Present)
3639 {
3640 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3641 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3642 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3643 if (!--pPage->cPresent)
3644 break;
3645 }
3646}
3647
3648
3649/**
3650 * Clear references to guest physical memory in a PAE / 32-bit page table.
3651 *
3652 * @param pPool The pool.
3653 * @param pPage The page.
3654 * @param pShwPT The shadow page table (mapping of the page).
3655 * @param pGstPT The guest page table (just a half one).
3656 */
3657DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3658{
3659 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3660 if (pShwPT->a[i].n.u1Present)
3661 {
3662 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3663 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3664 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3665 }
3666}
3667
3668
3669/**
3670 * Clear references to guest physical memory in a PAE / PAE page table.
3671 *
3672 * @param pPool The pool.
3673 * @param pPage The page.
3674 * @param pShwPT The shadow page table (mapping of the page).
3675 * @param pGstPT The guest page table.
3676 */
3677DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3678{
3679 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3680 if (pShwPT->a[i].n.u1Present)
3681 {
3682            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3683 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3684 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3685 }
3686}
3687
3688
3689/**
3690 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3691 *
3692 * @param pPool The pool.
3693 * @param pPage The page.
3694 * @param pShwPT The shadow page table (mapping of the page).
3695 */
3696DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3697{
3698 RTGCPHYS GCPhys = pPage->GCPhys;
3699 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3700 if (pShwPT->a[i].n.u1Present)
3701 {
3702 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3703 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3704 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3705 }
3706}
3707
3708
3709/**
3710 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3711 *
3712 * @param pPool The pool.
3713 * @param pPage The page.
3714 * @param pShwPT The shadow page table (mapping of the page).
3715 */
3716DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3717{
3718 RTGCPHYS GCPhys = pPage->GCPhys;
3719 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3720 if (pShwPT->a[i].n.u1Present)
3721 {
3722 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3723 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3724 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3725 }
3726}
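/*
 * Illustrative sketch (editorial addition): all the deref helpers above share
 * one idiom -- mask the raw entry down to its page frame address before
 * handing it to the tracking code. For a PAE entry that looks like this:
 */
#if 0 /* example only */
    if (pShwPT->a[i].n.u1Present)
    {
        RTHCPHYS HCPhys = pShwPT->a[i].u & X86_PTE_PAE_PG_MASK; /* strip the flag bits */
        pgmPoolTracDerefGCPhys(pPool, pPage, HCPhys, GCPhys);   /* drop the reference */
    }
#endif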
3727
3728#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3729
3730
3731#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3732/**
3733 * Clear references to shadowed pages in a 32-bit page directory.
3734 *
3735 * @param pPool The pool.
3736 * @param pPage The page.
3737 * @param pShwPD The shadow page directory (mapping of the page).
3738 */
3739DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
3740{
3741 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3742 {
3743 if ( pShwPD->a[i].n.u1Present
3744 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3745 )
3746 {
3747 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
3748 if (pSubPage)
3749 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3750 else
3751 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
3752 }
3753 }
3754}
3755#endif
3756
3757/**
3758 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
3759 *
3760 * @param pPool The pool.
3761 * @param pPage The page.
3762 * @param pShwPD The shadow page directory (mapping of the page).
3763 */
3764DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3765{
3766 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3767 {
3768 if ( pShwPD->a[i].n.u1Present
3769#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3770 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3771#endif
3772 )
3773 {
3774 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3775 if (pSubPage)
3776 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3777 else
3778 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3779 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3780 }
3781 }
3782}
3783
3784
3785/**
3786 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3787 *
3788 * @param pPool The pool.
3789 * @param pPage The page.
3790 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3791 */
3792DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3793{
3794 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3795 {
3796 if ( pShwPDPT->a[i].n.u1Present
3797#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3798 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
3799#endif
3800 )
3801 {
3802 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3803 if (pSubPage)
3804 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3805 else
3806 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3807 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3808 }
3809 }
3810}
3811
3812
3813/**
3814 * Clear references to shadowed pages in a 64-bit level 4 page table.
3815 *
3816 * @param pPool The pool.
3817 * @param pPage The page.
3818 * @param pShwPML4 The shadow PML4 table (mapping of the page).
3819 */
3820DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3821{
3822 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3823 {
3824 if (pShwPML4->a[i].n.u1Present)
3825 {
3826 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3827 if (pSubPage)
3828 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3829 else
3830 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3831 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3832 }
3833 }
3834}
3835
3836
3837/**
3838 * Clear references to shadowed pages in an EPT page table.
3839 *
3840 * @param pPool The pool.
3841 * @param pPage The page.
3842 * @param pShwPT The shadow page table (mapping of the page).
3843 */
3844DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3845{
3846 RTGCPHYS GCPhys = pPage->GCPhys;
3847 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3848 if (pShwPT->a[i].n.u1Present)
3849 {
3850 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
3851                  i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3852 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3853 }
3854}
3855
3856
3857/**
3858 * Clear references to shadowed pages in an EPT page directory.
3859 *
3860 * @param pPool The pool.
3861 * @param pPage The page.
3862 * @param pShwPD The shadow page directory (mapping of the page).
3863 */
3864DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3865{
3866 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3867 {
3868 if (pShwPD->a[i].n.u1Present)
3869 {
3870 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3871 if (pSubPage)
3872 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3873 else
3874 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3875 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3876 }
3877 }
3878}
3879
3880
3881/**
3882 * Clear references to shadowed pages in an EPT page directory pointer table.
3883 *
3884 * @param pPool The pool.
3885 * @param pPage The page.
3886 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3887 */
3888DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3889{
3890 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3891 {
3892 if (pShwPDPT->a[i].n.u1Present)
3893 {
3894 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3895 if (pSubPage)
3896 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3897 else
3898 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3899 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3900 }
3901 }
3902}
3903
3904
3905/**
3906 * Clears all references made by this page.
3907 *
3908 * This includes other shadow pages and GC physical addresses.
3909 *
3910 * @param pPool The pool.
3911 * @param pPage The page.
3912 */
3913static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3914{
3915 /*
3916 * Map the shadow page and take action according to the page kind.
3917 */
3918 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3919 switch (pPage->enmKind)
3920 {
3921#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3922 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3923 {
3924 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3925 void *pvGst;
3926 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3927 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3928 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3929 break;
3930 }
3931
3932 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3933 {
3934 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3935 void *pvGst;
3936 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3937 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3938 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3939 break;
3940 }
3941
3942 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3943 {
3944 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3945 void *pvGst;
3946 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3947 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3948 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3949 break;
3950 }
3951
3952 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3953 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3954 {
3955 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3956 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3957 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3958 break;
3959 }
3960
3961 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3962 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3963 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3964 {
3965 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3966 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3967 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3968 break;
3969 }
3970
3971#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3972 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3973 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3974 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3975 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3976 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3977 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3978 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3979 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3980 break;
3981#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3982
3983 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3984 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3985 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3986 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3987 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3988 case PGMPOOLKIND_PAE_PD_PHYS:
3989 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3990 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3991 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3992 break;
3993
3994#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3995 case PGMPOOLKIND_32BIT_PD_PHYS:
3996 case PGMPOOLKIND_32BIT_PD:
3997 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
3998 break;
3999
4000 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4001 case PGMPOOLKIND_PAE_PDPT:
4002 case PGMPOOLKIND_PAE_PDPT_PHYS:
4003#endif
4004 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4005 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4006 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4007 break;
4008
4009 case PGMPOOLKIND_64BIT_PML4:
4010 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4011 break;
4012
4013 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4014 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4015 break;
4016
4017 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4018 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4019 break;
4020
4021 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4022 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4023 break;
4024
4025 default:
4026 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4027 }
4028
4029    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4030 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4031 ASMMemZeroPage(pvShw);
4032 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4033 pPage->fZeroed = true;
4034}
4035
4036#endif /* PGMPOOL_WITH_USER_TRACKING */
4037
4038/**
4039 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
4040 *
4041 * @param pPool The pool.
4042 */
4043static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
4044{
4045#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4046 /* Start a subset so we won't run out of mapping space. */
4047 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4048 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4049#endif
4050
4051 /*
4052 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
4053 */
4054 Assert(NIL_PGMPOOL_IDX == 0);
4055 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
4056 {
4057 /*
4058 * Get the page address.
4059 */
4060 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4061 union
4062 {
4063 uint64_t *pau64;
4064 uint32_t *pau32;
4065 } u;
4066
4067 /*
4068 * Mark stuff not present.
4069 */
4070 switch (pPage->enmKind)
4071 {
4072#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4073 case PGMPOOLKIND_ROOT_32BIT_PD:
4074 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4075 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
4076 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
4077 u.pau32[iPage] = 0;
4078 break;
4079
4080 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4081 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4082 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
4083 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
4084 u.pau64[iPage] = 0;
4085 break;
4086
4087 case PGMPOOLKIND_ROOT_PDPT:
4088 /* Not root of shadowed pages currently, ignore it. */
4089 break;
4090#endif
4091
4092 case PGMPOOLKIND_ROOT_NESTED:
4093 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4094 ASMMemZero32(u.pau64, PAGE_SIZE);
4095 break;
4096 }
4097 }
4098
4099 /*
4100 * Paranoia (to be removed), flag a global CR3 sync.
4101 */
4102 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
4103
4104#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4105 /* Pop the subset. */
4106 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4107#endif
4108}
4109
4110
4111/**
4112 * Flushes the entire cache.
4113 *
4114 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4115 * and will execute the CR3 flush.
4116 *
4117 * @param pPool The pool.
4118 */
4119static void pgmPoolFlushAllInt(PPGMPOOL pPool)
4120{
4121 PVM pVM = pPool->CTX_SUFF(pVM);
4122
4123 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
4124 LogFlow(("pgmPoolFlushAllInt:\n"));
4125
4126 /*
4127 * If there are no pages in the pool, there is nothing to do.
4128 */
4129 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4130 {
4131 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4132 return;
4133 }
4134
4135#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4136 /* Unmap the old CR3 value before flushing everything. */
4137 int rc = PGM_BTH_PFN(UnmapCR3, pVM)(pVM);
4138 AssertRC(rc);
4139#endif
4140
4141 /*
4142 * Nuke the free list and reinsert all pages into it.
4143 */
4144 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4145 {
4146 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4147
4148#ifdef IN_RING3
4149 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
4150#endif
4151#ifdef PGMPOOL_WITH_MONITORING
4152 if (pPage->fMonitored)
4153 pgmPoolMonitorFlush(pPool, pPage);
4154 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4155 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4156 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4157 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4158 pPage->cModifications = 0;
4159#endif
4160 pPage->GCPhys = NIL_RTGCPHYS;
4161 pPage->enmKind = PGMPOOLKIND_FREE;
4162 Assert(pPage->idx == i);
4163 pPage->iNext = i + 1;
4164 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4165 pPage->fSeenNonGlobal = false;
4166        pPage->fMonitored = false;
4167 pPage->fCached = false;
4168 pPage->fReusedFlushPending = false;
4169#ifdef PGMPOOL_WITH_USER_TRACKING
4170 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4171#else
4172 pPage->fCR3Mix = false;
4173#endif
4174#ifdef PGMPOOL_WITH_CACHE
4175 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4176 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4177#endif
4178 }
4179 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4180 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4181 pPool->cUsedPages = 0;
4182
4183#ifdef PGMPOOL_WITH_USER_TRACKING
4184 /*
4185 * Zap and reinitialize the user records.
4186 */
4187 pPool->cPresent = 0;
4188 pPool->iUserFreeHead = 0;
4189 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4190 const unsigned cMaxUsers = pPool->cMaxUsers;
4191 for (unsigned i = 0; i < cMaxUsers; i++)
4192 {
4193 paUsers[i].iNext = i + 1;
4194 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4195 paUsers[i].iUserTable = 0xfffffffe;
4196 }
4197 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4198#endif
4199
4200#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4201 /*
4202 * Clear all the GCPhys links and rebuild the phys ext free list.
4203 */
4204 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
4205 pRam;
4206 pRam = pRam->CTX_SUFF(pNext))
4207 {
4208 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4209 while (iPage-- > 0)
4210 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
4211 }
4212
4213 pPool->iPhysExtFreeHead = 0;
4214 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4215 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4216 for (unsigned i = 0; i < cMaxPhysExts; i++)
4217 {
4218 paPhysExts[i].iNext = i + 1;
4219 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4220 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4221 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4222 }
4223 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4224#endif
4225
4226#ifdef PGMPOOL_WITH_MONITORING
4227 /*
4228 * Just zap the modified list.
4229 */
4230 pPool->cModifiedPages = 0;
4231 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4232#endif
4233
4234#ifdef PGMPOOL_WITH_CACHE
4235 /*
4236 * Clear the GCPhys hash and the age list.
4237 */
4238 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4239 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4240 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4241 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4242#endif
4243
4244 /*
4245 * Flush all the special root pages.
4246 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4247 */
4248 pgmPoolFlushAllSpecialRoots(pPool);
4249 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4250 {
4251 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4252 pPage->iNext = NIL_PGMPOOL_IDX;
4253#ifdef PGMPOOL_WITH_MONITORING
4254 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4255 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4256 pPage->cModifications = 0;
4257 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4258 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4259 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4260 if (pPage->fMonitored)
4261 {
4262 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4263 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4264 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4265 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4266 pPool->pszAccessHandler);
4267 AssertFatalRCSuccess(rc);
4268# ifdef PGMPOOL_WITH_CACHE
4269 pgmPoolHashInsert(pPool, pPage);
4270# endif
4271 }
4272#endif
4273#ifdef PGMPOOL_WITH_USER_TRACKING
4274 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4275#endif
4276#ifdef PGMPOOL_WITH_CACHE
4277 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4278 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4279#endif
4280 }
4281
4282 /*
4283 * Finally, assert the FF.
4284 */
4285 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
4286
4287 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4288}
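/*
 * Illustrative sketch (editorial addition): the invariant the rebuild loop in
 * pgmPoolFlushAllInt establishes -- the free list is a simple ascending chain
 * covering every non-special page.
 */
#if 0 /* example only */
    Assert(pPool->iFreeHead == PGMPOOL_IDX_FIRST);
    for (unsigned i = PGMPOOL_IDX_FIRST; i < pPool->cCurPages - 1; i++)
        Assert(pPool->aPages[i].iNext == i + 1);
    Assert(pPool->aPages[pPool->cCurPages - 1].iNext == NIL_PGMPOOL_IDX);
#endif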
4289
4290
4291/**
4292 * Flushes a pool page.
4293 *
4294 * This moves the page to the free list after removing all user references to it.
4295 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
4296 *
4297 * @returns VBox status code.
4298 * @retval VINF_SUCCESS on success.
4299 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
4300 * @param pPool The pool.
4301 * @param pPage The shadow page.
4302 */
4303int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4304{
4305 int rc = VINF_SUCCESS;
4306 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4307 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4308 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4309
4310 /*
4311 * Quietly reject any attempts at flushing any of the special root pages.
4312 */
4313 if (pPage->idx < PGMPOOL_IDX_FIRST)
4314 {
4315 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4316 return VINF_SUCCESS;
4317 }
4318
4319 /*
4320 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4321 */
4322#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4323 if (pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
4324 {
4325 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4326 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4327 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4328 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4329 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4330 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4331 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4332 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4333 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4334 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4335#else
4336 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
4337 {
4338 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4,
4339 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4340#endif
4341 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4342 return VINF_SUCCESS;
4343 }
4344
4345#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4346 /* Start a subset so we won't run out of mapping space. */
4347 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4348 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4349#endif
4350
4351 /*
4352     * Mark the page as being in need of an ASMMemZeroPage().
4353 */
4354 pPage->fZeroed = false;
4355
4356#ifdef PGMPOOL_WITH_USER_TRACKING
4357 /*
4358 * Clear the page.
4359 */
4360 pgmPoolTrackClearPageUsers(pPool, pPage);
4361 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4362 pgmPoolTrackDeref(pPool, pPage);
4363 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4364#endif
4365
4366#ifdef PGMPOOL_WITH_CACHE
4367 /*
4368 * Flush it from the cache.
4369 */
4370 pgmPoolCacheFlushPage(pPool, pPage);
4371#endif /* PGMPOOL_WITH_CACHE */
4372
4373#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4374 /* Heavy stuff done. */
4375 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4376#endif
4377
4378#ifdef PGMPOOL_WITH_MONITORING
4379 /*
4380     * Deregister the monitoring.
4381 */
4382 if (pPage->fMonitored)
4383 rc = pgmPoolMonitorFlush(pPool, pPage);
4384#endif
4385
4386 /*
4387 * Free the page.
4388 */
4389 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4390 pPage->iNext = pPool->iFreeHead;
4391 pPool->iFreeHead = pPage->idx;
4392 pPage->enmKind = PGMPOOLKIND_FREE;
4393 pPage->GCPhys = NIL_RTGCPHYS;
4394 pPage->fReusedFlushPending = false;
4395
4396 pPool->cUsedPages--;
4397 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4398 return rc;
4399}
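/*
 * Illustrative sketch (editorial addition): the release at the end of
 * pgmPoolFlushPage is a plain LIFO push onto the pool's free list, mirroring
 * the pop in pgmPoolAlloc further down.
 */
#if 0 /* example only */
    pPage->iNext     = pPool->iFreeHead; /* link in front of the old head */
    pPool->iFreeHead = pPage->idx;       /* the page becomes the new head */
#endif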
4400
4401
4402/**
4403 * Frees a usage of a pool page.
4404 *
4405 * The caller is responsible for updating the user table so that it no longer
4406 * references the shadow page.
4407 *
4408 * @param pPool The pool.
4409 * @param pPage The shadow page.
4410 * @param iUser The shadow page pool index of the user table.
4411 * @param iUserTable The index into the user table (shadowed).
4412 */
4413void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4414{
4415 STAM_PROFILE_START(&pPool->StatFree, a);
4416 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4417 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4418 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4419#ifdef PGMPOOL_WITH_USER_TRACKING
4420 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4421#endif
4422#ifdef PGMPOOL_WITH_CACHE
4423 if (!pPage->fCached)
4424#endif
4425 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
4426 STAM_PROFILE_STOP(&pPool->StatFree, a);
4427}
4428
4429
4430/**
4431 * Makes sure one or more free pages are available, growing the pool if possible.
4432 *
4433 * @returns VBox status code.
4434 * @retval VINF_SUCCESS on success.
4435 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4436 *
4437 * @param pPool The pool.
4438 * @param enmKind Page table kind.
4439 * @param iUser The user of the page.
4440 */
4441static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4442{
4443 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4444
4445 /*
4446 * If the pool isn't full grown yet, expand it.
4447 */
4448 if ( pPool->cCurPages < pPool->cMaxPages
4449#if defined(VBOX_WITH_PGMPOOL_PAGING_ONLY) && defined(IN_RC)
4450 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4451 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4452 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4453#endif
4454 )
4455 {
4456 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4457#ifdef IN_RING3
4458 int rc = PGMR3PoolGrow(pPool->pVMR3);
4459#else
4460 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
4461#endif
4462 if (RT_FAILURE(rc))
4463 return rc;
4464 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4465 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4466 return VINF_SUCCESS;
4467 }
4468
4469#ifdef PGMPOOL_WITH_CACHE
4470 /*
4471 * Free one cached page.
4472 */
4473 return pgmPoolCacheFreeOne(pPool, iUser);
4474#else
4475 /*
4476 * Flush the pool.
4477 *
4478 * If we have tracking enabled, it should be possible to come up with
4479 * a cheap replacement strategy...
4480 */
4481    /** @todo incompatible with long mode paging (cr3 root will be flushed) */
4482    Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
4483 pgmPoolFlushAllInt(pPool);
4484 return VERR_PGM_POOL_FLUSHED;
4485#endif
4486}
4487
4488
4489/**
4490 * Allocates a page from the pool.
4491 *
4492 * This page may actually be a cached page and not in need of any processing
4493 * on the callers part.
4494 *
4495 * @returns VBox status code.
4496 * @retval VINF_SUCCESS if a NEW page was allocated.
4497 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4498 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4499 * @param pVM The VM handle.
4500 * @param GCPhys The GC physical address of the page we're going to shadow.
4501 * For 4MB and 2MB PD entries, it's the first address the
4502 * shadow PT is covering.
4503 * @param enmKind The kind of mapping.
4504 * @param iUser The shadow page pool index of the user table.
4505 * @param iUserTable The index into the user table (shadowed).
4506 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4507 */
4508int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
4509{
4510 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4511 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4512 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4513 *ppPage = NULL;
4514 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4515 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4516 * Assert(!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4517
4518#ifdef PGMPOOL_WITH_CACHE
4519 if (pPool->fCacheEnabled)
4520 {
4521 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
4522 if (RT_SUCCESS(rc2))
4523 {
4524 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4525 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4526 return rc2;
4527 }
4528 }
4529#endif
4530
4531 /*
4532 * Allocate a new one.
4533 */
4534 int rc = VINF_SUCCESS;
4535 uint16_t iNew = pPool->iFreeHead;
4536 if (iNew == NIL_PGMPOOL_IDX)
4537 {
4538 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4539 if (RT_FAILURE(rc))
4540 {
4541 if (rc != VERR_PGM_POOL_CLEARED)
4542 {
4543 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4544 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4545 return rc;
4546 }
4547 Log(("pgmPoolMakeMoreFreePages failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
4548 rc = VERR_PGM_POOL_FLUSHED;
4549 }
4550 iNew = pPool->iFreeHead;
4551 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4552 }
4553
4554 /* unlink the free head */
4555 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4556 pPool->iFreeHead = pPage->iNext;
4557 pPage->iNext = NIL_PGMPOOL_IDX;
4558
4559 /*
4560 * Initialize it.
4561 */
4562 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4563 pPage->enmKind = enmKind;
4564 pPage->GCPhys = GCPhys;
4565 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4566 pPage->fMonitored = false;
4567 pPage->fCached = false;
4568 pPage->fReusedFlushPending = false;
4569#ifdef PGMPOOL_WITH_MONITORING
4570 pPage->cModifications = 0;
4571 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4572 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4573#else
4574 pPage->fCR3Mix = false;
4575#endif
4576#ifdef PGMPOOL_WITH_USER_TRACKING
4577 pPage->cPresent = 0;
4578 pPage->iFirstPresent = ~0;
4579
4580 /*
4581 * Insert into the tracking and cache. If this fails, free the page.
4582 */
4583 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4584 if (RT_FAILURE(rc3))
4585 {
4586 if (rc3 != VERR_PGM_POOL_CLEARED)
4587 {
4588 pPool->cUsedPages--;
4589 pPage->enmKind = PGMPOOLKIND_FREE;
4590 pPage->GCPhys = NIL_RTGCPHYS;
4591 pPage->iNext = pPool->iFreeHead;
4592 pPool->iFreeHead = pPage->idx;
4593 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4594 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4595 return rc3;
4596 }
4597 Log(("pgmPoolTrackInsert failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4598 rc = VERR_PGM_POOL_FLUSHED;
4599 }
4600#endif /* PGMPOOL_WITH_USER_TRACKING */
4601
4602 /*
4603 * Commit the allocation, clear the page and return.
4604 */
4605#ifdef VBOX_WITH_STATISTICS
4606 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4607 pPool->cUsedPagesHigh = pPool->cUsedPages;
4608#endif
4609
4610 if (!pPage->fZeroed)
4611 {
4612 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4613 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4614 ASMMemZeroPage(pv);
4615 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4616 }
4617
4618 *ppPage = pPage;
4619 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4620 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4621 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4622 return rc;
4623}
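/*
 * Illustrative sketch (editorial addition): a typical caller allocating a
 * shadow page table. GCPhysPT, iUser and iUserTable stand in for the caller's
 * context; the point is the status code handling.
 */
#if 0 /* example only */
    PPGMPOOLPAGE pShwPage;
    int rc = pgmPoolAlloc(pVM, GCPhysPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, iUser, iUserTable, &pShwPage);
    if (rc == VINF_PGM_CACHED_PAGE)
    {
        /* Reusing a cached page: its contents are already valid. */
    }
    else if (RT_SUCCESS(rc))
    {
        /* A brand new, zeroed page: the caller fills in the entries. */
    }
#endif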
4624
4625
4626/**
4627 * Frees a usage of a pool page.
4628 *
4629 * @param pVM The VM handle.
4630 * @param HCPhys The HC physical address of the shadow page.
4631 * @param iUser The shadow page pool index of the user table.
4632 * @param iUserTable The index into the user table (shadowed).
4633 */
4634void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4635{
4636 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4637 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4638 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4639}
4640
4641
4642/**
4643 * Gets an in-use page in the pool by its physical address.
4644 *
4645 * @returns Pointer to the page.
4646 * @param pVM The VM handle.
4647 * @param HCPhys The HC physical address of the shadow page.
4648 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4649 */
4650PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4651{
4652 /** @todo profile this! */
4653 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4654 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4655 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%RHp -> %p:{.idx=%d .GCPhys=%RGp .enmKind=%s}\n",
4656 HCPhys, pPage, pPage->idx, pPage->GCPhys, pgmPoolPoolKindToStr(pPage->enmKind)));
4657 return pPage;
4658}
4659
4660
4661/**
4662 * Flushes the entire cache.
4663 *
4664 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4665 * and will execute the CR3 flush.
4666 *
4667 * @param pVM The VM handle.
4668 */
4669void pgmPoolFlushAll(PVM pVM)
4670{
4671 LogFlow(("pgmPoolFlushAll:\n"));
4672 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4673}
4674
4675#ifdef LOG_ENABLED
4676static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
4677{
4678 switch(enmKind)
4679 {
4680 case PGMPOOLKIND_INVALID:
4681 return "PGMPOOLKIND_INVALID";
4682 case PGMPOOLKIND_FREE:
4683 return "PGMPOOLKIND_FREE";
4684 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4685 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4686 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4687 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4688 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4689 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4690 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4691 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4692 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4693 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4694 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4695 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
4696 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4697 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
4698 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4699 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
4700 case PGMPOOLKIND_32BIT_PD:
4701 return "PGMPOOLKIND_32BIT_PD";
4702 case PGMPOOLKIND_32BIT_PD_PHYS:
4703 return "PGMPOOLKIND_32BIT_PD_PHYS";
4704 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4705 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
4706 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4707 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
4708 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4709 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
4710 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4711 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
4712 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4713 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
4714 case PGMPOOLKIND_PAE_PD_PHYS:
4715 return "PGMPOOLKIND_PAE_PD_PHYS";
4716 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4717 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
4718 case PGMPOOLKIND_PAE_PDPT:
4719 return "PGMPOOLKIND_PAE_PDPT";
4720 case PGMPOOLKIND_PAE_PDPT_PHYS:
4721 return "PGMPOOLKIND_PAE_PDPT_PHYS";
4722 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4723 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
4724 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4725 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
4726 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4727 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
4728 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4729 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
4730 case PGMPOOLKIND_64BIT_PML4:
4731 return "PGMPOOLKIND_64BIT_PML4";
4732 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4733 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
4734 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4735 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
4736 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4737 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
4738#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4739 case PGMPOOLKIND_ROOT_32BIT_PD:
4740 return "PGMPOOLKIND_ROOT_32BIT_PD";
4741 case PGMPOOLKIND_ROOT_PAE_PD:
4742 return "PGMPOOLKIND_ROOT_PAE_PD";
4743 case PGMPOOLKIND_ROOT_PDPT:
4744 return "PGMPOOLKIND_ROOT_PDPT";
4745#endif
4746 case PGMPOOLKIND_ROOT_NESTED:
4747 return "PGMPOOLKIND_ROOT_NESTED";
4748 }
4749 return "Unknown kind!";
4750}
4751#endif