source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp @ 17203
1/* $Id: PGMAllPool.cpp 17203 2009-02-27 13:14:53Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42#include <iprt/string.h>
43
44
45/*******************************************************************************
46* Internal Functions *
47*******************************************************************************/
48__BEGIN_DECLS
49static void pgmPoolFlushAllInt(PPGMPOOL pPool);
50#ifdef PGMPOOL_WITH_USER_TRACKING
51DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
52DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
53static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
54#endif
55#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
56static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
57#endif
58#ifdef PGMPOOL_WITH_CACHE
59static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
60#endif
61#ifdef PGMPOOL_WITH_MONITORING
62static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
63#endif
64#ifndef IN_RING3
65DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
66#endif
67#ifdef LOG_ENABLED
68static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
69#endif
70__END_DECLS
71
72
73/**
74 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
75 *
76 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
77 * @param enmKind The page kind.
78 */
79DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
80{
81 switch (enmKind)
82 {
83 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
84 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
85 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
86 return true;
87 default:
88 return false;
89 }
90}
91
92
93#if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
94/**
95 * Maps a pool page into the current context.
96 *
97 * @returns Pointer to the mapping.
98 * @param pPGM Pointer to the PGM instance data.
99 * @param pPage The page to map.
100 */
101void *pgmPoolMapPageFallback(PPGM pPGM, PPGMPOOLPAGE pPage)
102{
103 /* general pages are taken care of by the inlined part; it
104 only ends up here in case of failure. */
105 AssertReleaseReturn(pPage->idx < PGMPOOL_IDX_FIRST, NULL);
106
107/** @todo make sure HCPhys is valid for *all* indexes. */
108 /* special pages. */
109# ifdef IN_RC
110 switch (pPage->idx)
111 {
112# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
113 case PGMPOOL_IDX_PD:
114 case PGMPOOL_IDX_PDPT:
115 case PGMPOOL_IDX_AMD64_CR3:
116 return pPGM->pShwRootRC;
117# else
118 case PGMPOOL_IDX_PD:
119 return pPGM->pShw32BitPdRC;
120 case PGMPOOL_IDX_PAE_PD:
121 case PGMPOOL_IDX_PAE_PD_0:
122 return pPGM->apShwPaePDsRC[0];
123 case PGMPOOL_IDX_PAE_PD_1:
124 return pPGM->apShwPaePDsRC[1];
125 case PGMPOOL_IDX_PAE_PD_2:
126 return pPGM->apShwPaePDsRC[2];
127 case PGMPOOL_IDX_PAE_PD_3:
128 return pPGM->apShwPaePDsRC[3];
129 case PGMPOOL_IDX_PDPT:
130 return pPGM->pShwPaePdptRC;
131# endif
132 default:
133 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
134 return NULL;
135 }
136
137# else /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
138 RTHCPHYS HCPhys;
139 switch (pPage->idx)
140 {
141# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
142 case PGMPOOL_IDX_PD:
143 case PGMPOOL_IDX_PDPT:
144 case PGMPOOL_IDX_AMD64_CR3:
145 HCPhys = pPGM->HCPhysShwCR3;
146 break;
147
148 case PGMPOOL_IDX_NESTED_ROOT:
149 HCPhys = pPGM->HCPhysShwNestedRoot;
150 break;
151# else
152 case PGMPOOL_IDX_PD:
153 HCPhys = pPGM->HCPhysShw32BitPD;
154 break;
155 case PGMPOOL_IDX_PAE_PD_0:
156 HCPhys = pPGM->aHCPhysPaePDs[0];
157 break;
158 case PGMPOOL_IDX_PAE_PD_1:
159 HCPhys = pPGM->aHCPhysPaePDs[1];
160 break;
161 case PGMPOOL_IDX_PAE_PD_2:
162 HCPhys = pPGM->aHCPhysPaePDs[2];
163 break;
164 case PGMPOOL_IDX_PAE_PD_3:
165 HCPhys = pPGM->aHCPhysPaePDs[3];
166 break;
167 case PGMPOOL_IDX_PDPT:
168 HCPhys = pPGM->HCPhysShwPaePdpt;
169 break;
170 case PGMPOOL_IDX_NESTED_ROOT:
171 HCPhys = pPGM->HCPhysShwNestedRoot;
172 break;
173 case PGMPOOL_IDX_PAE_PD:
174 AssertReleaseMsgFailed(("PGMPOOL_IDX_PAE_PD is not usable in VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 context\n"));
175 return NULL;
176# endif
177 default:
178 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
179 return NULL;
180 }
181 AssertMsg(HCPhys && HCPhys != NIL_RTHCPHYS && !(PAGE_OFFSET_MASK & HCPhys), ("%RHp\n", HCPhys));
182
183 void *pv;
184 pgmR0DynMapHCPageInlined(pPGM, HCPhys, &pv);
185 return pv;
186# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
187}
188#endif /* IN_RC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
189
190
191#ifdef PGMPOOL_WITH_MONITORING
192/**
193 * Determine the size of a write instruction.
194 * @returns number of bytes written.
195 * @param pDis The disassembler state.
196 */
197static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
198{
199 /*
200 * This is very crude and possibly wrong for some opcodes,
201 * but since it's not really supposed to be called we can
202 * probably live with that.
203 */
204 return DISGetParamSize(pDis, &pDis->param1);
205}
206
207
208/**
209 * Flushes a chain of pages sharing the same access monitor.
210 *
211 * @returns VBox status code suitable for scheduling.
212 * @param pPool The pool.
213 * @param pPage A page in the chain.
214 */
215int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
216{
217 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
218
219 /*
220 * Find the list head.
221 */
222 uint16_t idx = pPage->idx;
223 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
224 {
225 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
226 {
227 idx = pPage->iMonitoredPrev;
228 Assert(idx != pPage->idx);
229 pPage = &pPool->aPages[idx];
230 }
231 }
232
233 /*
234 * Iterate the list flushing each shadow page.
235 */
236 int rc = VINF_SUCCESS;
237 for (;;)
238 {
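 /* Note: fetch the next link before flushing, as flushing may unlink the page from the monitor chain. */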
239 idx = pPage->iMonitoredNext;
240 Assert(idx != pPage->idx);
241 if (pPage->idx >= PGMPOOL_IDX_FIRST)
242 {
243 int rc2 = pgmPoolFlushPage(pPool, pPage);
244 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
245 rc = VINF_PGM_SYNC_CR3;
246 }
247 /* next */
248 if (idx == NIL_PGMPOOL_IDX)
249 break;
250 pPage = &pPool->aPages[idx];
251 }
252 return rc;
253}
254
255
256/**
257 * Wrapper for getting the current context pointer to the entry being modified.
258 *
259 * @returns VBox status code suitable for scheduling.
260 * @param pVM VM Handle.
261 * @param pvDst Destination address
262 * @param pvSrc Source guest virtual address.
263 * @param GCPhysSrc The source guest physical address.
264 * @param cb Size of data to read
265 */
266DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
267{
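 /* Note: the source address (or physical address) is masked down to a cb-aligned boundary
    (cb is assumed to be a power of two), so a complete PTE/PDE-sized entry is read even
    when the faulting write is misaligned. */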
268#if defined(IN_RING3)
269 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
270 return VINF_SUCCESS;
271#else
272 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
273 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
274#endif
275}
276
277/**
278 * Process shadow entries before they are changed by the guest.
279 *
280 * For PT entries we will clear them. For PD entries, we'll simply check
281 * for mapping conflicts and set the SyncCR3 FF if found.
282 *
283 * @param pPool The pool.
284 * @param pPage The head page.
285 * @param GCPhysFault The guest physical fault address.
286 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
287 * In R3 this is the host context 'fault' address.
288 * @param pCpu The disassembler state for figuring out the write size.
289 * This need not be specified if the caller knows we won't do cross entry accesses.
290 */
291void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, PDISCPUSTATE pCpu)
292{
293 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
294 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
295 const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
296
297 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%s cbWrite=%d\n", pvAddress, GCPhysFault, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
298 for (;;)
299 {
300 union
301 {
302 void *pv;
303 PX86PT pPT;
304 PX86PTPAE pPTPae;
305 PX86PD pPD;
306 PX86PDPAE pPDPae;
307 PX86PDPT pPDPT;
308 PX86PML4 pPML4;
309 } uShw;
310
311 switch (pPage->enmKind)
312 {
313 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
314 {
315 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
316 const unsigned iShw = off / sizeof(X86PTE);
317 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
318 if (uShw.pPT->a[iShw].n.u1Present)
319 {
320# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
321 X86PTE GstPte;
322
323 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
324 AssertRC(rc);
325 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
326 pgmPoolTracDerefGCPhysHint(pPool, pPage,
327 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
328 GstPte.u & X86_PTE_PG_MASK);
329# endif
330 uShw.pPT->a[iShw].u = 0;
331 }
332 break;
333 }
334
335 /* page/2 sized */
336 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
337 {
338 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
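 /* A PAE shadow PT (512 8-byte entries) covers only half of the guest 32-bit PT (1024 4-byte
    entries); only act if the write hit the half this shadow page actually shadows. */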
339 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
340 {
341 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
342 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
343 if (uShw.pPTPae->a[iShw].n.u1Present)
344 {
345# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
346 X86PTE GstPte;
347 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
348 AssertRC(rc);
349
350 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
351 pgmPoolTracDerefGCPhysHint(pPool, pPage,
352 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
353 GstPte.u & X86_PTE_PG_MASK);
354# endif
355 uShw.pPTPae->a[iShw].u = 0;
356 }
357 }
358 break;
359 }
360
361# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
362 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
363 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
364 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
365 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
366 {
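 /* The guest 32-bit PD (1024 entries, 4MB each) is shadowed by four PAE PDs (512 entries,
    2MB each): iGst / 256 selects the shadow PD, and each guest PDE expands to two
    consecutive shadow PDEs, hence (iGst % 256) * 2. */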
367 unsigned iGst = off / sizeof(X86PDE);
368 unsigned iShwPdpt = iGst / 256;
369 unsigned iShw = (iGst % 256) * 2;
370 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
371
372 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x idx = %d page idx=%d\n", iGst, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
373 if (iShwPdpt == pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
374 {
375 for (unsigned i=0;i<2;i++)
376 {
377# ifndef IN_RING0
378 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
379 {
380 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
381 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
382 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
383 break;
384 }
385 else
386# endif /* !IN_RING0 */
387 if (uShw.pPDPae->a[iShw+i].n.u1Present)
388 {
389 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
390 pgmPoolFree(pPool->CTX_SUFF(pVM),
391 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
392 pPage->idx,
393 iShw + i);
394 uShw.pPDPae->a[iShw+i].u = 0;
395 }
396
397 /* paranoia / a bit assumptive. */
398 if ( pCpu
399 && (off & 3)
400 && (off & 3) + cbWrite > 4)
401 {
402 const unsigned iShw2 = iShw + 2 + i;
403 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
404 {
405# ifndef IN_RING0
406 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
407 {
408 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
409 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
410 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
411 break;
412 }
413 else
414# endif /* !IN_RING0 */
415 if (uShw.pPDPae->a[iShw2].n.u1Present)
416 {
417 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
418 pgmPoolFree(pPool->CTX_SUFF(pVM),
419 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
420 pPage->idx,
421 iShw2);
422 uShw.pPDPae->a[iShw2].u = 0;
423 }
424 }
425 }
426 }
427 }
428 break;
429 }
430# endif
431
432
433 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
434 {
435 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
436 const unsigned iShw = off / sizeof(X86PTEPAE);
437 if (uShw.pPTPae->a[iShw].n.u1Present)
438 {
439# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
440 X86PTEPAE GstPte;
441 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
442 AssertRC(rc);
443
444 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
445 pgmPoolTracDerefGCPhysHint(pPool, pPage,
446 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
447 GstPte.u & X86_PTE_PAE_PG_MASK);
448# endif
449 uShw.pPTPae->a[iShw].u = 0;
450 }
451
452 /* paranoia / a bit assumptive. */
453 if ( pCpu
454 && (off & 7)
455 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
456 {
457 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
458 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
459
460 if (uShw.pPTPae->a[iShw2].n.u1Present)
461 {
462# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
463 X86PTEPAE GstPte;
464# ifdef IN_RING3
465 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
466# else
467 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
468# endif
469 AssertRC(rc);
470 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
471 pgmPoolTracDerefGCPhysHint(pPool, pPage,
472 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
473 GstPte.u & X86_PTE_PAE_PG_MASK);
474# endif
475 uShw.pPTPae->a[iShw2].u = 0;
476 }
477 }
478 break;
479 }
480
481# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
482 case PGMPOOLKIND_32BIT_PD:
483# else
484 case PGMPOOLKIND_ROOT_32BIT_PD:
485# endif
486 {
487 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
488 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
489
490# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
491 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
492# endif
493# ifndef IN_RING0
494 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
495 {
496 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
497 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
498 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
499 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
500 break;
501 }
502# endif /* !IN_RING0 */
503# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
504# ifndef IN_RING0
505 else
506# endif /* !IN_RING0 */
507 {
508 if (uShw.pPD->a[iShw].n.u1Present)
509 {
510 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
511 pgmPoolFree(pPool->CTX_SUFF(pVM),
512 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
513 pPage->idx,
514 iShw);
515 uShw.pPD->a[iShw].u = 0;
516 }
517 }
518# endif
519 /* paranoia / a bit assumptive. */
520 if ( pCpu
521 && (off & 3)
522 && (off & 3) + cbWrite > sizeof(X86PTE))
523 {
524 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
525 if ( iShw2 != iShw
526 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
527 {
528# ifndef IN_RING0
529 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
530 {
531 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
532 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
533 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
534 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
535 }
536# endif /* !IN_RING0 */
537# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
538# ifndef IN_RING0
539 else
540# endif /* !IN_RING0 */
541 {
542 if (uShw.pPD->a[iShw2].n.u1Present)
543 {
544 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
545 pgmPoolFree(pPool->CTX_SUFF(pVM),
546 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
547 pPage->idx,
548 iShw2);
549 uShw.pPD->a[iShw2].u = 0;
550 }
551 }
552# endif
553 }
554 }
555#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
556 if ( uShw.pPD->a[iShw].n.u1Present
557 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
558 {
559 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
560# ifdef IN_RC /* TLB load - we're pushing things a bit... */
561 ASMProbeReadByte(pvAddress);
562# endif
563 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
564 uShw.pPD->a[iShw].u = 0;
565 }
566#endif
567 break;
568 }
569
570# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
571 case PGMPOOLKIND_ROOT_PAE_PD:
572 {
573 unsigned iGst = off / sizeof(X86PDE); // ASSUMING 32-bit guest paging!
574 unsigned iShwPdpt = iGst / 256;
575 unsigned iShw = (iGst % 256) * 2;
576 Assert(pPage->idx == PGMPOOL_IDX_PAE_PD);
577 PPGMPOOLPAGE pPage2 = pPage + 1 + iShwPdpt;
578 Assert(pPage2->idx == PGMPOOL_IDX_PAE_PD_0 + iShwPdpt);
579 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage2);
580 for (unsigned i = 0; i < 2; i++, iShw++)
581 {
582 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
583 {
584 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
585 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
586 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw));
587 }
588 /* paranoia / a bit assumptive. */
589 else if ( pCpu
590 && (off & 3)
591 && (off & 3) + cbWrite > 4)
592 {
593 const unsigned iShw2 = iShw + 2;
594 if ( iShw2 < RT_ELEMENTS(uShw.pPDPae->a) /** @todo was completely wrong, it's better now after #1865 but still wrong from cross PD. */
595 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
596 {
597 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
598 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
599 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
600 }
601 }
602#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
603 if ( uShw.pPDPae->a[iShw].n.u1Present
604 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
605 {
606 LogFlow(("pgmPoolMonitorChainChanging: iShwPdpt=%#x iShw=%#x: %RX64 -> freeing it!\n", iShwPdpt, iShw, uShw.pPDPae->a[iShw].u));
607# ifdef IN_RC /* TLB load - we're pushing things a bit... */
608 ASMProbeReadByte(pvAddress);
609# endif
610 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + iShwPdpt * X86_PG_PAE_ENTRIES);
611 uShw.pPDPae->a[iShw].u = 0;
612 }
613#endif
614 }
615 break;
616 }
617# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */
618
619 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
620 {
621 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
622 const unsigned iShw = off / sizeof(X86PDEPAE);
623#ifndef IN_RING0
624 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
625 {
626 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
627 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
628 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
629 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
630 break;
631 }
632#endif /* !IN_RING0 */
633#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
634 /*
635 * Causes trouble when the guest uses a PDE to refer to the whole page table level
636 * structure. (Invalidate here; faults later on when it tries to change the page
637 * table entries -> recheck; probably only applies to the RC case.)
638 */
639# ifndef IN_RING0
640 else
641# endif /* !IN_RING0 */
642 {
643 if (uShw.pPDPae->a[iShw].n.u1Present)
644 {
645 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
646 pgmPoolFree(pPool->CTX_SUFF(pVM),
647 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
648# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
649 pPage->idx,
650 iShw);
651# else
652 /* Note: hardcoded PAE implementation dependency */
653 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
654 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
655# endif
656 uShw.pPDPae->a[iShw].u = 0;
657 }
658 }
659#endif
660 /* paranoia / a bit assumptive. */
661 if ( pCpu
662 && (off & 7)
663 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
664 {
665 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
666 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
667
668#ifndef IN_RING0
669 if ( iShw2 != iShw
670 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
671 {
672 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
673 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
674 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
675 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
676 }
677#endif /* !IN_RING0 */
678#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
679# ifndef IN_RING0
680 else
681# endif /* !IN_RING0 */
682 if (uShw.pPDPae->a[iShw2].n.u1Present)
683 {
684 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
685 pgmPoolFree(pPool->CTX_SUFF(pVM),
686 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
687# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
688 pPage->idx,
689 iShw2);
690# else
691 /* Note: hardcoded PAE implementation dependency */
692 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
693 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
694# endif
695 uShw.pPDPae->a[iShw2].u = 0;
696 }
697#endif
698 }
699 break;
700 }
701
702# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
703 case PGMPOOLKIND_PAE_PDPT:
704# else
705 case PGMPOOLKIND_ROOT_PDPT:
706# endif
707 {
708 /*
709 * Hopefully this doesn't happen very often:
710 * - touching unused parts of the page
711 * - messing with the bits of pd pointers without changing the physical address
712 */
713# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
714 /* PDPT roots are not page aligned; 32 byte only! */
715 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
716# else
717 const unsigned offPdpt = off;
718# endif
719 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
720 const unsigned iShw = offPdpt / sizeof(X86PDPE);
721 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
722 {
723# ifndef IN_RING0
724 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
725 {
726 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
727 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
728 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
729 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
730 break;
731 }
732# endif /* !IN_RING0 */
733# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
734# ifndef IN_RING0
735 else
736# endif /* !IN_RING0 */
737 if (uShw.pPDPT->a[iShw].n.u1Present)
738 {
739 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
740 pgmPoolFree(pPool->CTX_SUFF(pVM),
741 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
742 pPage->idx,
743 iShw);
744 uShw.pPDPT->a[iShw].u = 0;
745 }
746# endif
747
748 /* paranoia / a bit assumptive. */
749 if ( pCpu
750 && (offPdpt & 7)
751 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
752 {
753 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
754 if ( iShw2 != iShw
755 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
756 {
757# ifndef IN_RING0
758 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
759 {
760 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
761 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
762 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
763 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
764 }
765# endif /* !IN_RING0 */
766# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
767# ifndef IN_RING0
768 else
769# endif /* !IN_RING0 */
770 if (uShw.pPDPT->a[iShw2].n.u1Present)
771 {
772 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
773 pgmPoolFree(pPool->CTX_SUFF(pVM),
774 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
775 pPage->idx,
776 iShw2);
777 uShw.pPDPT->a[iShw2].u = 0;
778 }
779# endif
780 }
781 }
782 }
783 break;
784 }
785
786#ifndef IN_RC
787 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
788 {
789 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
790 const unsigned iShw = off / sizeof(X86PDEPAE);
791 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
792 if (uShw.pPDPae->a[iShw].n.u1Present)
793 {
794 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
795 pgmPoolFree(pPool->CTX_SUFF(pVM),
796 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
797 pPage->idx,
798 iShw);
799 uShw.pPDPae->a[iShw].u = 0;
800 }
801 /* paranoia / a bit assumptive. */
802 if ( pCpu
803 && (off & 7)
804 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
805 {
806 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
807 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
808
809 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
810 if (uShw.pPDPae->a[iShw2].n.u1Present)
811 {
812 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
813 pgmPoolFree(pPool->CTX_SUFF(pVM),
814 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
815 pPage->idx,
816 iShw2);
817 uShw.pPDPae->a[iShw2].u = 0;
818 }
819 }
820 break;
821 }
822
823 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
824 {
825 /*
826 * Hopefully this doesn't happen very often:
827 * - messing with the bits of pd pointers without changing the physical address
828 */
829# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
830 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
831# endif
832 {
833 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
834 const unsigned iShw = off / sizeof(X86PDPE);
835 if (uShw.pPDPT->a[iShw].n.u1Present)
836 {
837 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
838 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
839 uShw.pPDPT->a[iShw].u = 0;
840 }
841 /* paranoia / a bit assumptive. */
842 if ( pCpu
843 && (off & 7)
844 && (off & 7) + cbWrite > sizeof(X86PDPE))
845 {
846 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
847 if (uShw.pPDPT->a[iShw2].n.u1Present)
848 {
849 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
850 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
851 uShw.pPDPT->a[iShw2].u = 0;
852 }
853 }
854 }
855 break;
856 }
857
858 case PGMPOOLKIND_64BIT_PML4:
859 {
860 /*
861 * Hopefully this doesn't happen very often:
862 * - messing with the bits of pd pointers without changing the physical address
863 */
864# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
865 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
866# endif
867 {
868 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
869 const unsigned iShw = off / sizeof(X86PDPE);
870 if (uShw.pPML4->a[iShw].n.u1Present)
871 {
872 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
873 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
874 uShw.pPML4->a[iShw].u = 0;
875 }
876 /* paranoia / a bit assumptive. */
877 if ( pCpu
878 && (off & 7)
879 && (off & 7) + cbWrite > sizeof(X86PDPE))
880 {
881 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
882 if (uShw.pPML4->a[iShw2].n.u1Present)
883 {
884 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
885 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
886 uShw.pPML4->a[iShw2].u = 0;
887 }
888 }
889 }
890 break;
891 }
892#endif /* !IN_RC */
893
894 default:
895 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
896 }
897
898 /* next */
899 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
900 return;
901 pPage = &pPool->aPages[pPage->iMonitoredNext];
902 }
903}
904
905# ifndef IN_RING3
906/**
907 * Checks if an access could be a fork operation in progress.
908 *
909 * Meaning that the guest is setting up the parent process for Copy-On-Write.
910 *
911 * @returns true if it's likely that we're forking, otherwise false.
912 * @param pPool The pool.
913 * @param pCpu The disassembled instruction.
914 * @param offFault The access offset.
915 */
916DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
917{
918 /*
919 * i386 linux is using btr to clear X86_PTE_RW.
920 * The functions involved are (2.6.16 source inspection):
921 * clear_bit
922 * ptep_set_wrprotect
923 * copy_one_pte
924 * copy_pte_range
925 * copy_pmd_range
926 * copy_pud_range
927 * copy_page_range
928 * dup_mmap
929 * dup_mm
930 * copy_mm
931 * copy_process
932 * do_fork
933 */
934 if ( pCpu->pCurInstr->opcode == OP_BTR
935 && !(offFault & 4)
936 /** @todo Validate that the bit index is X86_PTE_RW. */
937 )
938 {
939 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
940 return true;
941 }
942 return false;
943}
944
945
946/**
947 * Determine whether the page is likely to have been reused.
948 *
949 * @returns true if we consider the page as being reused for a different purpose.
950 * @returns false if we consider it to still be a paging page.
951 * @param pVM VM Handle.
952 * @param pPage The page in question.
953 * @param pRegFrame Trap register frame.
954 * @param pCpu The disassembly info for the faulting instruction.
955 * @param pvFault The fault address.
956 *
957 * @remark The REP prefix check is left to the caller because of STOSD/W.
958 */
959DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
960{
961#ifndef IN_RC
962 /** @todo could make this general, faulting close to rsp should be safe reuse heuristic. */
963 if ( HWACCMHasPendingIrq(pVM)
964 && (pRegFrame->rsp - pvFault) < 32)
965 {
966 /* Fault caused by stack writes while trying to inject an interrupt event. */
967 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
968 return true;
969 }
970#else
971 NOREF(pVM); NOREF(pvFault);
972#endif
973
974 switch (pCpu->pCurInstr->opcode)
975 {
976 /* call implies the actual push of the return address faulted */
977 case OP_CALL:
978 Log4(("pgmPoolMonitorIsReused: CALL\n"));
979 return true;
980 case OP_PUSH:
981 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
982 return true;
983 case OP_PUSHF:
984 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
985 return true;
986 case OP_PUSHA:
987 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
988 return true;
989 case OP_FXSAVE:
990 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
991 return true;
992 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
993 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
994 return true;
995 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
996 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
997 return true;
998 case OP_MOVSWD:
999 case OP_STOSWD:
1000 if ( pCpu->prefix == (PREFIX_REP|PREFIX_REX)
1001 && pRegFrame->rcx >= 0x40
1002 )
1003 {
1004 Assert(pCpu->mode == CPUMODE_64BIT);
1005
1006 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
1007 return true;
1008 }
1009 return false;
1010 }
1011 if ( (pCpu->param1.flags & USE_REG_GEN32)
1012 && (pCpu->param1.base.reg_gen == USE_REG_ESP))
1013 {
1014 Log4(("pgmPoolMonitorIsReused: ESP\n"));
1015 return true;
1016 }
1017
1018 //if (pPage->fCR3Mix)
1019 // return false;
1020 return false;
1021}
1022
1023
1024/**
1025 * Flushes the page being accessed.
1026 *
1027 * @returns VBox status code suitable for scheduling.
1028 * @param pVM The VM handle.
1029 * @param pPool The pool.
1030 * @param pPage The pool page (head).
1031 * @param pCpu The disassembly of the write instruction.
1032 * @param pRegFrame The trap register frame.
1033 * @param GCPhysFault The fault address as guest physical address.
1034 * @param pvFault The fault address.
1035 */
1036static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1037 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1038{
1039 /*
1040 * First, do the flushing.
1041 */
1042 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
1043
1044 /*
1045 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
1046 */
1047 uint32_t cbWritten;
1048 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
1049 if (RT_SUCCESS(rc2))
1050 pRegFrame->rip += pCpu->opsize;
1051 else if (rc2 == VERR_EM_INTERPRETER)
1052 {
1053#ifdef IN_RC
1054 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
1055 {
1056 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
1057 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
1058 rc = VINF_SUCCESS;
1059 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
1060 }
1061 else
1062#endif
1063 {
1064 rc = VINF_EM_RAW_EMULATE_INSTR;
1065 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1066 }
1067 }
1068 else
1069 rc = rc2;
1070
1071 /* See use in pgmPoolAccessHandlerSimple(). */
1072 PGM_INVL_GUEST_TLBS();
1073
1074 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
1075 return rc;
1076
1077}
1078
1079
1080/**
1081 * Handles the STOSD write accesses.
1082 *
1083 * @returns VBox status code suitable for scheduling.
1084 * @param pVM The VM handle.
1085 * @param pPool The pool.
1086 * @param pPage The pool page (head).
1087 * @param pCpu The disassembly of the write instruction.
1088 * @param pRegFrame The trap register frame.
1089 * @param GCPhysFault The fault address as guest physical address.
1090 * @param pvFault The fault address.
1091 */
1092DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1093 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1094{
1095 Assert(pCpu->mode == CPUMODE_32BIT);
1096
1097 Log3(("pgmPoolAccessHandlerSTOSD\n"));
1098
1099 /*
1100 * Increment the modification counter and insert it into the list
1101 * of modified pages the first time.
1102 */
1103 if (!pPage->cModifications++)
1104 pgmPoolMonitorModifiedInsert(pPool, pPage);
1105
1106 /*
1107 * Execute REP STOSD.
1108 *
1109 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
1110 * write situation, meaning that it's safe to write here.
1111 */
1112#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1113 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1114#endif
1115 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
1116 while (pRegFrame->ecx)
1117 {
1118#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1119 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1120 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1121 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1122#else
1123 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1124#endif
1125#ifdef IN_RC
1126 *(uint32_t *)pu32 = pRegFrame->eax;
1127#else
1128 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
1129#endif
1130 pu32 += 4;
1131 GCPhysFault += 4;
1132 pRegFrame->edi += 4;
1133 pRegFrame->ecx--;
1134 }
1135 pRegFrame->rip += pCpu->opsize;
1136
1137#ifdef IN_RC
1138 /* See use in pgmPoolAccessHandlerSimple(). */
1139 PGM_INVL_GUEST_TLBS();
1140#endif
1141
1142 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
1143 return VINF_SUCCESS;
1144}
1145
1146
1147/**
1148 * Handles the simple write accesses.
1149 *
1150 * @returns VBox status code suitable for scheduling.
1151 * @param pVM The VM handle.
1152 * @param pPool The pool.
1153 * @param pPage The pool page (head).
1154 * @param pCpu The disassembly of the write instruction.
1155 * @param pRegFrame The trap register frame.
1156 * @param GCPhysFault The fault address as guest physical address.
1157 * @param pvFault The fault address.
1158 */
1159DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1160 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1161{
1162 Log3(("pgmPoolAccessHandlerSimple\n"));
1163 /*
1164 * Increment the modification counter and insert it into the list
1165 * of modified pages the first time.
1166 */
1167 if (!pPage->cModifications++)
1168 pgmPoolMonitorModifiedInsert(pPool, pPage);
1169
1170 /*
1171 * Clear all the pages. ASSUMES that pvFault is readable.
1172 */
1173#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1174 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1175 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1176 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
1177 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1178#else
1179 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
1180#endif
1181
1182 /*
1183 * Interpret the instruction.
1184 */
1185 uint32_t cb;
1186 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
1187 if (RT_SUCCESS(rc))
1188 pRegFrame->rip += pCpu->opsize;
1189 else if (rc == VERR_EM_INTERPRETER)
1190 {
1191 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1192 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
1193 rc = VINF_EM_RAW_EMULATE_INSTR;
1194 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1195 }
1196
1197#ifdef IN_RC
1198 /*
1199 * Quick hack, with logging enabled we're getting stale
1200 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
1201 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
1202 * have to be fixed to support this. But that'll have to wait till next week.
1203 *
1204 * An alternative is to keep track of the changed PTEs together with the
1205 * GCPhys from the guest PT. This may prove expensive though.
1206 *
1207 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
1208 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1209 */
1210 PGM_INVL_GUEST_TLBS();
1211#endif
1212
1213 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1214 return rc;
1215}
1216
1217/**
1218 * \#PF Handler callback for PT write accesses.
1219 *
1220 * @returns VBox status code (appropriate for GC return).
1221 * @param pVM VM Handle.
1222 * @param uErrorCode CPU Error code.
1223 * @param pRegFrame Trap register frame.
1224 * NULL on DMA and other non CPU access.
1225 * @param pvFault The fault address (cr2).
1226 * @param GCPhysFault The GC physical address corresponding to pvFault.
1227 * @param pvUser User argument.
1228 */
1229DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1230{
1231 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1232 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1233 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1234 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1235
1236 /*
1237 * We should ALWAYS have the list head as user parameter. This
1238 * is because we use that page to record the changes.
1239 */
1240 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1241
1242 /*
1243 * Disassemble the faulting instruction.
1244 */
1245 DISCPUSTATE Cpu;
1246 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
1247 AssertRCReturn(rc, rc);
1248
1249 /*
1250 * Check if it's worth dealing with.
1251 */
1252 bool fReused = false;
1253 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1254#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1255 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1256#else
1257 || pPage->fCR3Mix
1258#endif
1259 )
1260 && !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
1261 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
1262 {
1263 /*
1264 * Simple instructions, no REP prefix.
1265 */
1266 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
1267 {
1268 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1269 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1270 return rc;
1271 }
1272
1273 /*
1274 * Windows is frequently doing small memset() operations (netio test 4k+).
1275 * We have to deal with these or we'll kill the cache and performance.
1276 */
1277 if ( Cpu.pCurInstr->opcode == OP_STOSWD
1278 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
1279 && pRegFrame->ecx <= 0x20
1280 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1281 && !((uintptr_t)pvFault & 3)
1282 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1283 && Cpu.mode == CPUMODE_32BIT
1284 && Cpu.opmode == CPUMODE_32BIT
1285 && Cpu.addrmode == CPUMODE_32BIT
1286 && Cpu.prefix == PREFIX_REP
1287 && !pRegFrame->eflags.Bits.u1DF
1288 )
1289 {
1290 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1291 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1292 return rc;
1293 }
1294
1295 /* REP prefix, don't bother. */
1296 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1297 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1298 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
1299 }
1300
1301 /*
1302 * Not worth it, so flush it.
1303 *
1304 * If we considered it to be reused, don't go back to ring-3
1305 * to emulate failed instructions since we usually cannot
1306 * interpret them. This may be a bit risky, in which case
1307 * the reuse detection must be fixed.
1308 */
1309 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1310 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1311 rc = VINF_SUCCESS;
1312 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1313 return rc;
1314}
1315
1316# endif /* !IN_RING3 */
1317#endif /* PGMPOOL_WITH_MONITORING */
1318
1319#ifdef PGMPOOL_WITH_CACHE
1320
1321/**
1322 * Inserts a page into the GCPhys hash table.
1323 *
1324 * @param pPool The pool.
1325 * @param pPage The page.
1326 */
1327DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1328{
1329 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1330 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1331 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1332 pPage->iNext = pPool->aiHash[iHash];
1333 pPool->aiHash[iHash] = pPage->idx;
1334}
1335
1336
1337/**
1338 * Removes a page from the GCPhys hash table.
1339 *
1340 * @param pPool The pool.
1341 * @param pPage The page.
1342 */
1343DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1344{
1345 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1346 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1347 if (pPool->aiHash[iHash] == pPage->idx)
1348 pPool->aiHash[iHash] = pPage->iNext;
1349 else
1350 {
1351 uint16_t iPrev = pPool->aiHash[iHash];
1352 for (;;)
1353 {
1354 const int16_t i = pPool->aPages[iPrev].iNext;
1355 if (i == pPage->idx)
1356 {
1357 pPool->aPages[iPrev].iNext = pPage->iNext;
1358 break;
1359 }
1360 if (i == NIL_PGMPOOL_IDX)
1361 {
1362 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1363 break;
1364 }
1365 iPrev = i;
1366 }
1367 }
1368 pPage->iNext = NIL_PGMPOOL_IDX;
1369}
1370
1371
1372/**
1373 * Frees up one cache page.
1374 *
1375 * @returns VBox status code.
1376 * @retval VINF_SUCCESS on success.
1377 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
1378 * @param pPool The pool.
1379 * @param iUser The user index.
1380 */
1381static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1382{
1383#ifndef IN_RC
1384 const PVM pVM = pPool->CTX_SUFF(pVM);
1385#endif
1386 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
1387 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1388
1389 /*
1390 * Select one page from the tail of the age list.
1391 */
1392 uint16_t iToFree = pPool->iAgeTail;
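 /* Never evict the caller's own user page; fall back to the next-oldest entry instead. */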
1393 if (iToFree == iUser)
1394 iToFree = pPool->aPages[iToFree].iAgePrev;
1395/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1396 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1397 {
1398 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1399 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1400 {
1401 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1402 continue;
1403 iToFree = i;
1404 break;
1405 }
1406 }
1407*/
1408
1409 Assert(iToFree != iUser);
1410 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1411
1412 PPGMPOOLPAGE pPage = &pPool->aPages[iToFree];
1413
1414 /*
1415 * Reject any attempts at flushing the currently active shadow CR3 mapping
1416 */
1417#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1418 if (pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1419#else
1420 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
1421#endif
1422 {
1423 /* Refresh the cr3 mapping by putting it at the head of the age list. */
1424 LogFlow(("pgmPoolCacheFreeOne refuse CR3 mapping\n"));
1425 pgmPoolCacheUsed(pPool, pPage);
1426 return pgmPoolCacheFreeOne(pPool, iUser);
1427 }
1428
1429 int rc = pgmPoolFlushPage(pPool, pPage);
1430 if (rc == VINF_SUCCESS)
1431 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1432 return rc;
1433}
1434
1435
1436/**
1437 * Checks if a kind mismatch is really a page being reused
1438 * or just a normal remapping.
1439 *
1440 * @returns true if reused and the cached page (enmKind1) should be flushed
1441 * @returns false if not reused.
1442 * @param enmKind1 The kind of the cached page.
1443 * @param enmKind2 The kind of the requested page.
1444 */
1445static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1446{
1447 switch (enmKind1)
1448 {
1449 /*
1450 * Never reuse them. There is no remapping in non-paging mode.
1451 */
1452 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1453 case PGMPOOLKIND_32BIT_PD_PHYS:
1454 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1455 case PGMPOOLKIND_PAE_PD_PHYS:
1456 case PGMPOOLKIND_PAE_PDPT_PHYS:
1457 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1458 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1459 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1460 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1461 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1462#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1463 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1464 return false;
1465#else
1466 return true;
1467#endif
1468
1469 /*
1470 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1471 */
1472 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1473 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1474 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1475 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1476 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1477 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1478 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1479 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1480 case PGMPOOLKIND_32BIT_PD:
1481 switch (enmKind2)
1482 {
1483 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1484 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1485 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1486 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1487 case PGMPOOLKIND_64BIT_PML4:
1488 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1489 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1490 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1491 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1492 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1493 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1494 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1495 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1496 return true;
1497 default:
1498 return false;
1499 }
1500
1501 /*
1502 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1503 */
1504 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1505 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1506 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1507 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1508 case PGMPOOLKIND_64BIT_PML4:
1509 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1510 switch (enmKind2)
1511 {
1512 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1513 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1514 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1515 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1516 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1517 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1518 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1519 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1520 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1521 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1522 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1523 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1524 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1525 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1526 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1527 return true;
1528 default:
1529 return false;
1530 }
1531
1532 /*
1533 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1534 */
1535#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1536 case PGMPOOLKIND_ROOT_32BIT_PD:
1537 case PGMPOOLKIND_ROOT_PAE_PD:
1538 case PGMPOOLKIND_ROOT_PDPT:
1539#endif
1540 case PGMPOOLKIND_ROOT_NESTED:
1541 return false;
1542
1543 default:
1544 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1545 }
1546}
1547
1548
1549/**
1550 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1551 *
1552 * @returns VBox status code.
1553 * @retval VINF_PGM_CACHED_PAGE on success.
1554 * @retval VERR_FILE_NOT_FOUND if not found.
1555 * @param pPool The pool.
1556 * @param GCPhys The GC physical address of the page we're gonna shadow.
1557 * @param enmKind The kind of mapping.
1558 * @param iUser The shadow page pool index of the user table.
1559 * @param iUserTable The index into the user table (shadowed).
1560 * @param ppPage Where to store the pointer to the page.
1561 */
1562static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1563{
1564#ifndef IN_RC
1565 const PVM pVM = pPool->CTX_SUFF(pVM);
1566#endif
1567 /*
1568 * Look up the GCPhys in the hash.
1569 */
1570 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1571 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
1572 if (i != NIL_PGMPOOL_IDX)
1573 {
1574 do
1575 {
1576 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1577 Log3(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1578 if (pPage->GCPhys == GCPhys)
1579 {
1580 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
1581 {
1582 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
1583 * doesn't flush it in case there are no more free use records.
1584 */
1585 pgmPoolCacheUsed(pPool, pPage);
1586
1587 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1588 if (RT_SUCCESS(rc))
1589 {
1590 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
1591 *ppPage = pPage;
1592 STAM_COUNTER_INC(&pPool->StatCacheHits);
1593 return VINF_PGM_CACHED_PAGE;
1594 }
1595 return rc;
1596 }
1597
1598 /*
1599 * The kind is different. In some cases we should now flush the page
1600 * as it has been reused, but in most cases this is normal remapping
1601 * of PDs as PT or big pages using the GCPhys field in a slightly
1602 * different way than the other kinds.
1603 */
1604 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1605 {
1606 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1607 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
1608 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1609 break;
1610 }
1611 }
1612
1613 /* next */
1614 i = pPage->iNext;
1615 } while (i != NIL_PGMPOOL_IDX);
1616 }
1617
1618 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
1619 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1620 return VERR_FILE_NOT_FOUND;
1621}
1622
1623
1624/**
1625 * Inserts a page into the cache.
1626 *
1627 * @param pPool The pool.
1628 * @param pPage The cached page.
1629 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1630 */
1631static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1632{
1633 /*
1634 * Insert into the GCPhys hash if the page is fit for that.
1635 */
1636 Assert(!pPage->fCached);
1637 if (fCanBeCached)
1638 {
1639 pPage->fCached = true;
1640 pgmPoolHashInsert(pPool, pPage);
1641 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1642 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1643 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1644 }
1645 else
1646 {
1647 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1648 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1649 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1650 }
1651
1652 /*
1653 * Insert at the head of the age list.
1654 */
1655 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1656 pPage->iAgeNext = pPool->iAgeHead;
1657 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1658 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1659 else
1660 pPool->iAgeTail = pPage->idx;
1661 pPool->iAgeHead = pPage->idx;
1662}
1663
1664
1665/**
1666 * Flushes a cached page.
1667 *
1668 * @param pPool The pool.
1669 * @param pPage The cached page.
1670 */
1671static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1672{
1673 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1674
1675 /*
1676 * Remove the page from the hash.
1677 */
1678 if (pPage->fCached)
1679 {
1680 pPage->fCached = false;
1681 pgmPoolHashRemove(pPool, pPage);
1682 }
1683 else
1684 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1685
1686 /*
1687 * Remove it from the age list.
1688 */
1689 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1690 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1691 else
1692 pPool->iAgeTail = pPage->iAgePrev;
1693 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1694 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1695 else
1696 pPool->iAgeHead = pPage->iAgeNext;
1697 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1698 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1699}
1700
1701#endif /* PGMPOOL_WITH_CACHE */
1702#ifdef PGMPOOL_WITH_MONITORING
1703
1704/**
1705 * Looks for pages sharing the monitor.
1706 *
1707 * @returns Pointer to the head page.
1708 * @returns NULL if not found.
1709 * @param pPool The Pool
1710 * @param pNewPage The page which is going to be monitored.
1711 */
1712static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1713{
1714#ifdef PGMPOOL_WITH_CACHE
1715 /*
1716 * Look up the GCPhys in the hash.
1717 */
1718 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1719 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1720 if (i == NIL_PGMPOOL_IDX)
1721 return NULL;
1722 do
1723 {
1724 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1725 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1726 && pPage != pNewPage)
1727 {
1728 switch (pPage->enmKind)
1729 {
1730 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1731 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1732 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1733 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1734 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1735 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1736 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1737 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1738 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1739 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1740 case PGMPOOLKIND_64BIT_PML4:
1741#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1742 case PGMPOOLKIND_32BIT_PD:
1743 case PGMPOOLKIND_PAE_PDPT:
1744#else
1745 case PGMPOOLKIND_ROOT_32BIT_PD:
1746 case PGMPOOLKIND_ROOT_PAE_PD:
1747 case PGMPOOLKIND_ROOT_PDPT:
1748#endif
1749 {
1750 /* find the head */
1751 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1752 {
1753 Assert(pPage->iMonitoredPrev != pPage->idx);
1754 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1755 }
1756 return pPage;
1757 }
1758
1759 /* ignore, no monitoring. */
1760 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1761 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1762 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1763 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1764 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1765 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1766 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1767 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1768 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1769 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1770 case PGMPOOLKIND_ROOT_NESTED:
1771 case PGMPOOLKIND_PAE_PD_PHYS:
1772 case PGMPOOLKIND_PAE_PDPT_PHYS:
1773 case PGMPOOLKIND_32BIT_PD_PHYS:
1774#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1775 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1776#endif
1777 break;
1778 default:
1779 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1780 }
1781 }
1782
1783 /* next */
1784 i = pPage->iNext;
1785 } while (i != NIL_PGMPOOL_IDX);
1786#endif
1787 return NULL;
1788}
1789
1790
1791/**
1792 * Enables write monitoring of a guest page.
1793 *
1794 * @returns VBox status code.
1795 * @retval VINF_SUCCESS on success.
1796 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1797 * @param pPool The pool.
1798 * @param pPage The cached page.
1799 */
1800static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1801{
1802 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1803
1804 /*
1805 * Filter out the relevant kinds.
1806 */
1807 switch (pPage->enmKind)
1808 {
1809 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1810 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1811 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1812 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1813 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1814 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1815 case PGMPOOLKIND_64BIT_PML4:
1816#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1817 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1818 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1819 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1820 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1821 case PGMPOOLKIND_32BIT_PD:
1822 case PGMPOOLKIND_PAE_PDPT:
1823#else
1824 case PGMPOOLKIND_ROOT_PDPT:
1825#endif
1826 break;
1827
1828 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1829 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1830 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1831 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1832 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1833 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1834 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1835 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1836 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1837 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1838 case PGMPOOLKIND_ROOT_NESTED:
1839 /* Nothing to monitor here. */
1840 return VINF_SUCCESS;
1841
1842#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1843 case PGMPOOLKIND_32BIT_PD_PHYS:
1844 case PGMPOOLKIND_PAE_PDPT_PHYS:
1845 case PGMPOOLKIND_PAE_PD_PHYS:
1846 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1847 /* Nothing to monitor here. */
1848 return VINF_SUCCESS;
1849#else
1850 case PGMPOOLKIND_ROOT_32BIT_PD:
1851 case PGMPOOLKIND_ROOT_PAE_PD:
1852#endif
1853#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1854 break;
1855#else
1856 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1857#endif
1858 default:
1859 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1860 }
1861
1862 /*
1863 * Install handler.
1864 */
1865 int rc;
1866 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1867 if (pPageHead)
1868 {
1869 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1870 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1871 pPage->iMonitoredPrev = pPageHead->idx;
1872 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1873 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1874 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1875 pPageHead->iMonitoredNext = pPage->idx;
1876 rc = VINF_SUCCESS;
1877 }
1878 else
1879 {
1880 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1881 PVM pVM = pPool->CTX_SUFF(pVM);
1882 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1883 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1884 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1885 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1886 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1887 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1888 pPool->pszAccessHandler);
1889 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1890 * the heap size should suffice. */
1891 AssertFatalRC(rc);
1892 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1893 rc = VERR_PGM_POOL_CLEARED;
1894 }
1895 pPage->fMonitored = true;
1896 return rc;
1897}
1898
1899
1900/**
1901 * Disables write monitoring of a guest page.
1902 *
1903 * @returns VBox status code.
1904 * @retval VINF_SUCCESS on success.
1905 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1906 * @param pPool The pool.
1907 * @param pPage The cached page.
1908 */
1909static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1910{
1911 /*
1912 * Filter out the relevant kinds.
1913 */
1914 switch (pPage->enmKind)
1915 {
1916 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1917 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1918 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1919 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1920 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1921 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1922 case PGMPOOLKIND_64BIT_PML4:
1923#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1924 case PGMPOOLKIND_32BIT_PD:
1925 case PGMPOOLKIND_PAE_PDPT:
1926 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1927 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1928 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1929 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1930#else
1931 case PGMPOOLKIND_ROOT_PDPT:
1932#endif
1933 break;
1934
1935 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1936 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1937 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1938 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1939 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1940 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1941 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1942 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1943 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1944 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1945 case PGMPOOLKIND_ROOT_NESTED:
1946 case PGMPOOLKIND_PAE_PD_PHYS:
1947 case PGMPOOLKIND_PAE_PDPT_PHYS:
1948 case PGMPOOLKIND_32BIT_PD_PHYS:
1949 /* Nothing to monitor here. */
1950 return VINF_SUCCESS;
1951
1952#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1953 case PGMPOOLKIND_ROOT_32BIT_PD:
1954 case PGMPOOLKIND_ROOT_PAE_PD:
1955#endif
1956#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1957 break;
1958#endif
1959#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1960 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1961#endif
1962 default:
1963 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1964 }
1965
1966 /*
1967 * Remove the page from the monitored list or uninstall it if last.
1968 */
1969 const PVM pVM = pPool->CTX_SUFF(pVM);
1970 int rc;
1971 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1972 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1973 {
1974 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1975 {
1976 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1977 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1978#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1979 pNewHead->fCR3Mix = pPage->fCR3Mix;
1980#endif
1981 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1982 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1983 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1984 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
1985 pPool->pszAccessHandler);
1986 AssertFatalRCSuccess(rc);
1987 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1988 }
1989 else
1990 {
1991 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1992 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1993 {
1994 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1995 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1996 }
1997 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1998 rc = VINF_SUCCESS;
1999 }
2000 }
2001 else
2002 {
2003 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2004 AssertFatalRC(rc);
2005 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2006 rc = VERR_PGM_POOL_CLEARED;
2007 }
2008 pPage->fMonitored = false;
2009
2010 /*
2011 * Remove it from the list of modified pages (if in it).
2012 */
2013 pgmPoolMonitorModifiedRemove(pPool, pPage);
2014
2015 return rc;
2016}
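/*
 * Illustrative sketch, not part of the build: pgmPoolMonitorInsert and
 * pgmPoolMonitorFlush keep exactly one physical write handler per monitored
 * guest page, however many pool pages shadow it.  The pool pages share the
 * registration through the iMonitoredPrev/iMonitoredNext chain and only the
 * chain head owns the handler; when the head goes away the callbacks are
 * re-pointed at the new head instead of being torn down.  demoChangeUser()
 * and demoDeregister() are hypothetical stubs standing in for the
 * PGMHandlerPhysicalChangeCallbacks / PGMHandlerPhysicalDeregister calls.
 */
#include <stdint.h>
#include <stdbool.h>

#define DEMO_NIL_IDX UINT16_MAX

typedef struct DEMOMONPAGE { uint16_t iMonitoredPrev, iMonitoredNext; bool fMonitored; } DEMOMONPAGE;

static void demoChangeUser(uint16_t idxNewHead) { (void)idxNewHead; }  /* retarget the handler */
static void demoDeregister(void)                { }                    /* drop the handler     */

/* Stop monitoring through page idx while keeping the shared handler alive if others remain. */
static void demoMonitorRemove(DEMOMONPAGE *paPages, uint16_t idx)
{
    DEMOMONPAGE *pPage = &paPages[idx];
    if (   pPage->iMonitoredPrev == DEMO_NIL_IDX
        && pPage->iMonitoredNext == DEMO_NIL_IDX)
        demoDeregister();                                       /* last page in the chain */
    else if (pPage->iMonitoredPrev == DEMO_NIL_IDX)
    {
        paPages[pPage->iMonitoredNext].iMonitoredPrev = DEMO_NIL_IDX;
        demoChangeUser(pPage->iMonitoredNext);                  /* promote the new head */
    }
    else
    {
        paPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
        if (pPage->iMonitoredNext != DEMO_NIL_IDX)
            paPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
    }
    pPage->iMonitoredPrev = DEMO_NIL_IDX;
    pPage->iMonitoredNext = DEMO_NIL_IDX;
    pPage->fMonitored     = false;
}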
2017
2018# if defined(PGMPOOL_WITH_MIXED_PT_CR3) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2019
2020/**
2021 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
2022 *
2023 * @param pPool The Pool.
2024 * @param pPage A page in the chain.
2025 * @param fCR3Mix The new fCR3Mix value.
2026 */
2027static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
2028{
2029 /* current */
2030 pPage->fCR3Mix = fCR3Mix;
2031
2032 /* before */
2033 int16_t idx = pPage->iMonitoredPrev;
2034 while (idx != NIL_PGMPOOL_IDX)
2035 {
2036 pPool->aPages[idx].fCR3Mix = fCR3Mix;
2037 idx = pPool->aPages[idx].iMonitoredPrev;
2038 }
2039
2040 /* after */
2041 idx = pPage->iMonitoredNext;
2042 while (idx != NIL_PGMPOOL_IDX)
2043 {
2044 pPool->aPages[idx].fCR3Mix = fCR3Mix;
2045 idx = pPool->aPages[idx].iMonitoredNext;
2046 }
2047}
2048
2049
2050/**
2051 * Installs or modifies monitoring of a CR3 page (special).
2052 *
2053 * We're pretending the CR3 page is shadowed by the pool so we can use the
2054 * generic mechanisms for detecting chained monitoring. (This also gives us a
2055 * taste of what code changes are required to really pool CR3 shadow pages.)
2056 *
2057 * @returns VBox status code.
2058 * @param pPool The pool.
2059 * @param idxRoot The CR3 (root) page index.
2060 * @param GCPhysCR3 The (new) CR3 value.
2061 */
2062int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
2063{
2064 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
2065 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
2066 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d} GCPhysCR3=%RGp\n",
2067 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
2068
2069 /*
2070 * The unlikely case where it already matches.
2071 */
2072 if (pPage->GCPhys == GCPhysCR3)
2073 {
2074 Assert(pPage->fMonitored);
2075 return VINF_SUCCESS;
2076 }
2077
2078 /*
2079 * Flush the current monitoring and remove it from the hash.
2080 */
2081 int rc = VINF_SUCCESS;
2082 if (pPage->fMonitored)
2083 {
2084 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
2085 rc = pgmPoolMonitorFlush(pPool, pPage);
2086 if (rc == VERR_PGM_POOL_CLEARED)
2087 rc = VINF_SUCCESS;
2088 else
2089 AssertFatalRC(rc);
2090 pgmPoolHashRemove(pPool, pPage);
2091 }
2092
2093 /*
2094 * Monitor the page at the new location and insert it into the hash.
2095 */
2096 pPage->GCPhys = GCPhysCR3;
2097 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
2098 if (rc2 != VERR_PGM_POOL_CLEARED)
2099 {
2100 AssertFatalRC(rc2);
2101 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
2102 rc = rc2;
2103 }
2104 pgmPoolHashInsert(pPool, pPage);
2105 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
2106 return rc;
2107}
2108
2109
2110/**
2111 * Removes the monitoring of a CR3 page (special).
2112 *
2113 * @returns VBox status code.
2114 * @param pPool The pool.
2115 * @param idxRoot The CR3 (root) page index.
2116 */
2117int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
2118{
2119 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
2120 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
2121 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d}\n",
2122 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
2123
2124 if (!pPage->fMonitored)
2125 return VINF_SUCCESS;
2126
2127 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
2128 int rc = pgmPoolMonitorFlush(pPool, pPage);
2129 if (rc != VERR_PGM_POOL_CLEARED)
2130 AssertFatalRC(rc);
2131 else
2132 rc = VINF_SUCCESS;
2133 pgmPoolHashRemove(pPool, pPage);
2134 Assert(!pPage->fMonitored);
2135 pPage->GCPhys = NIL_RTGCPHYS;
2136 return rc;
2137}
2138
2139# endif /* PGMPOOL_WITH_MIXED_PT_CR3 && !VBOX_WITH_PGMPOOL_PAGING_ONLY*/
2140
2141/**
2142 * Inserts the page into the list of modified pages.
2143 *
2144 * @param pPool The pool.
2145 * @param pPage The page.
2146 */
2147void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2148{
2149 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2150 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2151 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2152 && pPool->iModifiedHead != pPage->idx,
2153 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2154 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2155 pPool->iModifiedHead, pPool->cModifiedPages));
2156
2157 pPage->iModifiedNext = pPool->iModifiedHead;
2158 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2159 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2160 pPool->iModifiedHead = pPage->idx;
2161 pPool->cModifiedPages++;
2162#ifdef VBOX_WITH_STATISTICS
2163 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2164 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2165#endif
2166}
2167
2168
2169/**
2170 * Removes the page from the list of modified pages and resets the
2171 * modification counter.
2172 *
2173 * @param pPool The pool.
2174 * @param pPage The page which is believed to be in the list of modified pages.
2175 */
2176static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2177{
2178 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2179 if (pPool->iModifiedHead == pPage->idx)
2180 {
2181 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2182 pPool->iModifiedHead = pPage->iModifiedNext;
2183 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2184 {
2185 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2186 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2187 }
2188 pPool->cModifiedPages--;
2189 }
2190 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2191 {
2192 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2193 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2194 {
2195 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2196 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2197 }
2198 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2199 pPool->cModifiedPages--;
2200 }
2201 else
2202 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2203 pPage->cModifications = 0;
2204}
2205
2206
2207/**
2208 * Zaps the list of modified pages, resetting their modification counters in the process.
2209 *
2210 * @param pVM The VM handle.
2211 */
2212void pgmPoolMonitorModifiedClearAll(PVM pVM)
2213{
2214 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2215 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2216
2217 unsigned cPages = 0; NOREF(cPages);
2218 uint16_t idx = pPool->iModifiedHead;
2219 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2220 while (idx != NIL_PGMPOOL_IDX)
2221 {
2222 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2223 idx = pPage->iModifiedNext;
2224 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2225 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2226 pPage->cModifications = 0;
2227 Assert(++cPages);
2228 }
2229 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2230 pPool->cModifiedPages = 0;
2231}
2232
2233
2234#ifdef IN_RING3
2235/**
2236 * Clears all shadow pages and all modification counters.
2237 *
2238 * @param pVM The VM handle.
2239 * @remark Should only be used when monitoring is available, thus placed in
2240 * the PGMPOOL_WITH_MONITORING #ifdef.
2241 */
2242void pgmPoolClearAll(PVM pVM)
2243{
2244 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2245 STAM_PROFILE_START(&pPool->StatClearAll, c);
2246 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2247
2248 /*
2249 * Iterate all the pages until we've encountered all that are in use.
2250 * This is a simple but not quite optimal solution.
2251 */
2252 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2253 unsigned cLeft = pPool->cUsedPages;
2254 unsigned iPage = pPool->cCurPages;
2255 while (--iPage >= PGMPOOL_IDX_FIRST)
2256 {
2257 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2258 if (pPage->GCPhys != NIL_RTGCPHYS)
2259 {
2260 switch (pPage->enmKind)
2261 {
2262 /*
2263 * We only care about shadow page tables.
2264 */
2265 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2266 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2267 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2268 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2269 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2270 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2271 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2272 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2273 {
2274#ifdef PGMPOOL_WITH_USER_TRACKING
2275 if (pPage->cPresent)
2276#endif
2277 {
2278 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2279 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2280 ASMMemZeroPage(pvShw);
2281 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2282#ifdef PGMPOOL_WITH_USER_TRACKING
2283 pPage->cPresent = 0;
2284 pPage->iFirstPresent = ~0;
2285#endif
2286 }
2287 }
2288 /* fall thru */
2289
2290 default:
2291 Assert(!pPage->cModifications || ++cModifiedPages);
2292 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2293 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2294 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2295 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2296 pPage->cModifications = 0;
2297 break;
2298
2299 }
2300 if (!--cLeft)
2301 break;
2302 }
2303 }
2304
2305 /* sweep the special pages too. */
2306 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2307 {
2308 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2309 if (pPage->GCPhys != NIL_RTGCPHYS)
2310 {
2311 Assert(!pPage->cModifications || ++cModifiedPages);
2312 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2313 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2314 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2315 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2316 pPage->cModifications = 0;
2317 }
2318 }
2319
2320#ifndef DEBUG_michael
2321 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2322#endif
2323 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2324 pPool->cModifiedPages = 0;
2325
2326#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2327 /*
2328 * Clear all the GCPhys links and rebuild the phys ext free list.
2329 */
2330 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2331 pRam;
2332 pRam = pRam->CTX_SUFF(pNext))
2333 {
2334 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2335 while (iPage-- > 0)
2336 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2337 }
2338
2339 pPool->iPhysExtFreeHead = 0;
2340 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2341 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2342 for (unsigned i = 0; i < cMaxPhysExts; i++)
2343 {
2344 paPhysExts[i].iNext = i + 1;
2345 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2346 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2347 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2348 }
2349 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2350#endif
2351
2352
2353 pPool->cPresent = 0;
2354 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2355}
2356#endif /* IN_RING3 */
2357
2358
2359/**
2360 * Handle SyncCR3 pool tasks
2361 *
2362 * @returns VBox status code.
2363 * @retval VINF_SUCCESS on success.
2364 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2365 * @param pVM The VM handle.
2366 * @remark Should only be used when monitoring is available, thus placed in
2367 * the PGMPOOL_WITH_MONITORING #ifdef.
2368 */
2369int pgmPoolSyncCR3(PVM pVM)
2370{
2371 LogFlow(("pgmPoolSyncCR3\n"));
2372 /*
2373 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2374 * Occasionally we will have to clear all the shadow page tables because we wanted
2375 * to monitor a page which was mapped by too many shadowed page tables. This operation
2376 * is sometimes referred to as a 'lightweight flush'.
2377 */
2378 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2379 pgmPoolMonitorModifiedClearAll(pVM);
2380 else
2381 {
2382# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2383 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2384 pgmPoolClearAll(pVM);
2385# else /* !IN_RING3 */
2386 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2387 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2388 return VINF_PGM_SYNC_CR3;
2389# endif /* !IN_RING3 */
2390 }
2391 return VINF_SUCCESS;
2392}
2393
2394#endif /* PGMPOOL_WITH_MONITORING */
2395#ifdef PGMPOOL_WITH_USER_TRACKING
2396
2397/**
2398 * Frees up at least one user entry.
2399 *
2400 * @returns VBox status code.
2401 * @retval VINF_SUCCESS if at least one user entry was freed.
2402 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2403 * @param pPool The pool.
2404 * @param iUser The user index.
2405 */
2406static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2407{
2408 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2409#ifdef PGMPOOL_WITH_CACHE
2410 /*
2411 * Just free cached pages in a braindead fashion.
2412 */
2413 /** @todo walk the age list backwards and free the first with usage. */
2414 int rc = VINF_SUCCESS;
2415 do
2416 {
2417 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2418 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2419 rc = rc2;
2420 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2421 return rc;
2422#else
2423 /*
2424 * Lazy approach.
2425 */
2426 /* @todo incompatible with long mode paging (cr3 root will be flushed) */
2427 Assert(!CPUMIsGuestInLongMode(pVM));
2428 pgmPoolFlushAllInt(pPool);
2429 return VERR_PGM_POOL_FLUSHED;
2430#endif
2431}
2432
2433
2434/**
2435 * Inserts a page into the cache.
2436 *
2437 * This will create a user node for the page, insert it into the GCPhys
2438 * hash, and insert it into the age list.
2439 *
2440 * @returns VBox status code.
2441 * @retval VINF_SUCCESS if successfully added.
2442 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2443 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
2444 * @param pPool The pool.
2445 * @param pPage The cached page.
2446 * @param GCPhys The GC physical address of the page we're gonna shadow.
2447 * @param iUser The user index.
2448 * @param iUserTable The user table index.
2449 */
2450DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2451{
2452 int rc = VINF_SUCCESS;
2453 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2454
2455 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2456
2457#ifdef VBOX_STRICT
2458 /*
2459 * Check that the entry doesn't already exist.
2460 */
2461 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2462 {
2463 uint16_t i = pPage->iUserHead;
2464 do
2465 {
2466 Assert(i < pPool->cMaxUsers);
2467 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2468 i = paUsers[i].iNext;
2469 } while (i != NIL_PGMPOOL_USER_INDEX);
2470 }
2471#endif
2472
2473 /*
2474 * Find a free user node.
2475 */
2476 uint16_t i = pPool->iUserFreeHead;
2477 if (i == NIL_PGMPOOL_USER_INDEX)
2478 {
2479 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2480 if (RT_FAILURE(rc))
2481 return rc;
2482 i = pPool->iUserFreeHead;
2483 }
2484
2485 /*
2486 * Unlink the user node from the free list,
2487 * initialize and insert it into the user list.
2488 */
2489 pPool->iUserFreeHead = paUsers[i].iNext;
2490 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2491 paUsers[i].iUser = iUser;
2492 paUsers[i].iUserTable = iUserTable;
2493 pPage->iUserHead = i;
2494
2495 /*
2496 * Insert into cache and enable monitoring of the guest page if enabled.
2497 *
2498 * Until we implement caching of all levels, including the CR3 one, we'll
2499 * have to make sure we don't try to monitor & cache any recursive reuse of
2500 * a monitored CR3 page. Because all Windows versions do this, we'll
2501 * have to be able to do combined access monitoring, CR3 + PT and
2502 * PD + PT (guest PAE).
2503 *
2504 * Update:
2505 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2506 */
2507#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2508# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2509 const bool fCanBeMonitored = true;
2510# else
2511 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2512 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2513 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2514# endif
2515# ifdef PGMPOOL_WITH_CACHE
2516 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2517# endif
2518 if (fCanBeMonitored)
2519 {
2520# ifdef PGMPOOL_WITH_MONITORING
2521 rc = pgmPoolMonitorInsert(pPool, pPage);
2522 if (rc == VERR_PGM_POOL_CLEARED)
2523 {
2524 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2525# ifndef PGMPOOL_WITH_CACHE
2526 pgmPoolMonitorFlush(pPool, pPage);
2527 rc = VERR_PGM_POOL_FLUSHED;
2528# endif
2529 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2530 paUsers[i].iNext = pPool->iUserFreeHead;
2531 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2532 pPool->iUserFreeHead = i;
2533 }
2534 }
2535# endif
2536#endif /* PGMPOOL_WITH_MONITORING */
2537 return rc;
2538}
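/*
 * Illustrative sketch, not part of the build: the user records created above
 * are back-references.  For every shadow table entry that points at a pool
 * page the pool remembers the (iUser, iUserTable) pair, so that flushing the
 * page can later clear exactly those entries (see pgmPoolTrackClearPageUser).
 * The types below are simplified stand-ins, and the sketch assumes the free
 * list is not empty (the real code frees a user entry first if it is).
 */
#include <stdint.h>

#define DEMO_NIL_USER UINT16_MAX

typedef struct DEMOUSERREC  { uint16_t iUser; uint32_t iUserTable; uint16_t iNext; } DEMOUSERREC;
typedef struct DEMOUSERPAGE { uint16_t iUserHead; } DEMOUSERPAGE;

/* Record that entry iUserTable of shadow table iUser references pPage. */
static void demoTrackAddUser(DEMOUSERREC *paUsers, uint16_t *piUserFreeHead, DEMOUSERPAGE *pPage,
                             uint16_t iUser, uint32_t iUserTable)
{
    uint16_t i = *piUserFreeHead;               /* pop a record off the free list */
    *piUserFreeHead = paUsers[i].iNext;

    paUsers[i].iUser      = iUser;              /* who references us ...          */
    paUsers[i].iUserTable = iUserTable;         /* ... and which entry it uses    */
    paUsers[i].iNext      = pPage->iUserHead;   /* push onto the page's user list */
    pPage->iUserHead      = i;
}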
2539
2540
2541# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2542/**
2543 * Adds a user reference to a page.
2544 *
2545 * This will move the page to the head of the age list.
2546 *
2547 * @returns VBox status code.
2548 * @retval VINF_SUCCESS if successfully added.
2549 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2550 * @param pPool The pool.
2551 * @param pPage The cached page.
2552 * @param iUser The user index.
2553 * @param iUserTable The user table.
2554 */
2555static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2556{
2557 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2558
2559 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2560# ifdef VBOX_STRICT
2561 /*
2562 * Check that the entry doesn't already exist.
2563 */
2564 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2565 {
2566 uint16_t i = pPage->iUserHead;
2567 do
2568 {
2569 Assert(i < pPool->cMaxUsers);
2570 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2571 i = paUsers[i].iNext;
2572 } while (i != NIL_PGMPOOL_USER_INDEX);
2573 }
2574# endif
2575
2576 /*
2577 * Allocate a user node.
2578 */
2579 uint16_t i = pPool->iUserFreeHead;
2580 if (i == NIL_PGMPOOL_USER_INDEX)
2581 {
2582 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2583 if (RT_FAILURE(rc))
2584 return rc;
2585 i = pPool->iUserFreeHead;
2586 }
2587 pPool->iUserFreeHead = paUsers[i].iNext;
2588
2589 /*
2590 * Initialize the user node and insert it.
2591 */
2592 paUsers[i].iNext = pPage->iUserHead;
2593 paUsers[i].iUser = iUser;
2594 paUsers[i].iUserTable = iUserTable;
2595 pPage->iUserHead = i;
2596
2597# ifdef PGMPOOL_WITH_CACHE
2598 /*
2599 * Tell the cache to update its replacement stats for this page.
2600 */
2601 pgmPoolCacheUsed(pPool, pPage);
2602# endif
2603 return VINF_SUCCESS;
2604}
2605# endif /* PGMPOOL_WITH_CACHE */
2606
2607
2608/**
2609 * Frees a user record associated with a page.
2610 *
2611 * This does not clear the entry in the user table, it simply returns the
2612 * user record to the chain of free records.
2613 *
2614 * @param pPool The pool.
2615 * @param pPage The shadow page.
2616 * @param iUser The shadow page pool index of the user table.
2617 * @param iUserTable The index into the user table (shadowed).
2618 */
2619static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2620{
2621 /*
2622 * Unlink and free the specified user entry.
2623 */
2624 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2625
2626 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2627 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2628 uint16_t i = pPage->iUserHead;
2629 if ( i != NIL_PGMPOOL_USER_INDEX
2630 && paUsers[i].iUser == iUser
2631 && paUsers[i].iUserTable == iUserTable)
2632 {
2633 pPage->iUserHead = paUsers[i].iNext;
2634
2635 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2636 paUsers[i].iNext = pPool->iUserFreeHead;
2637 pPool->iUserFreeHead = i;
2638 return;
2639 }
2640
2641 /* General: Linear search. */
2642 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2643 while (i != NIL_PGMPOOL_USER_INDEX)
2644 {
2645 if ( paUsers[i].iUser == iUser
2646 && paUsers[i].iUserTable == iUserTable)
2647 {
2648 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2649 paUsers[iPrev].iNext = paUsers[i].iNext;
2650 else
2651 pPage->iUserHead = paUsers[i].iNext;
2652
2653 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2654 paUsers[i].iNext = pPool->iUserFreeHead;
2655 pPool->iUserFreeHead = i;
2656 return;
2657 }
2658 iPrev = i;
2659 i = paUsers[i].iNext;
2660 }
2661
2662 /* Fatal: didn't find it */
2663 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2664 iUser, iUserTable, pPage->GCPhys));
2665}
2666
2667
2668/**
2669 * Gets the entry size of a shadow table.
2670 *
2671 * @param enmKind The kind of page.
2672 *
2673 * @returns The size of the entry in bytes. That is, 4 or 8.
2674 * @returns If the kind is not for a table, an assertion is raised and 0 is
2675 * returned.
2676 */
2677DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2678{
2679 switch (enmKind)
2680 {
2681 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2682 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2683 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2684#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2685 case PGMPOOLKIND_32BIT_PD:
2686 case PGMPOOLKIND_32BIT_PD_PHYS:
2687#else
2688 case PGMPOOLKIND_ROOT_32BIT_PD:
2689#endif
2690 return 4;
2691
2692 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2693 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2694 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2695 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2696 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2697 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2698 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2699 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2700 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2701 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2702 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2703 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2704 case PGMPOOLKIND_64BIT_PML4:
2705#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2706 case PGMPOOLKIND_ROOT_PAE_PD:
2707 case PGMPOOLKIND_ROOT_PDPT:
2708#endif
2709 case PGMPOOLKIND_PAE_PDPT:
2710 case PGMPOOLKIND_ROOT_NESTED:
2711 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2712 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2713 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2714 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2715 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2716 case PGMPOOLKIND_PAE_PD_PHYS:
2717 case PGMPOOLKIND_PAE_PDPT_PHYS:
2718 return 8;
2719
2720 default:
2721 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2722 }
2723}
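/*
 * Illustrative note, not part of the build: the 4 vs. 8 byte split above just
 * mirrors the paging formats.  Legacy 32-bit tables and directories use 4-byte
 * entries (1024 per 4 KB page), while PAE, long mode and EPT structures use
 * 8-byte entries (512 per page).
 */
/* Entries that fit into one 4 KB table for a given entry size: 4 -> 1024, 8 -> 512. */
static inline unsigned demoEntriesPerTable(unsigned cbEntry)
{
    return 4096u / cbEntry;
}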
2724
2725
2726/**
2727 * Gets the entry size of a guest table.
2728 *
2729 * @param enmKind The kind of page.
2730 *
2731 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2732 * @returns If the kind is not for a table, an assertion is raised and 0 is
2733 * returned.
2734 */
2735DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2736{
2737 switch (enmKind)
2738 {
2739 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2740 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2741#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2742 case PGMPOOLKIND_32BIT_PD:
2743#else
2744 case PGMPOOLKIND_ROOT_32BIT_PD:
2745#endif
2746 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2747 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2748 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2749 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2750 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2751 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2752 return 4;
2753
2754 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2755 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2756 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2757 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2758 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2759 case PGMPOOLKIND_64BIT_PML4:
2760#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2761 case PGMPOOLKIND_PAE_PDPT:
2762#else
2763 case PGMPOOLKIND_ROOT_PAE_PD:
2764 case PGMPOOLKIND_ROOT_PDPT:
2765#endif
2766 return 8;
2767
2768 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2769 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2770 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2771 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2772 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2773 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2774 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2775 case PGMPOOLKIND_ROOT_NESTED:
2776 case PGMPOOLKIND_PAE_PD_PHYS:
2777 case PGMPOOLKIND_PAE_PDPT_PHYS:
2778 case PGMPOOLKIND_32BIT_PD_PHYS:
2779 /** @todo can we return 0? (nobody is calling this...) */
2780 AssertFailed();
2781 return 0;
2782
2783 default:
2784 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2785 }
2786}
2787
2788#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2789
2790/**
2791 * Scans one shadow page table for mappings of a physical page.
2792 *
2793 * @param pVM The VM handle.
2794 * @param pPhysPage The guest page in question.
2795 * @param iShw The shadow page table.
2796 * @param cRefs The number of references made in that PT.
2797 */
2798static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2799{
2800 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2801 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2802
2803 /*
2804 * Assert sanity.
2805 */
2806 Assert(cRefs == 1);
2807 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2808 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2809
2810 /*
2811 * Then, clear the actual mappings to the page in the shadow PT.
2812 */
2813 switch (pPage->enmKind)
2814 {
2815 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2816 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2817 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2818 {
2819 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2820 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2821 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2822 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2823 {
2824 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2825 pPT->a[i].u = 0;
2826 cRefs--;
2827 if (!cRefs)
2828 return;
2829 }
2830#ifdef LOG_ENABLED
2831 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2832 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2833 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2834 {
2835 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2836 pPT->a[i].u = 0;
2837 }
2838#endif
2839 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2840 break;
2841 }
2842
2843 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2844 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2845 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2846 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2847 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2848 {
2849 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2850 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2851 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2852 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2853 {
2854 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2855 pPT->a[i].u = 0;
2856 cRefs--;
2857 if (!cRefs)
2858 return;
2859 }
2860#ifdef LOG_ENABLED
2861 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2862 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2863 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2864 {
2865 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2866 pPT->a[i].u = 0;
2867 }
2868#endif
2869 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
2870 break;
2871 }
2872
2873 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2874 {
2875 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2876 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2877 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2878 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2879 {
2880 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2881 pPT->a[i].u = 0;
2882 cRefs--;
2883 if (!cRefs)
2884 return;
2885 }
2886#ifdef LOG_ENABLED
2887 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2888 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2889 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2890 {
2891 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2892 pPT->a[i].u = 0;
2893 }
2894#endif
2895 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2896 break;
2897 }
2898
2899 default:
2900 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2901 }
2902}
2903
2904
2905/**
2906 * Scans one shadow page table for mappings of a physical page.
2907 *
2908 * @param pVM The VM handle.
2909 * @param pPhysPage The guest page in question.
2910 * @param iShw The shadow page table.
2911 * @param cRefs The number of references made in that PT.
2912 */
2913void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2914{
2915 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2916 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2917 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2918 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2919 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2920 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2921}
2922
2923
2924/**
2925 * Flushes a list of shadow page tables mapping the same physical page.
2926 *
2927 * @param pVM The VM handle.
2928 * @param pPhysPage The guest page in question.
2929 * @param iPhysExt The physical cross reference extent list to flush.
2930 */
2931void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2932{
2933 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2934 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2935 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%d\n", pPhysPage->HCPhys, iPhysExt));
2936
2937 const uint16_t iPhysExtStart = iPhysExt;
2938 PPGMPOOLPHYSEXT pPhysExt;
2939 do
2940 {
2941 Assert(iPhysExt < pPool->cMaxPhysExts);
2942 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2943 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2944 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2945 {
2946 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2947 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2948 }
2949
2950 /* next */
2951 iPhysExt = pPhysExt->iNext;
2952 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2953
2954 /* insert the list into the free list and clear the ram range entry. */
2955 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2956 pPool->iPhysExtFreeHead = iPhysExtStart;
2957 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2958
2959 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2960}
2961
2962#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2963
2964/**
2965 * Scans all shadow page tables for mappings of a physical page.
2966 *
2967 * This may be slow, but it's most likely more efficient than cleaning
2968 * out the entire page pool / cache.
2969 *
2970 * @returns VBox status code.
2971 * @retval VINF_SUCCESS if all references have been successfully cleared.
2972 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2973 * a page pool cleaning.
2974 *
2975 * @param pVM The VM handle.
2976 * @param pPhysPage The guest page in question.
2977 */
2978int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2979{
2980 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2981 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2982 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2983 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2984
2985#if 1
2986 /*
2987 * There is a limit to what makes sense.
2988 */
2989 if (pPool->cPresent > 1024)
2990 {
2991 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2992 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2993 return VINF_PGM_GCPHYS_ALIASED;
2994 }
2995#endif
2996
2997 /*
2998 * Iterate all the pages until we've encountered all that are in use.
2999 * This is a simple but not quite optimal solution.
3000 */
3001 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3002 const uint32_t u32 = u64;
3003 unsigned cLeft = pPool->cUsedPages;
3004 unsigned iPage = pPool->cCurPages;
3005 while (--iPage >= PGMPOOL_IDX_FIRST)
3006 {
3007 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3008 if (pPage->GCPhys != NIL_RTGCPHYS)
3009 {
3010 switch (pPage->enmKind)
3011 {
3012 /*
3013 * We only care about shadow page tables.
3014 */
3015 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3016 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3017 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3018 {
3019 unsigned cPresent = pPage->cPresent;
3020 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3021 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3022 if (pPT->a[i].n.u1Present)
3023 {
3024 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3025 {
3026 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3027 pPT->a[i].u = 0;
3028 }
3029 if (!--cPresent)
3030 break;
3031 }
3032 break;
3033 }
3034
3035 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3036 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3037 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3038 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3039 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3040 {
3041 unsigned cPresent = pPage->cPresent;
3042 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3043 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3044 if (pPT->a[i].n.u1Present)
3045 {
3046 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3047 {
3048 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3049 pPT->a[i].u = 0;
3050 }
3051 if (!--cPresent)
3052 break;
3053 }
3054 break;
3055 }
3056 }
3057 if (!--cLeft)
3058 break;
3059 }
3060 }
3061
3062 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3063 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3064 return VINF_SUCCESS;
3065}
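/*
 * Illustrative sketch, not part of the build: both the hinted and the slow
 * scans above test PTEs by OR-ing the present bit into the target host
 * physical address and masking the entry down to its address + present bits,
 * so an entry only matches if it is present and maps exactly that page.  The
 * mask values below are stand-ins; the real X86_PTE_P / X86_PTE_PAE_PG_MASK
 * come from the x86 headers, and HCPhys is assumed to be page aligned.
 */
#include <stdint.h>
#include <stdbool.h>

#define DEMO_PTE_P        UINT64_C(0x0000000000000001)  /* present bit                     */
#define DEMO_PTE_PG_MASK  UINT64_C(0x000ffffffffff000)  /* address bits of an 8-byte entry */

/* True if the 8-byte entry is present and maps exactly the page at HCPhys. */
static inline bool demoPteMapsPage(uint64_t uPte, uint64_t HCPhys)
{
    const uint64_t u64 = HCPhys | DEMO_PTE_P;
    return (uPte & (DEMO_PTE_PG_MASK | DEMO_PTE_P)) == u64;
}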
3066
3067
3068/**
3069 * Clears the user entry in a user table.
3070 *
3071 * This is used to remove all references to a page when flushing it.
3072 */
3073static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3074{
3075 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3076 Assert(pUser->iUser < pPool->cCurPages);
3077 uint32_t iUserTable = pUser->iUserTable;
3078
3079 /*
3080 * Map the user page.
3081 */
3082 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3083#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3084 if (pUserPage->enmKind == PGMPOOLKIND_ROOT_PAE_PD)
3085 {
3086 /* Must translate the fake 2048-entry PD to one of the 512-entry PDs since the R0 mapping is not linear. */
3087 Assert(pUser->iUser == PGMPOOL_IDX_PAE_PD);
3088 uint32_t iPdpt = iUserTable / X86_PG_PAE_ENTRIES;
3089 iUserTable %= X86_PG_PAE_ENTRIES;
3090 pUserPage = &pPool->aPages[PGMPOOL_IDX_PAE_PD_0 + iPdpt];
3091 Assert(pUserPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD);
3092 }
3093#endif
3094 union
3095 {
3096 uint64_t *pau64;
3097 uint32_t *pau32;
3098 } u;
3099 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3100
3101 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3102
3103 /* Safety precaution in case we change the paging for other modes too in the future. */
3104#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3105 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3106#else
3107 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
3108#endif
3109
3110#ifdef VBOX_STRICT
3111 /*
3112 * Some sanity checks.
3113 */
3114 switch (pUserPage->enmKind)
3115 {
3116# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3117 case PGMPOOLKIND_32BIT_PD:
3118 case PGMPOOLKIND_32BIT_PD_PHYS:
3119 Assert(iUserTable < X86_PG_ENTRIES);
3120 break;
3121# else
3122 case PGMPOOLKIND_ROOT_32BIT_PD:
3123 Assert(iUserTable < X86_PG_ENTRIES);
3124 Assert(!(u.pau32[iUserTable] & PGM_PDFLAGS_MAPPING));
3125 break;
3126# endif
3127# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3128 case PGMPOOLKIND_ROOT_PAE_PD:
3129 Assert(iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
3130 AssertMsg(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING), ("%llx %d\n", u.pau64[iUserTable], iUserTable));
3131 break;
3132# endif
3133# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3134 case PGMPOOLKIND_PAE_PDPT:
3135 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3136 case PGMPOOLKIND_PAE_PDPT_PHYS:
3137# else
3138 case PGMPOOLKIND_ROOT_PDPT:
3139# endif
3140 Assert(iUserTable < 4);
3141 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3142 break;
3143 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3144 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3145 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3146 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3147 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3148 case PGMPOOLKIND_PAE_PD_PHYS:
3149 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3150 break;
3151 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3152 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3153 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3154 break;
3155 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3156 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3157 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3158 break;
3159 case PGMPOOLKIND_64BIT_PML4:
3160 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3161 /* GCPhys >> PAGE_SHIFT is the index here */
3162 break;
3163 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3164 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3165 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3166 break;
3167
3168 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3169 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3170 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3171 break;
3172
3173 case PGMPOOLKIND_ROOT_NESTED:
3174 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3175 break;
3176
3177 default:
3178 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3179 break;
3180 }
3181#endif /* VBOX_STRICT */
3182
3183 /*
3184 * Clear the entry in the user page.
3185 */
3186 switch (pUserPage->enmKind)
3187 {
3188 /* 32-bit entries */
3189#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3190 case PGMPOOLKIND_32BIT_PD:
3191 case PGMPOOLKIND_32BIT_PD_PHYS:
3192#else
3193 case PGMPOOLKIND_ROOT_32BIT_PD:
3194#endif
3195 u.pau32[iUserTable] = 0;
3196 break;
3197
3198 /* 64-bit entries */
3199 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3200 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3201 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3202 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3203 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3204#if defined(IN_RC) && defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3205 /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during CR3 load, so any
3206 * non-present PDPT will continue to cause page faults.
3207 */
3208 ASMReloadCR3();
3209#endif
3210 /* no break */
3211 case PGMPOOLKIND_PAE_PD_PHYS:
3212 case PGMPOOLKIND_PAE_PDPT_PHYS:
3213 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3214 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3215 case PGMPOOLKIND_64BIT_PML4:
3216 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3217 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3218#if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3219 case PGMPOOLKIND_ROOT_PAE_PD:
3220#endif
3221#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3222 case PGMPOOLKIND_PAE_PDPT:
3223 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3224#else
3225 case PGMPOOLKIND_ROOT_PDPT:
3226#endif
3227 case PGMPOOLKIND_ROOT_NESTED:
3228 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3229 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3230 u.pau64[iUserTable] = 0;
3231 break;
3232
3233 default:
3234 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3235 }
3236}
3237
3238
3239/**
3240 * Clears all users of a page.
3241 */
3242static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3243{
3244 /*
3245 * Free all the user records.
3246 */
3247 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3248
3249 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3250 uint16_t i = pPage->iUserHead;
3251 while (i != NIL_PGMPOOL_USER_INDEX)
3252 {
3253 /* Clear the entry in the user table. */
3254 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3255
3256 /* Free it. */
3257 const uint16_t iNext = paUsers[i].iNext;
3258 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3259 paUsers[i].iNext = pPool->iUserFreeHead;
3260 pPool->iUserFreeHead = i;
3261
3262 /* Next. */
3263 i = iNext;
3264 }
3265 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3266}
3267
3268#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3269
3270/**
3271 * Allocates a new physical cross reference extent.
3272 *
3273 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3274 * @param pVM The VM handle.
3275 * @param piPhysExt Where to store the phys ext index.
3276 */
3277PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3278{
3279 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3280 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3281 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3282 {
3283 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3284 return NULL;
3285 }
3286 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3287 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3288 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3289 *piPhysExt = iPhysExt;
3290 return pPhysExt;
3291}
3292
3293
3294/**
3295 * Frees a physical cross reference extent.
3296 *
3297 * @param pVM The VM handle.
3298 * @param iPhysExt The extent to free.
3299 */
3300void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3301{
3302 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3303 Assert(iPhysExt < pPool->cMaxPhysExts);
3304 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3305 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3306 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3307 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3308 pPool->iPhysExtFreeHead = iPhysExt;
3309}
3310
3311
3312/**
3313 * Frees a chain of physical cross reference extents.
3314 *
3315 * @param pVM The VM handle.
3316 * @param iPhysExt The first extent in the chain to free.
3317 */
3318void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3319{
3320 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3321
3322 const uint16_t iPhysExtStart = iPhysExt;
3323 PPGMPOOLPHYSEXT pPhysExt;
3324 do
3325 {
3326 Assert(iPhysExt < pPool->cMaxPhysExts);
3327 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3328 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3329 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3330
3331 /* next */
3332 iPhysExt = pPhysExt->iNext;
3333 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3334
3335 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3336 pPool->iPhysExtFreeHead = iPhysExtStart;
3337}
3338
3339
3340/**
3341 * Insert a reference into a list of physical cross reference extents.
3342 *
3343 * @returns The new ram range flags (top 16-bits).
3344 *
3345 * @param pVM The VM handle.
3346 * @param iPhysExt The physical extent index of the list head.
3347 * @param iShwPT The shadow page table index.
3348 *
3349 */
3350static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3351{
3352 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3353 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3354
3355 /* special common case. */
3356 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3357 {
3358 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3359 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3360 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
3361 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3362 }
3363
3364 /* general treatment. */
3365 const uint16_t iPhysExtStart = iPhysExt;
3366 unsigned cMax = 15;
3367 for (;;)
3368 {
3369 Assert(iPhysExt < pPool->cMaxPhysExts);
3370 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3371 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3372 {
3373 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3374 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3375 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3376 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3377 }
3378 if (!--cMax)
3379 {
3380 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3381 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3382 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
3383 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3384 }
            /* Advance to the next extent in the chain; leave the loop when the end
               of the list is reached so a new extent can be appended below. */
            iPhysExt = paPhysExts[iPhysExt].iNext;
            if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
                break;
3385    }
3386
3387 /* add another extent to the list. */
3388 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3389 if (!pNew)
3390 {
3391 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3392 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3393 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3394 }
3395 pNew->iNext = iPhysExtStart;
3396 pNew->aidx[0] = iShwPT;
3397 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3398 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3399}
3400
3401
3402/**
3403 * Add a reference to a guest physical page where extents are in use.
3404 *
3405 * @returns The new ram range flags (top 16-bits).
3406 *
3407 * @param pVM The VM handle.
3408 * @param u16 The ram range flags (top 16-bits).
3409 * @param iShwPT The shadow page table index.
3410 */
3411uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3412{
3413 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
3414 {
3415 /*
3416 * Convert to extent list.
3417 */
3418 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
3419 uint16_t iPhysExt;
3420 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3421 if (pPhysExt)
3422 {
3423 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
3424 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3425 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
3426 pPhysExt->aidx[1] = iShwPT;
3427 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3428 }
3429 else
3430 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3431 }
3432 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
3433 {
3434 /*
3435 * Insert into the extent list.
3436 */
3437 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
3438 }
3439 else
3440 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3441 return u16;
3442}
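
/*
 * Illustrative sketch (hypothetical helpers under #if 0, never compiled) of how
 * the 16-bit tracking word handled by pgmPoolTrackPhysExtAddref above is laid
 * out: the upper bits carry the cross reference count or the PHYSEXT marker,
 * the lower bits carry either a pool page index or a phys ext list index.
 */
#if 0
DECLINLINE(bool) pgmPoolExampleIsPhysExt(uint16_t u16)
{
    return (u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == MM_RAM_FLAGS_CREFS_PHYSEXT;
}

DECLINLINE(uint16_t) pgmPoolExampleGetIdx(uint16_t u16)
{
    return u16 & MM_RAM_FLAGS_IDX_MASK;
}
#endif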
3443
3444
3445/**
3446 * Clear references to guest physical memory.
3447 *
3448 * @param pPool The pool.
3449 * @param pPage The page.
3450 * @param pPhysPage Pointer to the aPages entry in the ram range.
3451 */
3452void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3453{
3454 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
3455 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3456
3457 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
3458 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
3459 {
3460 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3461 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3462 do
3463 {
3464 Assert(iPhysExt < pPool->cMaxPhysExts);
3465
3466 /*
3467 * Look for the shadow page and check if it's all freed.
3468 */
3469 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3470 {
3471 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3472 {
3473 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3474
3475 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3476 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3477 {
3478 Log2(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3479 return;
3480 }
3481
3482 /* we can free the node. */
3483 PVM pVM = pPool->CTX_SUFF(pVM);
3484 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3485 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3486 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3487 {
3488 /* lonely node */
3489 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3490 Log2(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
3491 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3492 }
3493 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3494 {
3495 /* head */
3496 Log2(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
3497 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
3498 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
3499 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
3500 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3501 }
3502 else
3503 {
3504 /* in list */
3505 Log2(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3506 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3507 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3508 }
3509 iPhysExt = iPhysExtNext;
3510 return;
3511 }
3512 }
3513
3514 /* next */
3515 iPhysExtPrev = iPhysExt;
3516 iPhysExt = paPhysExts[iPhysExt].iNext;
3517 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3518
3519 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3520 }
3521 else /* nothing to do */
3522 Log2(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
3523}
3524
3525
3526/**
3527 * Clear references to guest physical memory.
3528 *
3529 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3530 * is assumed to be correct, so the linear search can be skipped and we can assert
3531 * at an earlier point.
3532 *
3533 * @param pPool The pool.
3534 * @param pPage The page.
3535 * @param HCPhys The host physical address corresponding to the guest page.
3536 * @param GCPhys The guest physical address corresponding to HCPhys.
3537 */
3538static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3539{
3540 /*
3541 * Walk range list.
3542 */
3543 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3544 while (pRam)
3545 {
3546 RTGCPHYS off = GCPhys - pRam->GCPhys;
3547 if (off < pRam->cb)
3548 {
3549 /* does it match? */
3550 const unsigned iPage = off >> PAGE_SHIFT;
3551 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3552#ifdef LOG_ENABLED
3553            RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3554            Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3555#endif
3556 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3557 {
3558 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3559 return;
3560 }
3561 break;
3562 }
3563 pRam = pRam->CTX_SUFF(pNext);
3564 }
3565 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3566}
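
/*
 * Note on the range check above: "off < pRam->cb" alone covers both bounds
 * because the subtraction is unsigned -- when GCPhys lies below the range
 * start, GCPhys - pRam->GCPhys wraps to a huge value and fails the test.
 * A minimal standalone sketch of the same idiom (hypothetical helper, never
 * compiled):
 */
#if 0
static bool pgmPoolExampleRangeContains(RTGCPHYS GCPhysFirst, RTGCPHYS cb, RTGCPHYS GCPhys)
{
    RTGCPHYS off = GCPhys - GCPhysFirst; /* wraps when GCPhys < GCPhysFirst */
    return off < cb;
}
#endif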
3567
3568
3569/**
3570 * Clear references to guest physical memory.
3571 *
3572 * @param pPool The pool.
3573 * @param pPage The page.
3574 * @param HCPhys The host physical address corresponding to the guest page.
3575 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3576 */
3577static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3578{
3579 /*
3580 * Walk range list.
3581 */
3582 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3583 while (pRam)
3584 {
3585 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3586 if (off < pRam->cb)
3587 {
3588 /* does it match? */
3589 const unsigned iPage = off >> PAGE_SHIFT;
3590 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3591 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3592 {
3593 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3594 return;
3595 }
3596 break;
3597 }
3598 pRam = pRam->CTX_SUFF(pNext);
3599 }
3600
3601 /*
3602 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3603 */
3604 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3605 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3606 while (pRam)
3607 {
3608 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3609 while (iPage-- > 0)
3610 {
3611 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3612 {
3613 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3614 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3615 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3616 return;
3617 }
3618 }
3619 pRam = pRam->CTX_SUFF(pNext);
3620 }
3621
3622 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3623}
3624
3625
3626/**
3627 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3628 *
3629 * @param pPool The pool.
3630 * @param pPage The page.
3631 * @param pShwPT The shadow page table (mapping of the page).
3632 * @param pGstPT The guest page table.
3633 */
3634DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3635{
3636 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3637 if (pShwPT->a[i].n.u1Present)
3638 {
3639 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3640 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3641 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3642 if (!--pPage->cPresent)
3643 break;
3644 }
3645}
3646
3647
3648/**
3649 * Clear references to guest physical memory in a PAE / 32-bit page table.
3650 *
3651 * @param pPool The pool.
3652 * @param pPage The page.
3653 * @param pShwPT The shadow page table (mapping of the page).
3654 * @param pGstPT The guest page table (just a half one).
3655 */
3656DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3657{
3658 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3659 if (pShwPT->a[i].n.u1Present)
3660 {
3661 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
3662 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3663 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3664 }
3665}
3666
3667
3668/**
3669 * Clear references to guest physical memory in a PAE / PAE page table.
3670 *
3671 * @param pPool The pool.
3672 * @param pPage The page.
3673 * @param pShwPT The shadow page table (mapping of the page).
3674 * @param pGstPT The guest page table.
3675 */
3676DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3677{
3678 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3679 if (pShwPT->a[i].n.u1Present)
3680 {
3681 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
3682 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3683 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3684 }
3685}
3686
3687
3688/**
3689 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3690 *
3691 * @param pPool The pool.
3692 * @param pPage The page.
3693 * @param pShwPT The shadow page table (mapping of the page).
3694 */
3695DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3696{
3697 RTGCPHYS GCPhys = pPage->GCPhys;
3698 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3699 if (pShwPT->a[i].n.u1Present)
3700 {
3701 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3702 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3703 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3704 }
3705}
3706
3707
3708/**
3709 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3710 *
3711 * @param pPool The pool.
3712 * @param pPage The page.
3713 * @param pShwPT The shadow page table (mapping of the page).
3714 */
3715DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3716{
3717 RTGCPHYS GCPhys = pPage->GCPhys;
3718 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3719 if (pShwPT->a[i].n.u1Present)
3720 {
3721 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3722 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3723 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3724 }
3725}
3726
3727#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3728
3729
3730#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3731/**
3732 * Clear references to shadowed pages in a 32-bit page directory.
3733 *
3734 * @param pPool The pool.
3735 * @param pPage The page.
3736 * @param pShwPD The shadow page directory (mapping of the page).
3737 */
3738DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
3739{
3740 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3741 {
3742 if ( pShwPD->a[i].n.u1Present
3743 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3744 )
3745 {
3746 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
3747 if (pSubPage)
3748 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3749 else
3750 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
3751 }
3752 }
3753}
3754#endif
3755
3756/**
3757 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
3758 *
3759 * @param pPool The pool.
3760 * @param pPage The page.
3761 * @param pShwPD The shadow page directory (mapping of the page).
3762 */
3763DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3764{
3765 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3766 {
3767 if ( pShwPD->a[i].n.u1Present
3768#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3769 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3770#endif
3771 )
3772 {
3773 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3774 if (pSubPage)
3775 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3776 else
3777 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3778 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3779 }
3780 }
3781}
3782
3783
3784/**
3785 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3786 *
3787 * @param pPool The pool.
3788 * @param pPage The page.
3789 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3790 */
3791DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3792{
3793 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3794 {
3795 if ( pShwPDPT->a[i].n.u1Present
3796#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3797 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
3798#endif
3799 )
3800 {
3801 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3802 if (pSubPage)
3803 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3804 else
3805 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3806 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3807 }
3808 }
3809}
3810
3811
3812/**
3813 * Clear references to shadowed pages in a 64-bit level 4 page table.
3814 *
3815 * @param pPool The pool.
3816 * @param pPage The page.
3817 * @param pShwPML4 The shadow level 4 page table (mapping of the page).
3818 */
3819DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3820{
3821 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3822 {
3823 if (pShwPML4->a[i].n.u1Present)
3824 {
3825 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3826 if (pSubPage)
3827 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3828 else
3829 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3830 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3831 }
3832 }
3833}
3834
3835
3836/**
3837 * Clear references to shadowed pages in an EPT page table.
3838 *
3839 * @param pPool The pool.
3840 * @param pPage The page.
3841 * @param pShwPT The shadow EPT page table (mapping of the page).
3842 */
3843DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3844{
3845 RTGCPHYS GCPhys = pPage->GCPhys;
3846 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3847 if (pShwPT->a[i].n.u1Present)
3848 {
3849 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
3850 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
3851 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3852 }
3853}
3854
3855
3856/**
3857 * Clear references to shadowed pages in an EPT page directory.
3858 *
3859 * @param pPool The pool.
3860 * @param pPage The page.
3861 * @param pShwPD The shadow page directory (mapping of the page).
3862 */
3863DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3864{
3865 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3866 {
3867 if (pShwPD->a[i].n.u1Present)
3868 {
3869 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3870 if (pSubPage)
3871 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3872 else
3873 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3874 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3875 }
3876 }
3877}
3878
3879
3880/**
3881 * Clear references to shadowed pages in an EPT page directory pointer table.
3882 *
3883 * @param pPool The pool.
3884 * @param pPage The page.
3885 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3886 */
3887DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3888{
3889 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3890 {
3891 if (pShwPDPT->a[i].n.u1Present)
3892 {
3893 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3894 if (pSubPage)
3895 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3896 else
3897 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3898 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3899 }
3900 }
3901}
3902
3903
3904/**
3905 * Clears all references made by this page.
3906 *
3907 * This includes other shadow pages and GC physical addresses.
3908 *
3909 * @param pPool The pool.
3910 * @param pPage The page.
3911 */
3912static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3913{
3914 /*
3915 * Map the shadow page and take action according to the page kind.
3916 */
3917 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3918 switch (pPage->enmKind)
3919 {
3920#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3921 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3922 {
3923 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3924 void *pvGst;
3925 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3926 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3927 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3928 break;
3929 }
3930
3931 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3932 {
3933 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3934 void *pvGst;
3935 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3936 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3937 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3938 break;
3939 }
3940
3941 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3942 {
3943 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3944 void *pvGst;
3945 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3946 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3947 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3948 break;
3949 }
3950
3951 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3952 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3953 {
3954 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3955 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3956 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3957 break;
3958 }
3959
3960 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3961 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3962 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3963 {
3964 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3965 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3966 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3967 break;
3968 }
3969
3970#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3971 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3972 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3973 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3974 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3975 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3976 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3977 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3978 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3979 break;
3980#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3981
3982 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3983 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3984 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3985 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3986 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3987 case PGMPOOLKIND_PAE_PD_PHYS:
3988 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3989 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3990 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3991 break;
3992
3993#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3994 case PGMPOOLKIND_32BIT_PD_PHYS:
3995 case PGMPOOLKIND_32BIT_PD:
3996 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
3997 break;
3998
3999 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4000 case PGMPOOLKIND_PAE_PDPT:
4001 case PGMPOOLKIND_PAE_PDPT_PHYS:
4002#endif
4003 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4004 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4005 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4006 break;
4007
4008 case PGMPOOLKIND_64BIT_PML4:
4009 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4010 break;
4011
4012 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4013 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4014 break;
4015
4016 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4017 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4018 break;
4019
4020 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4021 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4022 break;
4023
4024 default:
4025 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4026 }
4027
4028    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4029 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4030 ASMMemZeroPage(pvShw);
4031 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4032 pPage->fZeroed = true;
4033}
4034
4035#endif /* PGMPOOL_WITH_USER_TRACKING */
4036
4037/**
4038 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
4039 *
4040 * @param pPool The pool.
4041 */
4042static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
4043{
4044#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4045 /* Start a subset so we won't run out of mapping space. */
4046 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4047 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4048#endif
4049
4050 /*
4051 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
4052 */
4053 Assert(NIL_PGMPOOL_IDX == 0);
4054 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
4055 {
4056 /*
4057 * Get the page address.
4058 */
4059 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4060 union
4061 {
4062 uint64_t *pau64;
4063 uint32_t *pau32;
4064 } u;
4065
4066 /*
4067 * Mark stuff not present.
4068 */
4069 switch (pPage->enmKind)
4070 {
4071#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4072 case PGMPOOLKIND_ROOT_32BIT_PD:
4073 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4074 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
4075 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
4076 u.pau32[iPage] = 0;
4077 break;
4078
4079 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4080 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4081 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
4082 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
4083 u.pau64[iPage] = 0;
4084 break;
4085
4086 case PGMPOOLKIND_ROOT_PDPT:
4087 /* Not root of shadowed pages currently, ignore it. */
4088 break;
4089#endif
4090
4091 case PGMPOOLKIND_ROOT_NESTED:
4092 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4093 ASMMemZero32(u.pau64, PAGE_SIZE);
4094 break;
4095 }
4096 }
4097
4098 /*
4099 * Paranoia (to be removed), flag a global CR3 sync.
4100 */
4101 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
4102
4103#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4104 /* Pop the subset. */
4105 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4106#endif
4107}
4108
4109
4110/**
4111 * Flushes the entire cache.
4112 *
4113 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4114 * and will execute the CR3 flush.
4115 *
4116 * @param pPool The pool.
4117 */
4118static void pgmPoolFlushAllInt(PPGMPOOL pPool)
4119{
4120 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
4121 LogFlow(("pgmPoolFlushAllInt:\n"));
4122
4123 /*
4124 * If there are no pages in the pool, there is nothing to do.
4125 */
4126 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4127 {
4128 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4129 return;
4130 }
4131
4132 /*
4133 * Nuke the free list and reinsert all pages into it.
4134 */
4135 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4136 {
4137 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4138
4139#ifdef IN_RING3
4140 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMR3, pPage->pvPageR3));
4141#endif
4142#ifdef PGMPOOL_WITH_MONITORING
4143 if (pPage->fMonitored)
4144 pgmPoolMonitorFlush(pPool, pPage);
4145 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4146 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4147 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4148 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4149 pPage->cModifications = 0;
4150#endif
4151 pPage->GCPhys = NIL_RTGCPHYS;
4152 pPage->enmKind = PGMPOOLKIND_FREE;
4153 Assert(pPage->idx == i);
4154 pPage->iNext = i + 1;
4155 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4156 pPage->fSeenNonGlobal = false;
4157        pPage->fMonitored = false;
4158 pPage->fCached = false;
4159 pPage->fReusedFlushPending = false;
4160#ifdef PGMPOOL_WITH_USER_TRACKING
4161 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4162#else
4163 pPage->fCR3Mix = false;
4164#endif
4165#ifdef PGMPOOL_WITH_CACHE
4166 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4167 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4168#endif
4169 }
4170 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4171 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4172 pPool->cUsedPages = 0;
4173
4174#ifdef PGMPOOL_WITH_USER_TRACKING
4175 /*
4176 * Zap and reinitialize the user records.
4177 */
4178 pPool->cPresent = 0;
4179 pPool->iUserFreeHead = 0;
4180 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4181 const unsigned cMaxUsers = pPool->cMaxUsers;
4182 for (unsigned i = 0; i < cMaxUsers; i++)
4183 {
4184 paUsers[i].iNext = i + 1;
4185 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4186 paUsers[i].iUserTable = 0xfffffffe;
4187 }
4188 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4189#endif
4190
4191#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4192 /*
4193 * Clear all the GCPhys links and rebuild the phys ext free list.
4194 */
4195 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4196 pRam;
4197 pRam = pRam->CTX_SUFF(pNext))
4198 {
4199 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4200 while (iPage-- > 0)
4201 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
4202 }
4203
4204 pPool->iPhysExtFreeHead = 0;
4205 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4206 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4207 for (unsigned i = 0; i < cMaxPhysExts; i++)
4208 {
4209 paPhysExts[i].iNext = i + 1;
4210 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4211 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4212 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4213 }
4214 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4215#endif
4216
4217#ifdef PGMPOOL_WITH_MONITORING
4218 /*
4219 * Just zap the modified list.
4220 */
4221 pPool->cModifiedPages = 0;
4222 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4223#endif
4224
4225#ifdef PGMPOOL_WITH_CACHE
4226 /*
4227 * Clear the GCPhys hash and the age list.
4228 */
4229 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4230 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4231 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4232 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4233#endif
4234
4235 /*
4236 * Flush all the special root pages.
4237 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4238 */
4239 pgmPoolFlushAllSpecialRoots(pPool);
4240 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4241 {
4242 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4243 pPage->iNext = NIL_PGMPOOL_IDX;
4244#ifdef PGMPOOL_WITH_MONITORING
4245 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4246 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4247 pPage->cModifications = 0;
4248 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4249 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4250 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4251 if (pPage->fMonitored)
4252 {
4253 PVM pVM = pPool->CTX_SUFF(pVM);
4254 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4255 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4256 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4257 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4258 pPool->pszAccessHandler);
4259 AssertFatalRCSuccess(rc);
4260# ifdef PGMPOOL_WITH_CACHE
4261 pgmPoolHashInsert(pPool, pPage);
4262# endif
4263 }
4264#endif
4265#ifdef PGMPOOL_WITH_USER_TRACKING
4266 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4267#endif
4268#ifdef PGMPOOL_WITH_CACHE
4269 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4270 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4271#endif
4272 }
4273
4274 /*
4275 * Finally, assert the FF.
4276 */
4277 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
4278
4279 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4280}
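
/*
 * Illustrative caller sketch (hypothetical, under #if 0, never compiled):
 * after a full pool flush every non-root page is back on the free list and
 * VM_FF_PGM_SYNC_CR3 has been raised, so the caller's main loop must perform
 * a full CR3 resync before guest execution continues.
 */
#if 0
static void pgmPoolExampleFullFlush(PPGMPOOL pPool)
{
    pgmPoolFlushAllInt(pPool);
    /* ...service VM_FF_PGM_SYNC_CR3 (SyncCR3) before resuming the guest... */
}
#endif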
4281
4282
4283/**
4284 * Flushes a pool page.
4285 *
4286 * This moves the page to the free list after removing all user references to it.
4287 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
4288 *
4289 * @returns VBox status code.
4290 * @retval VINF_SUCCESS on success.
4291 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
4292 * @param pPool The pool.
4293 * @param pPage The shadow page to flush.
4294 */
4295int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4296{
4297 int rc = VINF_SUCCESS;
4298 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4299 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4300 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4301
4302 /*
4303 * Quietly reject any attempts at flushing any of the special root pages.
4304 */
4305 if (pPage->idx < PGMPOOL_IDX_FIRST)
4306 {
4307 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4308 return VINF_SUCCESS;
4309 }
4310
4311 /*
4312 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4313 */
4314#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4315 if (pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
4316 {
4317 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4318 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4319 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4320 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4321 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4322 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4323 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4324 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4325 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4326 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4327#else
4328 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
4329 {
4330 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4,
4331 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4332#endif
4333 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4334 return VINF_SUCCESS;
4335 }
4336
4337#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4338 /* Start a subset so we won't run out of mapping space. */
4339 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4340 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4341#endif
4342
4343 /*
4344 * Mark the page as being in need of a ASMMemZeroPage().
4345 */
4346 pPage->fZeroed = false;
4347
4348#ifdef PGMPOOL_WITH_USER_TRACKING
4349 /*
4350 * Clear the page.
4351 */
4352 pgmPoolTrackClearPageUsers(pPool, pPage);
4353 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4354 pgmPoolTrackDeref(pPool, pPage);
4355 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4356#endif
4357
4358#ifdef PGMPOOL_WITH_CACHE
4359 /*
4360 * Flush it from the cache.
4361 */
4362 pgmPoolCacheFlushPage(pPool, pPage);
4363#endif /* PGMPOOL_WITH_CACHE */
4364
4365#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4366 /* Heavy stuff done. */
4367 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4368#endif
4369
4370#ifdef PGMPOOL_WITH_MONITORING
4371 /*
4372 * Deregistering the monitoring.
4373 */
4374 if (pPage->fMonitored)
4375 rc = pgmPoolMonitorFlush(pPool, pPage);
4376#endif
4377
4378 /*
4379 * Free the page.
4380 */
4381 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4382 pPage->iNext = pPool->iFreeHead;
4383 pPool->iFreeHead = pPage->idx;
4384 pPage->enmKind = PGMPOOLKIND_FREE;
4385 pPage->GCPhys = NIL_RTGCPHYS;
4386 pPage->fReusedFlushPending = false;
4387
4388 pPool->cUsedPages--;
4389 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4390 return rc;
4391}
4392
4393
4394/**
4395 * Frees a usage of a pool page.
4396 *
4397 * The caller is responsible for updating the user table so that it no longer
4398 * references the shadow page.
4399 *
4400 * @param pPool The pool.
4401 * @param pPage The shadow page.
4402 * @param iUser The shadow page pool index of the user table.
4403 * @param iUserTable The index into the user table (shadowed).
4404 */
4405void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4406{
4407 STAM_PROFILE_START(&pPool->StatFree, a);
4408 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4409 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4410 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4411#ifdef PGMPOOL_WITH_USER_TRACKING
4412 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4413#endif
4414#ifdef PGMPOOL_WITH_CACHE
4415 if (!pPage->fCached)
4416#endif
4417 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
4418 STAM_PROFILE_STOP(&pPool->StatFree, a);
4419}
4420
4421
4422/**
4423 * Makes one or more free pages available.
4424 *
4425 * @returns VBox status code.
4426 * @retval VINF_SUCCESS on success.
4427 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4428 *
4429 * @param pPool The pool.
4430 * @param enmKind Page table kind
4431 * @param iUser The user of the page.
4432 */
4433static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4434{
4435 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4436
4437 /*
4438 * If the pool isn't full grown yet, expand it.
4439 */
4440 if ( pPool->cCurPages < pPool->cMaxPages
4441#if defined(VBOX_WITH_PGMPOOL_PAGING_ONLY) && defined(IN_RC)
4442 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4443 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4444 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4445#endif
4446 )
4447 {
4448 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4449#ifdef IN_RING3
4450 int rc = PGMR3PoolGrow(pPool->pVMR3);
4451#else
4452 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
4453#endif
4454 if (RT_FAILURE(rc))
4455 return rc;
4456 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4457 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4458 return VINF_SUCCESS;
4459 }
4460
4461#ifdef PGMPOOL_WITH_CACHE
4462 /*
4463 * Free one cached page.
4464 */
4465 return pgmPoolCacheFreeOne(pPool, iUser);
4466#else
4467 /*
4468 * Flush the pool.
4469 *
4470 * If we have tracking enabled, it should be possible to come up with
4471 * a cheap replacement strategy...
4472 */
4473 /* @todo incompatible with long mode paging (cr3 root will be flushed) */
4474    Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
4475 pgmPoolFlushAllInt(pPool);
4476 return VERR_PGM_POOL_FLUSHED;
4477#endif
4478}
4479
4480
4481/**
4482 * Allocates a page from the pool.
4483 *
4484 * This page may actually be a cached page and not in need of any processing
4485 * on the callers part.
4486 *
4487 * @returns VBox status code.
4488 * @retval VINF_SUCCESS if a NEW page was allocated.
4489 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4490 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4491 * @param pVM The VM handle.
4492 * @param GCPhys The GC physical address of the page we're gonna shadow.
4493 * For 4MB and 2MB PD entries, it's the first address the
4494 * shadow PT is covering.
4495 * @param enmKind The kind of mapping.
4496 * @param iUser The shadow page pool index of the user table.
4497 * @param iUserTable The index into the user table (shadowed).
4498 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4499 */
4500int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
4501{
4502 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4503 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4504 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4505 *ppPage = NULL;
4506 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4507 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4508 * Assert(!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4509
4510#ifdef PGMPOOL_WITH_CACHE
4511 if (pPool->fCacheEnabled)
4512 {
4513 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
4514 if (RT_SUCCESS(rc2))
4515 {
4516 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4517 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4518 return rc2;
4519 }
4520 }
4521#endif
4522
4523 /*
4524 * Allocate a new one.
4525 */
4526 int rc = VINF_SUCCESS;
4527 uint16_t iNew = pPool->iFreeHead;
4528 if (iNew == NIL_PGMPOOL_IDX)
4529 {
4530 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4531 if (RT_FAILURE(rc))
4532 {
4533 if (rc != VERR_PGM_POOL_CLEARED)
4534 {
4535 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4536 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4537 return rc;
4538 }
4539 Log(("pgmPoolMakeMoreFreePages failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
4540 rc = VERR_PGM_POOL_FLUSHED;
4541 }
4542 iNew = pPool->iFreeHead;
4543 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4544 }
4545
4546 /* unlink the free head */
4547 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4548 pPool->iFreeHead = pPage->iNext;
4549 pPage->iNext = NIL_PGMPOOL_IDX;
4550
4551 /*
4552 * Initialize it.
4553 */
4554 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4555 pPage->enmKind = enmKind;
4556 pPage->GCPhys = GCPhys;
4557 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4558 pPage->fMonitored = false;
4559 pPage->fCached = false;
4560 pPage->fReusedFlushPending = false;
4561#ifdef PGMPOOL_WITH_MONITORING
4562 pPage->cModifications = 0;
4563 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4564 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4565#else
4566 pPage->fCR3Mix = false;
4567#endif
4568#ifdef PGMPOOL_WITH_USER_TRACKING
4569 pPage->cPresent = 0;
4570 pPage->iFirstPresent = ~0;
4571
4572 /*
4573 * Insert into the tracking and cache. If this fails, free the page.
4574 */
4575 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4576 if (RT_FAILURE(rc3))
4577 {
4578 if (rc3 != VERR_PGM_POOL_CLEARED)
4579 {
4580 pPool->cUsedPages--;
4581 pPage->enmKind = PGMPOOLKIND_FREE;
4582 pPage->GCPhys = NIL_RTGCPHYS;
4583 pPage->iNext = pPool->iFreeHead;
4584 pPool->iFreeHead = pPage->idx;
4585 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4586 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4587 return rc3;
4588 }
4589 Log(("pgmPoolTrackInsert failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4590 rc = VERR_PGM_POOL_FLUSHED;
4591 }
4592#endif /* PGMPOOL_WITH_USER_TRACKING */
4593
4594 /*
4595 * Commit the allocation, clear the page and return.
4596 */
4597#ifdef VBOX_WITH_STATISTICS
4598 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4599 pPool->cUsedPagesHigh = pPool->cUsedPages;
4600#endif
4601
4602 if (!pPage->fZeroed)
4603 {
4604 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4605 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4606 ASMMemZeroPage(pv);
4607 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4608 }
4609
4610 *ppPage = pPage;
4611 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4612 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4613 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4614 return rc;
4615}
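
/*
 * Illustrative usage sketch (hypothetical, under #if 0, never compiled):
 * allocating a shadow page table for a guest PAE PT and releasing the usage
 * again.  The GCPhys value, user page index and user table slot below are
 * made up; the real callers are the shadow paging templates.
 */
#if 0
static int pgmPoolExampleAllocFree(PVM pVM, uint16_t iUser)
{
    PPGMPOOLPAGE pPage;
    int rc = pgmPoolAlloc(pVM, 0x00400000 /* hypothetical GCPhys */,
                          PGMPOOLKIND_PAE_PT_FOR_PAE_PT, iUser, 0 /* iUserTable */, &pPage);
    if (RT_SUCCESS(rc)) /* VINF_SUCCESS or VINF_PGM_CACHED_PAGE */
    {
        /* ...store pPage->Core.Key (the HC physical address) in the user's PDE... */

        /* Drop the usage again; the page stays in the cache when possible. */
        pgmPoolFreeByPage(pVM->pgm.s.CTX_SUFF(pPool), pPage, iUser, 0 /* iUserTable */);
    }
    return rc;
}
#endif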
4616
4617
4618/**
4619 * Frees a usage of a pool page.
4620 *
4621 * @param pVM The VM handle.
4622 * @param HCPhys The HC physical address of the shadow page.
4623 * @param iUser The shadow page pool index of the user table.
4624 * @param iUserTable The index into the user table (shadowed).
4625 */
4626void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4627{
4628 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4629 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4630 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4631}
4632
4633
4634/**
4635 * Gets an in-use page in the pool by its physical address.
4636 *
4637 * @returns Pointer to the page.
4638 * @param pVM The VM handle.
4639 * @param HCPhys The HC physical address of the shadow page.
4640 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4641 */
4642PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4643{
4644 /** @todo profile this! */
4645 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4646 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4647 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%RHp -> %p:{.idx=%d .GCPhys=%RGp .enmKind=%s}\n",
4648 HCPhys, pPage, pPage->idx, pPage->GCPhys, pgmPoolPoolKindToStr(pPage->enmKind)));
4649 return pPage;
4650}
4651
4652
4653/**
4654 * Flushes the entire cache.
4655 *
4656 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4657 * and will execute the CR3 flush.
4658 *
4659 * @param pVM The VM handle.
4660 */
4661void pgmPoolFlushAll(PVM pVM)
4662{
4663 LogFlow(("pgmPoolFlushAll:\n"));
4664 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4665}
4666
4667#ifdef LOG_ENABLED
4668static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
4669{
4670 switch(enmKind)
4671 {
4672 case PGMPOOLKIND_INVALID:
4673 return "PGMPOOLKIND_INVALID";
4674 case PGMPOOLKIND_FREE:
4675 return "PGMPOOLKIND_FREE";
4676 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4677 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4678 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4679 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4680 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4681 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4682 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4683 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4684 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4685 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4686 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4687 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
4688 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4689 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
4690 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4691 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
4692 case PGMPOOLKIND_32BIT_PD:
4693 return "PGMPOOLKIND_32BIT_PD";
4694 case PGMPOOLKIND_32BIT_PD_PHYS:
4695 return "PGMPOOLKIND_32BIT_PD_PHYS";
4696 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4697 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
4698 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4699 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
4700 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4701 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
4702 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4703 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
4704 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4705 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
4706 case PGMPOOLKIND_PAE_PD_PHYS:
4707 return "PGMPOOLKIND_PAE_PD_PHYS";
4708 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4709 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
4710 case PGMPOOLKIND_PAE_PDPT:
4711 return "PGMPOOLKIND_PAE_PDPT";
4712 case PGMPOOLKIND_PAE_PDPT_PHYS:
4713 return "PGMPOOLKIND_PAE_PDPT_PHYS";
4714 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4715 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
4716 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4717 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
4718 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4719 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
4720 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4721 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
4722 case PGMPOOLKIND_64BIT_PML4:
4723 return "PGMPOOLKIND_64BIT_PML4";
4724 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4725 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
4726 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4727 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
4728 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4729 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
4730#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4731 case PGMPOOLKIND_ROOT_32BIT_PD:
4732 return "PGMPOOLKIND_ROOT_32BIT_PD";
4733 case PGMPOOLKIND_ROOT_PAE_PD:
4734 return "PGMPOOLKIND_ROOT_PAE_PD";
4735 case PGMPOOLKIND_ROOT_PDPT:
4736 return "PGMPOOLKIND_ROOT_PDPT";
4737#endif
4738 case PGMPOOLKIND_ROOT_NESTED:
4739 return "PGMPOOLKIND_ROOT_NESTED";
4740 }
4741 return "Unknown kind!";
4742}
4743#endif