VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 17121

Last change on this file since 17121 was 17120, checked in by vboxsync, 16 years ago

Compile fix

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 172.7 KB
Line 
1/* $Id: PGMAllPool.cpp 17120 2009-02-25 11:06:09Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42#include <iprt/string.h>
43
44
45/*******************************************************************************
46* Internal Functions *
47*******************************************************************************/
48__BEGIN_DECLS
49static void pgmPoolFlushAllInt(PPGMPOOL pPool);
50#ifdef PGMPOOL_WITH_USER_TRACKING
51DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
52DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
53static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
54#endif
55#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
56static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
57#endif
58#ifdef PGMPOOL_WITH_CACHE
59static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
60#endif
61#ifdef PGMPOOL_WITH_MONITORING
62static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
63#endif
64#ifndef IN_RING3
65DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
66#endif
67#ifdef LOG_ENABLED
68static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
69#endif
70__END_DECLS
71
72
73/**
74 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
75 *
76 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
77 * @param enmKind The page kind.
78 */
79DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
80{
81 switch (enmKind)
82 {
83 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
84 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
85 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
86 return true;
87 default:
88 return false;
89 }
90}
91
92
93#if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
94/**
95 * Maps a pool page into the current context.
96 *
97 * @returns Pointer to the mapping.
98 * @param pPGM Pointer to the PGM instance data.
99 * @param pPage The page to map.
100 */
101void *pgmPoolMapPageFallback(PPGM pPGM, PPGMPOOLPAGE pPage)
102{
103 /* general pages are take care of by the inlined part, it
104 only ends up here in case of failure. */
105 AssertReleaseReturn(pPage->idx < PGMPOOL_IDX_FIRST, NULL);
106
107/** @todo make sure HCPhys is valid for *all* indexes. */
108 /* special pages. */
109# ifdef IN_RC
110 switch (pPage->idx)
111 {
112# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
113 case PGMPOOL_IDX_PD:
114 case PGMPOOL_IDX_PDPT:
115 case PGMPOOL_IDX_AMD64_CR3:
116 return pPGM->pShwRootRC;
117# else
118 case PGMPOOL_IDX_PD:
119 return pPGM->pShw32BitPdRC;
120 case PGMPOOL_IDX_PAE_PD:
121 case PGMPOOL_IDX_PAE_PD_0:
122 return pPGM->apShwPaePDsRC[0];
123 case PGMPOOL_IDX_PAE_PD_1:
124 return pPGM->apShwPaePDsRC[1];
125 case PGMPOOL_IDX_PAE_PD_2:
126 return pPGM->apShwPaePDsRC[2];
127 case PGMPOOL_IDX_PAE_PD_3:
128 return pPGM->apShwPaePDsRC[3];
129 case PGMPOOL_IDX_PDPT:
130 return pPGM->pShwPaePdptRC;
131# endif
132 default:
133 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
134 return NULL;
135 }
136
137# else /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
138 RTHCPHYS HCPhys;
139 switch (pPage->idx)
140 {
141# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
142 case PGMPOOL_IDX_PD:
143 case PGMPOOL_IDX_PDPT:
144 case PGMPOOL_IDX_AMD64_CR3:
145 HCPhys = pPGM->HCPhysShwCR3;
146 break;
147
148 case PGMPOOL_IDX_NESTED_ROOT:
149 HCPhys = pPGM->HCPhysShwNestedRoot;
150 break;
151# else
152 case PGMPOOL_IDX_PD:
153 HCPhys = pPGM->HCPhysShw32BitPD;
154 break;
155 case PGMPOOL_IDX_PAE_PD_0:
156 HCPhys = pPGM->aHCPhysPaePDs[0];
157 break;
158 case PGMPOOL_IDX_PAE_PD_1:
159 HCPhys = pPGM->aHCPhysPaePDs[1];
160 break;
161 case PGMPOOL_IDX_PAE_PD_2:
162 HCPhys = pPGM->aHCPhysPaePDs[2];
163 break;
164 case PGMPOOL_IDX_PAE_PD_3:
165 HCPhys = pPGM->aHCPhysPaePDs[3];
166 break;
167 case PGMPOOL_IDX_PDPT:
168 HCPhys = pPGM->HCPhysShwPaePdpt;
169 break;
170 case PGMPOOL_IDX_NESTED_ROOT:
171 HCPhys = pPGM->HCPhysShwNestedRoot;
172 break;
173 case PGMPOOL_IDX_PAE_PD:
174 AssertReleaseMsgFailed(("PGMPOOL_IDX_PAE_PD is not usable in VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 context\n"));
175 return NULL;
176# endif
177 default:
178 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
179 return NULL;
180 }
181 AssertMsg(HCPhys && HCPhys != NIL_RTHCPHYS && !(PAGE_OFFSET_MASK & HCPhys), ("%RHp\n", HCPhys));
182
183 void *pv;
184 pgmR0DynMapHCPageInlined(pPGM, HCPhys, &pv);
185 return pv;
186# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
187}
188#endif /* IN_RC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
189
190
191#ifdef PGMPOOL_WITH_MONITORING
192/**
193 * Determin the size of a write instruction.
194 * @returns number of bytes written.
195 * @param pDis The disassembler state.
196 */
197static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
198{
199 /*
200 * This is very crude and possibly wrong for some opcodes,
201 * but since it's not really supposed to be called we can
202 * probably live with that.
203 */
204 return DISGetParamSize(pDis, &pDis->param1);
205}
206
207
208/**
209 * Flushes a chain of pages sharing the same access monitor.
210 *
211 * @returns VBox status code suitable for scheduling.
212 * @param pPool The pool.
213 * @param pPage A page in the chain.
214 */
215int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
216{
217 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
218
219 /*
220 * Find the list head.
221 */
222 uint16_t idx = pPage->idx;
223 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
224 {
225 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
226 {
227 idx = pPage->iMonitoredPrev;
228 Assert(idx != pPage->idx);
229 pPage = &pPool->aPages[idx];
230 }
231 }
232
233 /*
234 * Iterate the list flushing each shadow page.
235 */
236 int rc = VINF_SUCCESS;
237 for (;;)
238 {
239 idx = pPage->iMonitoredNext;
240 Assert(idx != pPage->idx);
241 if (pPage->idx >= PGMPOOL_IDX_FIRST)
242 {
243 int rc2 = pgmPoolFlushPage(pPool, pPage);
244 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
245 rc = VINF_PGM_SYNC_CR3;
246 }
247 /* next */
248 if (idx == NIL_PGMPOOL_IDX)
249 break;
250 pPage = &pPool->aPages[idx];
251 }
252 return rc;
253}
254
255
256/**
257 * Wrapper for getting the current context pointer to the entry being modified.
258 *
259 * @returns VBox status code suitable for scheduling.
260 * @param pVM VM Handle.
261 * @param pvDst Destination address
262 * @param pvSrc Source guest virtual address.
263 * @param GCPhysSrc The source guest physical address.
264 * @param cb Size of data to read
265 */
266DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
267{
268#ifdef IN_RC
269 int rc = MMGCRamRead(pVM, (RTRCPTR)((RTRCUINTPTR)pvDst & ~(cb - 1)), (RTRCPTR)pvSrc, cb);
270 if (RT_FAILURE(rc))
271 rc = PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
272 return rc;
273#elif defined(IN_RING3)
274 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
275 return VINF_SUCCESS;
276#else
277 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
278#endif
279}
280
281/**
282 * Process shadow entries before they are changed by the guest.
283 *
284 * For PT entries we will clear them. For PD entries, we'll simply check
285 * for mapping conflicts and set the SyncCR3 FF if found.
286 *
287 * @param pPool The pool.
288 * @param pPage The head page.
289 * @param GCPhysFault The guest physical fault address.
290 * @param uAddress In R0 and GC this is the guest context fault address (flat).
291 * In R3 this is the host context 'fault' address.
292 * @param pCpu The disassembler state for figuring out the write size.
293 * This need not be specified if the caller knows we won't do cross entry accesses.
294 */
295void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, PDISCPUSTATE pCpu)
296{
297 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
298 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
299 const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
300
301 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));
302
303 for (;;)
304 {
305 union
306 {
307 void *pv;
308 PX86PT pPT;
309 PX86PTPAE pPTPae;
310 PX86PD pPD;
311 PX86PDPAE pPDPae;
312 PX86PDPT pPDPT;
313 PX86PML4 pPML4;
314 } uShw;
315
316 switch (pPage->enmKind)
317 {
318 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
319 {
320 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
321 const unsigned iShw = off / sizeof(X86PTE);
322 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
323 if (uShw.pPT->a[iShw].n.u1Present)
324 {
325# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
326 X86PTE GstPte;
327
328 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
329 AssertRC(rc);
330 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
331 pgmPoolTracDerefGCPhysHint(pPool, pPage,
332 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
333 GstPte.u & X86_PTE_PG_MASK);
334# endif
335 uShw.pPT->a[iShw].u = 0;
336 }
337 break;
338 }
339
340 /* page/2 sized */
341 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
342 {
343 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
344 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
345 {
346 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
347 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
348 if (uShw.pPTPae->a[iShw].n.u1Present)
349 {
350# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
351 X86PTE GstPte;
352 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
353 AssertRC(rc);
354
355 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
356 pgmPoolTracDerefGCPhysHint(pPool, pPage,
357 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
358 GstPte.u & X86_PTE_PG_MASK);
359# endif
360 uShw.pPTPae->a[iShw].u = 0;
361 }
362 }
363 break;
364 }
365
366# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
367 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
368 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
369 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
370 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
371 {
372 unsigned iGst = off / sizeof(X86PDE);
373 unsigned iShwPdpt = iGst / 256;
374 unsigned iShw = (iGst % 256) * 2;
375 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
376
377 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x idx = %d page idx=%d\n", iGst, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
378 if (iShwPdpt == pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
379 {
380 for (unsigned i=0;i<2;i++)
381 {
382# ifndef IN_RING0
383 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
384 {
385 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
386 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
387 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
388 break;
389 }
390 else
391# endif /* !IN_RING0 */
392 if (uShw.pPDPae->a[iShw+i].n.u1Present)
393 {
394 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
395 pgmPoolFree(pPool->CTX_SUFF(pVM),
396 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
397 pPage->idx,
398 iShw + i);
399 uShw.pPDPae->a[iShw+i].u = 0;
400 }
401
402 /* paranoia / a bit assumptive. */
403 if ( pCpu
404 && (off & 3)
405 && (off & 3) + cbWrite > 4)
406 {
407 const unsigned iShw2 = iShw + 2 + i;
408 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
409 {
410# ifndef IN_RING0
411 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
412 {
413 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
414 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
415 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
416 break;
417 }
418 else
419# endif /* !IN_RING0 */
420 if (uShw.pPDPae->a[iShw2].n.u1Present)
421 {
422 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
423 pgmPoolFree(pPool->CTX_SUFF(pVM),
424 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
425 pPage->idx,
426 iShw2);
427 uShw.pPDPae->a[iShw2].u = 0;
428 }
429 }
430 }
431 }
432 }
433 break;
434 }
435# endif
436
437
438 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
439 {
440 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
441 const unsigned iShw = off / sizeof(X86PTEPAE);
442 if (uShw.pPTPae->a[iShw].n.u1Present)
443 {
444# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
445 X86PTEPAE GstPte;
446 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
447 AssertRC(rc);
448
449 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
450 pgmPoolTracDerefGCPhysHint(pPool, pPage,
451 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
452 GstPte.u & X86_PTE_PAE_PG_MASK);
453# endif
454 uShw.pPTPae->a[iShw].u = 0;
455 }
456
457 /* paranoia / a bit assumptive. */
458 if ( pCpu
459 && (off & 7)
460 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
461 {
462 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
463 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
464
465 if (uShw.pPTPae->a[iShw2].n.u1Present)
466 {
467# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
468 X86PTEPAE GstPte;
469# ifdef IN_RING3
470 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
471# else
472 int rc = pgmPoolPhysSimpleReadGCPhys(pPool->CTX_SUFF(pVM), &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
473# endif
474 AssertRC(rc);
475 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
476 pgmPoolTracDerefGCPhysHint(pPool, pPage,
477 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
478 GstPte.u & X86_PTE_PAE_PG_MASK);
479# endif
480 uShw.pPTPae->a[iShw2].u = 0;
481 }
482 }
483 break;
484 }
485
486# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
487 case PGMPOOLKIND_32BIT_PD:
488# else
489 case PGMPOOLKIND_ROOT_32BIT_PD:
490# endif
491 {
492 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
493 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
494
495# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
496 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
497# endif
498# ifndef IN_RING0
499 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
500 {
501 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
502 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
503 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
504 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
505 break;
506 }
507# endif /* !IN_RING0 */
508# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
509# ifndef IN_RING0
510 else
511# endif /* !IN_RING0 */
512 {
513 if (uShw.pPD->a[iShw].n.u1Present)
514 {
515 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
516 pgmPoolFree(pPool->CTX_SUFF(pVM),
517 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
518 pPage->idx,
519 iShw);
520 uShw.pPD->a[iShw].u = 0;
521 }
522 }
523# endif
524 /* paranoia / a bit assumptive. */
525 if ( pCpu
526 && (off & 3)
527 && (off & 3) + cbWrite > sizeof(X86PTE))
528 {
529 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
530 if ( iShw2 != iShw
531 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
532 {
533# ifndef IN_RING0
534 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
535 {
536 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
537 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
538 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
539 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
540 }
541# endif /* !IN_RING0 */
542# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
543# ifndef IN_RING0
544 else
545# endif /* !IN_RING0 */
546 {
547 if (uShw.pPD->a[iShw2].n.u1Present)
548 {
549 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
550 pgmPoolFree(pPool->CTX_SUFF(pVM),
551 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
552 pPage->idx,
553 iShw2);
554 uShw.pPD->a[iShw2].u = 0;
555 }
556 }
557# endif
558 }
559 }
560#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
561 if ( uShw.pPD->a[iShw].n.u1Present
562 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
563 {
564 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
565# ifdef IN_RC /* TLB load - we're pushing things a bit... */
566 ASMProbeReadByte(pvAddress);
567# endif
568 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
569 uShw.pPD->a[iShw].u = 0;
570 }
571#endif
572 break;
573 }
574
575# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
576 case PGMPOOLKIND_ROOT_PAE_PD:
577 {
578 unsigned iGst = off / sizeof(X86PDE); // ASSUMING 32-bit guest paging!
579 unsigned iShwPdpt = iGst / 256;
580 unsigned iShw = (iGst % 256) * 2;
581 Assert(pPage->idx == PGMPOOL_IDX_PAE_PD);
582 PPGMPOOLPAGE pPage2 = pPage + 1 + iShwPdpt;
583 Assert(pPage2->idx == PGMPOOL_IDX_PAE_PD_0 + iShwPdpt);
584 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage2);
585 for (unsigned i = 0; i < 2; i++, iShw++)
586 {
587 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
588 {
589 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
590 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
591 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw));
592 }
593 /* paranoia / a bit assumptive. */
594 else if ( pCpu
595 && (off & 3)
596 && (off & 3) + cbWrite > 4)
597 {
598 const unsigned iShw2 = iShw + 2;
599 if ( iShw2 < RT_ELEMENTS(uShw.pPDPae->a) /** @todo was completely wrong, it's better now after #1865 but still wrong from cross PD. */
600 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
601 {
602 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
603 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
604 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
605 }
606 }
607#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
608 if ( uShw.pPDPae->a[iShw].n.u1Present
609 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
610 {
611 LogFlow(("pgmPoolMonitorChainChanging: iShwPdpt=%#x iShw=%#x: %RX64 -> freeing it!\n", iShwPdpt, iShw, uShw.pPDPae->a[iShw].u));
612# ifdef IN_RC /* TLB load - we're pushing things a bit... */
613 ASMProbeReadByte(pvAddress);
614# endif
615 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + iShwPdpt * X86_PG_PAE_ENTRIES);
616 uShw.pPDPae->a[iShw].u = 0;
617 }
618#endif
619 }
620 break;
621 }
622# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */
623
624 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
625 {
626 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
627 const unsigned iShw = off / sizeof(X86PDEPAE);
628#ifndef IN_RING0
629 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
630 {
631 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
632 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
633 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
634 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
635 break;
636 }
637#endif /* !IN_RING0 */
638#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
639 /*
640 * Causes trouble when the guest uses a PDE to refer to the whole page table level
641 * structure. (Invalidate here; faults later on when it tries to change the page
642 * table entries -> recheck; probably only applies to the RC case.)
643 */
644# ifndef IN_RING0
645 else
646# endif /* !IN_RING0 */
647 {
648 if (uShw.pPDPae->a[iShw].n.u1Present)
649 {
650 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
651 pgmPoolFree(pPool->CTX_SUFF(pVM),
652 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
653# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
654 pPage->idx,
655 iShw);
656# else
657 /* Note: hardcoded PAE implementation dependency */
658 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
659 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
660# endif
661 uShw.pPDPae->a[iShw].u = 0;
662 }
663 }
664#endif
665 /* paranoia / a bit assumptive. */
666 if ( pCpu
667 && (off & 7)
668 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
669 {
670 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
671 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
672
673#ifndef IN_RING0
674 if ( iShw2 != iShw
675 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
676 {
677 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
678 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
679 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
680 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
681 }
682#endif /* !IN_RING0 */
683#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
684# ifndef IN_RING0
685 else
686# endif /* !IN_RING0 */
687 if (uShw.pPDPae->a[iShw2].n.u1Present)
688 {
689 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
690 pgmPoolFree(pPool->CTX_SUFF(pVM),
691 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
692# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
693 pPage->idx,
694 iShw2);
695# else
696 /* Note: hardcoded PAE implementation dependency */
697 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
698 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
699# endif
700 uShw.pPDPae->a[iShw2].u = 0;
701 }
702#endif
703 }
704 break;
705 }
706
707# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
708 case PGMPOOLKIND_PAE_PDPT:
709# else
710 case PGMPOOLKIND_ROOT_PDPT:
711# endif
712 {
713 /*
714 * Hopefully this doesn't happen very often:
715 * - touching unused parts of the page
716 * - messing with the bits of pd pointers without changing the physical address
717 */
718# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
719 /* PDPT roots are not page aligned; 32 byte only! */
720 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
721# else
722 const unsigned offPdpt = off;
723# endif
724 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
725 const unsigned iShw = offPdpt / sizeof(X86PDPE);
726 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
727 {
728# ifndef IN_RING0
729 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
730 {
731 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
732 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
733 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
734 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
735 break;
736 }
737# endif /* !IN_RING0 */
738# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
739# ifndef IN_RING0
740 else
741# endif /* !IN_RING0 */
742 if (uShw.pPDPT->a[iShw].n.u1Present)
743 {
744 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
745 pgmPoolFree(pPool->CTX_SUFF(pVM),
746 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
747 pPage->idx,
748 iShw);
749 uShw.pPDPT->a[iShw].u = 0;
750 }
751# endif
752
753 /* paranoia / a bit assumptive. */
754 if ( pCpu
755 && (offPdpt & 7)
756 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
757 {
758 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
759 if ( iShw2 != iShw
760 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
761 {
762# ifndef IN_RING0
763 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
764 {
765 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
766 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
767 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
768 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
769 }
770# endif /* !IN_RING0 */
771# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
772# ifndef IN_RING0
773 else
774# endif /* !IN_RING0 */
775 if (uShw.pPDPT->a[iShw2].n.u1Present)
776 {
777 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
778 pgmPoolFree(pPool->CTX_SUFF(pVM),
779 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
780 pPage->idx,
781 iShw2);
782 uShw.pPDPT->a[iShw2].u = 0;
783 }
784# endif
785 }
786 }
787 }
788 break;
789 }
790
791#ifndef IN_RC
792 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
793 {
794 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
795 const unsigned iShw = off / sizeof(X86PDEPAE);
796 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
797 if (uShw.pPDPae->a[iShw].n.u1Present)
798 {
799 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
800 pgmPoolFree(pPool->CTX_SUFF(pVM),
801 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
802 pPage->idx,
803 iShw);
804 uShw.pPDPae->a[iShw].u = 0;
805 }
806 /* paranoia / a bit assumptive. */
807 if ( pCpu
808 && (off & 7)
809 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
810 {
811 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
812 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
813
814 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
815 if (uShw.pPDPae->a[iShw2].n.u1Present)
816 {
817 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
818 pgmPoolFree(pPool->CTX_SUFF(pVM),
819 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
820 pPage->idx,
821 iShw2);
822 uShw.pPDPae->a[iShw2].u = 0;
823 }
824 }
825 break;
826 }
827
828 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
829 {
830 /*
831 * Hopefully this doesn't happen very often:
832 * - messing with the bits of pd pointers without changing the physical address
833 */
834# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
835 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
836# endif
837 {
838 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
839 const unsigned iShw = off / sizeof(X86PDPE);
840 if (uShw.pPDPT->a[iShw].n.u1Present)
841 {
842 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
843 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
844 uShw.pPDPT->a[iShw].u = 0;
845 }
846 /* paranoia / a bit assumptive. */
847 if ( pCpu
848 && (off & 7)
849 && (off & 7) + cbWrite > sizeof(X86PDPE))
850 {
851 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
852 if (uShw.pPDPT->a[iShw2].n.u1Present)
853 {
854 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
855 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
856 uShw.pPDPT->a[iShw2].u = 0;
857 }
858 }
859 }
860 break;
861 }
862
863 case PGMPOOLKIND_64BIT_PML4:
864 {
865 /*
866 * Hopefully this doesn't happen very often:
867 * - messing with the bits of pd pointers without changing the physical address
868 */
869# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
870 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
871# endif
872 {
873 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
874 const unsigned iShw = off / sizeof(X86PDPE);
875 if (uShw.pPML4->a[iShw].n.u1Present)
876 {
877 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
878 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
879 uShw.pPML4->a[iShw].u = 0;
880 }
881 /* paranoia / a bit assumptive. */
882 if ( pCpu
883 && (off & 7)
884 && (off & 7) + cbWrite > sizeof(X86PDPE))
885 {
886 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
887 if (uShw.pPML4->a[iShw2].n.u1Present)
888 {
889 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
890 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
891 uShw.pPML4->a[iShw2].u = 0;
892 }
893 }
894 }
895 break;
896 }
897#endif /* IN_RING0 */
898
899 default:
900 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
901 }
902
903 /* next */
904 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
905 return;
906 pPage = &pPool->aPages[pPage->iMonitoredNext];
907 }
908}
909
910#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
911/**
912 * Checks if the page is the active CR3 or is one of the four PDs of a PAE PDPT
913 *
914 * @returns VBox status code (appropriate for GC return).
915 * @param pVM VM Handle.
916 * @param pPage PGM pool page
917 */
918bool pgmPoolIsActiveRootPage(PVM pVM, PPGMPOOLPAGE pPage)
919{
920 /* First check the simple case. */
921 if (pPage == pVM->pgm.s.CTX_SUFF(pShwPageCR3))
922 {
923 LogFlow(("pgmPoolIsActiveRootPage found CR3 root\n"));
924 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
925 return true;
926 }
927
928# ifndef IN_RING0
929 switch (PGMGetShadowMode(pVM))
930 {
931 case PGMMODE_PAE:
932 case PGMMODE_PAE_NX:
933 {
934 switch (pPage->enmKind)
935 {
936 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
937 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
938 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
939 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
940 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
941 {
942 PX86PDPT pPdpt = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
943 Assert(pPdpt);
944
945 for (unsigned i=0;i<X86_PG_PAE_PDPE_ENTRIES;i++)
946 {
947 if ( (pPdpt->a[i].u & PGM_PLXFLAGS_MAPPING)
948 && pPage->Core.Key == (pPdpt->a[i].u & X86_PDPE_PG_MASK))
949 {
950 Assert(pPdpt->a[i].n.u1Present);
951 LogFlow(("pgmPoolIsActiveRootPage found PAE PDPE root\n"));
952 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
953 return true;
954 }
955 }
956 break;
957 }
958 }
959
960 break;
961 }
962 }
963# endif
964 return false;
965}
966#endif /* VBOX_WITH_PGMPOOL_PAGING_ONLY */
967
968
969# ifndef IN_RING3
970/**
971 * Checks if a access could be a fork operation in progress.
972 *
973 * Meaning, that the guest is setuping up the parent process for Copy-On-Write.
974 *
975 * @returns true if it's likly that we're forking, otherwise false.
976 * @param pPool The pool.
977 * @param pCpu The disassembled instruction.
978 * @param offFault The access offset.
979 */
980DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
981{
982 /*
983 * i386 linux is using btr to clear X86_PTE_RW.
984 * The functions involved are (2.6.16 source inspection):
985 * clear_bit
986 * ptep_set_wrprotect
987 * copy_one_pte
988 * copy_pte_range
989 * copy_pmd_range
990 * copy_pud_range
991 * copy_page_range
992 * dup_mmap
993 * dup_mm
994 * copy_mm
995 * copy_process
996 * do_fork
997 */
998 if ( pCpu->pCurInstr->opcode == OP_BTR
999 && !(offFault & 4)
1000 /** @todo Validate that the bit index is X86_PTE_RW. */
1001 )
1002 {
1003 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
1004 return true;
1005 }
1006 return false;
1007}
1008
1009
1010/**
1011 * Determine whether the page is likely to have been reused.
1012 *
1013 * @returns true if we consider the page as being reused for a different purpose.
1014 * @returns false if we consider it to still be a paging page.
1015 * @param pVM VM Handle.
1016 * @param pPage The page in question.
1017 * @param pRegFrame Trap register frame.
1018 * @param pCpu The disassembly info for the faulting instruction.
1019 * @param pvFault The fault address.
1020 *
1021 * @remark The REP prefix check is left to the caller because of STOSD/W.
1022 */
1023DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
1024{
1025#ifndef IN_RC
1026 /** @todo could make this general, faulting close to rsp should be safe reuse heuristic. */
1027 if ( HWACCMHasPendingIrq(pVM)
1028 && (pRegFrame->rsp - pvFault) < 32)
1029 {
1030 /* Fault caused by stack writes while trying to inject an interrupt event. */
1031 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
1032 return true;
1033 }
1034#else
1035 NOREF(pVM); NOREF(pvFault);
1036#endif
1037
1038 switch (pCpu->pCurInstr->opcode)
1039 {
1040 /* call implies the actual push of the return address faulted */
1041 case OP_CALL:
1042 Log4(("pgmPoolMonitorIsReused: CALL\n"));
1043 return true;
1044 case OP_PUSH:
1045 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
1046 return true;
1047 case OP_PUSHF:
1048 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
1049 return true;
1050 case OP_PUSHA:
1051 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
1052 return true;
1053 case OP_FXSAVE:
1054 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
1055 return true;
1056 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
1057 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
1058 return true;
1059 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
1060 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
1061 return true;
1062 case OP_MOVSWD:
1063 case OP_STOSWD:
1064 if ( pCpu->prefix == (PREFIX_REP|PREFIX_REX)
1065 && pRegFrame->rcx >= 0x40
1066 )
1067 {
1068 Assert(pCpu->mode == CPUMODE_64BIT);
1069
1070 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
1071 return true;
1072 }
1073 return false;
1074 }
1075 if ( (pCpu->param1.flags & USE_REG_GEN32)
1076 && (pCpu->param1.base.reg_gen == USE_REG_ESP))
1077 {
1078 Log4(("pgmPoolMonitorIsReused: ESP\n"));
1079 return true;
1080 }
1081
1082 //if (pPage->fCR3Mix)
1083 // return false;
1084 return false;
1085}
1086
1087
1088/**
1089 * Flushes the page being accessed.
1090 *
1091 * @returns VBox status code suitable for scheduling.
1092 * @param pVM The VM handle.
1093 * @param pPool The pool.
1094 * @param pPage The pool page (head).
1095 * @param pCpu The disassembly of the write instruction.
1096 * @param pRegFrame The trap register frame.
1097 * @param GCPhysFault The fault address as guest physical address.
1098 * @param pvFault The fault address.
1099 */
1100static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1101 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1102{
1103 /*
1104 * First, do the flushing.
1105 */
1106 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
1107
1108 /*
1109 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
1110 */
1111 uint32_t cbWritten;
1112 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
1113 if (RT_SUCCESS(rc2))
1114 pRegFrame->rip += pCpu->opsize;
1115 else if (rc2 == VERR_EM_INTERPRETER)
1116 {
1117#ifdef IN_RC
1118 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
1119 {
1120 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
1121 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
1122 rc = VINF_SUCCESS;
1123 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
1124 }
1125 else
1126#endif
1127 {
1128 rc = VINF_EM_RAW_EMULATE_INSTR;
1129 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1130 }
1131 }
1132 else
1133 rc = rc2;
1134
1135 /* See use in pgmPoolAccessHandlerSimple(). */
1136 PGM_INVL_GUEST_TLBS();
1137
1138 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
1139 return rc;
1140
1141}
1142
1143
1144/**
1145 * Handles the STOSD write accesses.
1146 *
1147 * @returns VBox status code suitable for scheduling.
1148 * @param pVM The VM handle.
1149 * @param pPool The pool.
1150 * @param pPage The pool page (head).
1151 * @param pCpu The disassembly of the write instruction.
1152 * @param pRegFrame The trap register frame.
1153 * @param GCPhysFault The fault address as guest physical address.
1154 * @param pvFault The fault address.
1155 */
1156DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1157 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1158{
1159 Assert(pCpu->mode == CPUMODE_32BIT);
1160
1161 Log3(("pgmPoolAccessHandlerSTOSD\n"));
1162
1163 /*
1164 * Increment the modification counter and insert it into the list
1165 * of modified pages the first time.
1166 */
1167 if (!pPage->cModifications++)
1168 pgmPoolMonitorModifiedInsert(pPool, pPage);
1169
1170 /*
1171 * Execute REP STOSD.
1172 *
1173 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
1174 * write situation, meaning that it's safe to write here.
1175 */
1176#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1177 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1178#endif
1179 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
1180 while (pRegFrame->ecx)
1181 {
1182#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1183 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1184 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1185 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1186#else
1187 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1188#endif
1189#ifdef IN_RC
1190 *(uint32_t *)pu32 = pRegFrame->eax;
1191#else
1192 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
1193#endif
1194 pu32 += 4;
1195 GCPhysFault += 4;
1196 pRegFrame->edi += 4;
1197 pRegFrame->ecx--;
1198 }
1199 pRegFrame->rip += pCpu->opsize;
1200
1201 /* See use in pgmPoolAccessHandlerSimple(). */
1202 PGM_INVL_GUEST_TLBS();
1203
1204 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
1205 return VINF_SUCCESS;
1206}
1207
1208
1209/**
1210 * Handles the simple write accesses.
1211 *
1212 * @returns VBox status code suitable for scheduling.
1213 * @param pVM The VM handle.
1214 * @param pPool The pool.
1215 * @param pPage The pool page (head).
1216 * @param pCpu The disassembly of the write instruction.
1217 * @param pRegFrame The trap register frame.
1218 * @param GCPhysFault The fault address as guest physical address.
1219 * @param pvFault The fault address.
1220 */
1221DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1222 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1223{
1224 Log3(("pgmPoolAccessHandlerSimple\n"));
1225 /*
1226 * Increment the modification counter and insert it into the list
1227 * of modified pages the first time.
1228 */
1229 if (!pPage->cModifications++)
1230 pgmPoolMonitorModifiedInsert(pPool, pPage);
1231
1232 /*
1233 * Clear all the pages. ASSUMES that pvFault is readable.
1234 */
1235#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1236 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1237 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1238 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
1239 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1240#else
1241 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
1242#endif
1243
1244 /*
1245 * Interpret the instruction.
1246 */
1247 uint32_t cb;
1248 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
1249 if (RT_SUCCESS(rc))
1250 pRegFrame->rip += pCpu->opsize;
1251 else if (rc == VERR_EM_INTERPRETER)
1252 {
1253 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1254 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
1255 rc = VINF_EM_RAW_EMULATE_INSTR;
1256 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1257 }
1258
1259 /*
1260 * Quick hack, with logging enabled we're getting stale
1261 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
1262 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
1263 * have to be fixed to support this. But that'll have to wait till next week.
1264 *
1265 * An alternative is to keep track of the changed PTEs together with the
1266 * GCPhys from the guest PT. This may proove expensive though.
1267 *
1268 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
1269 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1270 */
1271 PGM_INVL_GUEST_TLBS();
1272
1273 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1274 return rc;
1275}
1276
1277/**
1278 * \#PF Handler callback for PT write accesses.
1279 *
1280 * @returns VBox status code (appropriate for GC return).
1281 * @param pVM VM Handle.
1282 * @param uErrorCode CPU Error code.
1283 * @param pRegFrame Trap register frame.
1284 * NULL on DMA and other non CPU access.
1285 * @param pvFault The fault address (cr2).
1286 * @param GCPhysFault The GC physical address corresponding to pvFault.
1287 * @param pvUser User argument.
1288 */
1289DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1290{
1291 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1292 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1293 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1294 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1295
1296 /*
1297 * We should ALWAYS have the list head as user parameter. This
1298 * is because we use that page to record the changes.
1299 */
1300 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1301
1302 /*
1303 * Disassemble the faulting instruction.
1304 */
1305 DISCPUSTATE Cpu;
1306 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
1307 AssertRCReturn(rc, rc);
1308
1309 /*
1310 * Check if it's worth dealing with.
1311 */
1312 bool fReused = false;
1313 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1314#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1315 || pgmPoolIsActiveRootPage(pVM, pPage)
1316#else
1317 || pPage->fCR3Mix
1318#endif
1319 )
1320 && !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
1321 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
1322 {
1323 /*
1324 * Simple instructions, no REP prefix.
1325 */
1326 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
1327 {
1328 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1329 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1330 return rc;
1331 }
1332
1333 /*
1334 * Windows is frequently doing small memset() operations (netio test 4k+).
1335 * We have to deal with these or we'll kill the cache and performance.
1336 */
1337 if ( Cpu.pCurInstr->opcode == OP_STOSWD
1338 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
1339 && pRegFrame->ecx <= 0x20
1340 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1341 && !((uintptr_t)pvFault & 3)
1342 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1343 && Cpu.mode == CPUMODE_32BIT
1344 && Cpu.opmode == CPUMODE_32BIT
1345 && Cpu.addrmode == CPUMODE_32BIT
1346 && Cpu.prefix == PREFIX_REP
1347 && !pRegFrame->eflags.Bits.u1DF
1348 )
1349 {
1350 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1351 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1352 return rc;
1353 }
1354
1355 /* REP prefix, don't bother. */
1356 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1357 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1358 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
1359 }
1360
1361 /*
1362 * Not worth it, so flush it.
1363 *
1364 * If we considered it to be reused, don't to back to ring-3
1365 * to emulate failed instructions since we usually cannot
1366 * interpret then. This may be a bit risky, in which case
1367 * the reuse detection must be fixed.
1368 */
1369 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1370 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1371 rc = VINF_SUCCESS;
1372 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1373 return rc;
1374}
1375
1376# endif /* !IN_RING3 */
1377#endif /* PGMPOOL_WITH_MONITORING */
1378
1379#ifdef PGMPOOL_WITH_CACHE
1380
1381/**
1382 * Inserts a page into the GCPhys hash table.
1383 *
1384 * @param pPool The pool.
1385 * @param pPage The page.
1386 */
1387DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1388{
1389 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1390 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1391 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1392 pPage->iNext = pPool->aiHash[iHash];
1393 pPool->aiHash[iHash] = pPage->idx;
1394}
1395
1396
1397/**
1398 * Removes a page from the GCPhys hash table.
1399 *
1400 * @param pPool The pool.
1401 * @param pPage The page.
1402 */
1403DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1404{
1405 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1406 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1407 if (pPool->aiHash[iHash] == pPage->idx)
1408 pPool->aiHash[iHash] = pPage->iNext;
1409 else
1410 {
1411 uint16_t iPrev = pPool->aiHash[iHash];
1412 for (;;)
1413 {
1414 const int16_t i = pPool->aPages[iPrev].iNext;
1415 if (i == pPage->idx)
1416 {
1417 pPool->aPages[iPrev].iNext = pPage->iNext;
1418 break;
1419 }
1420 if (i == NIL_PGMPOOL_IDX)
1421 {
1422 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1423 break;
1424 }
1425 iPrev = i;
1426 }
1427 }
1428 pPage->iNext = NIL_PGMPOOL_IDX;
1429}
1430
1431
1432/**
1433 * Frees up one cache page.
1434 *
1435 * @returns VBox status code.
1436 * @retval VINF_SUCCESS on success.
1437 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
1438 * @param pPool The pool.
1439 * @param iUser The user index.
1440 */
1441static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1442{
1443#ifndef IN_RC
1444 const PVM pVM = pPool->CTX_SUFF(pVM);
1445#endif
1446 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
1447 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1448
1449 /*
1450 * Select one page from the tail of the age list.
1451 */
1452 uint16_t iToFree = pPool->iAgeTail;
1453 if (iToFree == iUser)
1454 iToFree = pPool->aPages[iToFree].iAgePrev;
1455/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1456 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1457 {
1458 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1459 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1460 {
1461 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1462 continue;
1463 iToFree = i;
1464 break;
1465 }
1466 }
1467*/
1468
1469 Assert(iToFree != iUser);
1470 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1471
1472 PPGMPOOLPAGE pPage = &pPool->aPages[iToFree];
1473
1474 /*
1475 * Reject any attempts at flushing the currently active shadow CR3 mapping
1476 */
1477 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
1478 {
1479 /* Refresh the cr3 mapping by putting it at the head of the age list. */
1480 LogFlow(("pgmPoolCacheFreeOne refuse CR3 mapping\n"));
1481 pgmPoolCacheUsed(pPool, pPage);
1482 return pgmPoolCacheFreeOne(pPool, iUser);
1483 }
1484
1485 int rc = pgmPoolFlushPage(pPool, pPage);
1486 if (rc == VINF_SUCCESS)
1487 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1488 return rc;
1489}
1490
1491
1492/**
1493 * Checks if a kind mismatch is really a page being reused
1494 * or if it's just normal remappings.
1495 *
1496 * @returns true if reused and the cached page (enmKind1) should be flushed
1497 * @returns false if not reused.
1498 * @param enmKind1 The kind of the cached page.
1499 * @param enmKind2 The kind of the requested page.
1500 */
1501static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1502{
1503 switch (enmKind1)
1504 {
1505 /*
1506 * Never reuse them. There is no remapping in non-paging mode.
1507 */
1508 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1509 case PGMPOOLKIND_32BIT_PD_PHYS:
1510 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1511 case PGMPOOLKIND_PAE_PD_PHYS:
1512 case PGMPOOLKIND_PAE_PDPT_PHYS:
1513 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1514 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1515 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1516 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1517 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1518#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1519 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1520 return false;
1521#else
1522 return true;
1523#endif
1524
1525 /*
1526 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1527 */
1528 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1529 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1530 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1531 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1532 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1533 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1534 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1535 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1536 case PGMPOOLKIND_32BIT_PD:
1537 switch (enmKind2)
1538 {
1539 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1540 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1541 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1542 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1543 case PGMPOOLKIND_64BIT_PML4:
1544 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1545 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1546 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1547 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1548 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1549 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1550 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1551 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1552 return true;
1553 default:
1554 return false;
1555 }
1556
1557 /*
1558 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1559 */
1560 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1561 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1562 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1563 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1564 case PGMPOOLKIND_64BIT_PML4:
1565 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1566 switch (enmKind2)
1567 {
1568 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1569 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1570 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1571 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1572 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1573 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1574 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1575 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1576 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1577 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1578 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1579 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1580 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1581 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1582 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1583 return true;
1584 default:
1585 return false;
1586 }
1587
1588 /*
1589 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1590 */
1591#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1592 case PGMPOOLKIND_ROOT_32BIT_PD:
1593 case PGMPOOLKIND_ROOT_PAE_PD:
1594 case PGMPOOLKIND_ROOT_PDPT:
1595#endif
1596 case PGMPOOLKIND_ROOT_NESTED:
1597 return false;
1598
1599 default:
1600 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1601 }
1602}
1603
1604
1605/**
1606 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1607 *
1608 * @returns VBox status code.
1609 * @retval VINF_PGM_CACHED_PAGE on success.
1610 * @retval VERR_FILE_NOT_FOUND if not found.
1611 * @param pPool The pool.
1612 * @param GCPhys The GC physical address of the page we're gonna shadow.
1613 * @param enmKind The kind of mapping.
1614 * @param iUser The shadow page pool index of the user table.
1615 * @param iUserTable The index into the user table (shadowed).
1616 * @param ppPage Where to store the pointer to the page.
1617 */
1618static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1619{
1620#ifndef IN_RC
1621 const PVM pVM = pPool->CTX_SUFF(pVM);
1622#endif
1623 /*
1624 * Look up the GCPhys in the hash.
1625 */
1626 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1627 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
1628 if (i != NIL_PGMPOOL_IDX)
1629 {
1630 do
1631 {
1632 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1633 Log3(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1634 if (pPage->GCPhys == GCPhys)
1635 {
1636 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
1637 {
1638 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1639 if (RT_SUCCESS(rc))
1640 {
1641 *ppPage = pPage;
1642 STAM_COUNTER_INC(&pPool->StatCacheHits);
1643 return VINF_PGM_CACHED_PAGE;
1644 }
1645 return rc;
1646 }
1647
1648 /*
1649 * The kind is different. In some cases we should now flush the page
1650 * as it has been reused, but in most cases this is normal remapping
1651 * of PDs as PT or big pages using the GCPhys field in a slightly
1652 * different way than the other kinds.
1653 */
1654 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1655 {
1656 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1657 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
1658 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1659 break;
1660 }
1661 }
1662
1663 /* next */
1664 i = pPage->iNext;
1665 } while (i != NIL_PGMPOOL_IDX);
1666 }
1667
1668 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
1669 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1670 return VERR_FILE_NOT_FOUND;
1671}
1672
1673
1674/**
1675 * Inserts a page into the cache.
1676 *
1677 * @param pPool The pool.
1678 * @param pPage The cached page.
1679 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1680 */
1681static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1682{
1683 /*
1684 * Insert into the GCPhys hash if the page is fit for that.
1685 */
1686 Assert(!pPage->fCached);
1687 if (fCanBeCached)
1688 {
1689 pPage->fCached = true;
1690 pgmPoolHashInsert(pPool, pPage);
1691 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1692 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1693 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1694 }
1695 else
1696 {
1697 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1698 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1699 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1700 }
1701
1702 /*
1703 * Insert at the head of the age list.
1704 */
1705 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1706 pPage->iAgeNext = pPool->iAgeHead;
1707 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1708 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1709 else
1710 pPool->iAgeTail = pPage->idx;
1711 pPool->iAgeHead = pPage->idx;
1712}
1713
1714
1715/**
1716 * Flushes a cached page.
1717 *
1718 * @param pPool The pool.
1719 * @param pPage The cached page.
1720 */
1721static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1722{
1723 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1724
1725 /*
1726 * Remove the page from the hash.
1727 */
1728 if (pPage->fCached)
1729 {
1730 pPage->fCached = false;
1731 pgmPoolHashRemove(pPool, pPage);
1732 }
1733 else
1734 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1735
1736 /*
1737 * Remove it from the age list.
1738 */
1739 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1740 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1741 else
1742 pPool->iAgeTail = pPage->iAgePrev;
1743 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1744 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1745 else
1746 pPool->iAgeHead = pPage->iAgeNext;
1747 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1748 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1749}
1750
1751#endif /* PGMPOOL_WITH_CACHE */
1752#ifdef PGMPOOL_WITH_MONITORING
1753
1754/**
1755 * Looks for pages sharing the monitor.
1756 *
1757 * @returns Pointer to the head page.
1758 * @returns NULL if not found.
1759 * @param pPool The Pool
1760 * @param pNewPage The page which is going to be monitored.
1761 */
1762static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1763{
1764#ifdef PGMPOOL_WITH_CACHE
1765 /*
1766 * Look up the GCPhys in the hash.
1767 */
1768 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1769 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1770 if (i == NIL_PGMPOOL_IDX)
1771 return NULL;
1772 do
1773 {
1774 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1775 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1776 && pPage != pNewPage)
1777 {
1778 switch (pPage->enmKind)
1779 {
1780 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1781 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1782 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1783 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1784 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1785 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1786 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1787 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1788 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1789 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1790 case PGMPOOLKIND_64BIT_PML4:
1791#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1792 case PGMPOOLKIND_32BIT_PD:
1793 case PGMPOOLKIND_PAE_PDPT:
1794#else
1795 case PGMPOOLKIND_ROOT_32BIT_PD:
1796 case PGMPOOLKIND_ROOT_PAE_PD:
1797 case PGMPOOLKIND_ROOT_PDPT:
1798#endif
1799 {
1800 /* find the head */
1801 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1802 {
1803 Assert(pPage->iMonitoredPrev != pPage->idx);
1804 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1805 }
1806 return pPage;
1807 }
1808
1809 /* ignore, no monitoring. */
1810 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1811 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1812 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1813 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1814 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1815 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1816 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1817 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1818 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1819 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1820 case PGMPOOLKIND_ROOT_NESTED:
1821 case PGMPOOLKIND_PAE_PD_PHYS:
1822 case PGMPOOLKIND_PAE_PDPT_PHYS:
1823 case PGMPOOLKIND_32BIT_PD_PHYS:
1824#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1825 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1826#endif
1827 break;
1828 default:
1829 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1830 }
1831 }
1832
1833 /* next */
1834 i = pPage->iNext;
1835 } while (i != NIL_PGMPOOL_IDX);
1836#endif
1837 return NULL;
1838}
1839
1840
1841/**
1842 * Enabled write monitoring of a guest page.
1843 *
1844 * @returns VBox status code.
1845 * @retval VINF_SUCCESS on success.
1846 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1847 * @param pPool The pool.
1848 * @param pPage The cached page.
1849 */
1850static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1851{
1852 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1853
1854 /*
1855 * Filter out the relevant kinds.
1856 */
1857 switch (pPage->enmKind)
1858 {
1859 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1860 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1861 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1862 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1863 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1864 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1865 case PGMPOOLKIND_64BIT_PML4:
1866#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1867 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1868 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1869 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1870 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1871 case PGMPOOLKIND_32BIT_PD:
1872 case PGMPOOLKIND_PAE_PDPT:
1873#else
1874 case PGMPOOLKIND_ROOT_PDPT:
1875#endif
1876 break;
1877
1878 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1879 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1880 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1881 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1882 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1883 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1884 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1885 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1886 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1887 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1888 case PGMPOOLKIND_ROOT_NESTED:
1889 /* Nothing to monitor here. */
1890 return VINF_SUCCESS;
1891
1892#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1893 case PGMPOOLKIND_32BIT_PD_PHYS:
1894 case PGMPOOLKIND_PAE_PDPT_PHYS:
1895 case PGMPOOLKIND_PAE_PD_PHYS:
1896 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1897 /* Nothing to monitor here. */
1898 return VINF_SUCCESS;
1899#else
1900 case PGMPOOLKIND_ROOT_32BIT_PD:
1901 case PGMPOOLKIND_ROOT_PAE_PD:
1902#endif
1903#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1904 break;
1905#else
1906 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1907#endif
1908 default:
1909 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1910 }
1911
1912 /*
1913 * Install handler.
1914 */
1915 int rc;
1916 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1917 if (pPageHead)
1918 {
1919 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1920 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1921 pPage->iMonitoredPrev = pPageHead->idx;
1922 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1923 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1924 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1925 pPageHead->iMonitoredNext = pPage->idx;
1926 rc = VINF_SUCCESS;
1927 }
1928 else
1929 {
1930 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1931 PVM pVM = pPool->CTX_SUFF(pVM);
1932 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1933 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1934 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1935 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1936 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1937 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1938 pPool->pszAccessHandler);
1939 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1940 * the heap size should suffice. */
1941 AssertFatalRC(rc);
1942 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1943 rc = VERR_PGM_POOL_CLEARED;
1944 }
1945 pPage->fMonitored = true;
1946 return rc;
1947}
1948
1949
1950/**
1951 * Disables write monitoring of a guest page.
1952 *
1953 * @returns VBox status code.
1954 * @retval VINF_SUCCESS on success.
1955 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1956 * @param pPool The pool.
1957 * @param pPage The cached page.
1958 */
1959static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1960{
1961 /*
1962 * Filter out the relevant kinds.
1963 */
1964 switch (pPage->enmKind)
1965 {
1966 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1967 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1968 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1969 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1970 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1971 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1972 case PGMPOOLKIND_64BIT_PML4:
1973#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1974 case PGMPOOLKIND_32BIT_PD:
1975 case PGMPOOLKIND_PAE_PDPT:
1976 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1977 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1978 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1979 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1980#else
1981 case PGMPOOLKIND_ROOT_PDPT:
1982#endif
1983 break;
1984
1985 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1986 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1987 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1988 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1989 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1990 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1991 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1992 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1993 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1994 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1995 case PGMPOOLKIND_ROOT_NESTED:
1996 case PGMPOOLKIND_PAE_PD_PHYS:
1997 case PGMPOOLKIND_PAE_PDPT_PHYS:
1998 case PGMPOOLKIND_32BIT_PD_PHYS:
1999 /* Nothing to monitor here. */
2000 return VINF_SUCCESS;
2001
2002#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2003 case PGMPOOLKIND_ROOT_32BIT_PD:
2004 case PGMPOOLKIND_ROOT_PAE_PD:
2005#endif
2006#ifdef PGMPOOL_WITH_MIXED_PT_CR3
2007 break;
2008#endif
2009#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2010 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2011#endif
2012 default:
2013 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2014 }
2015
2016 /*
2017 * Remove the page from the monitored list or uninstall it if last.
2018 */
2019 const PVM pVM = pPool->CTX_SUFF(pVM);
2020 int rc;
2021 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2022 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2023 {
2024 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2025 {
2026 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2027 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2028#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2029 pNewHead->fCR3Mix = pPage->fCR3Mix;
2030#endif
2031 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2032 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2033 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2034 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2035 pPool->pszAccessHandler);
2036 AssertFatalRCSuccess(rc);
2037 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2038 }
2039 else
2040 {
2041 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2042 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2043 {
2044 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2045 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2046 }
2047 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2048 rc = VINF_SUCCESS;
2049 }
2050 }
2051 else
2052 {
2053 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2054 AssertFatalRC(rc);
2055 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2056 rc = VERR_PGM_POOL_CLEARED;
2057 }
2058 pPage->fMonitored = false;
2059
2060 /*
2061 * Remove it from the list of modified pages (if in it).
2062 */
2063 pgmPoolMonitorModifiedRemove(pPool, pPage);
2064
2065 return rc;
2066}
2067
2068# if defined(PGMPOOL_WITH_MIXED_PT_CR3) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2069
2070/**
2071 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
2072 *
2073 * @param pPool The Pool.
2074 * @param pPage A page in the chain.
2075 * @param fCR3Mix The new fCR3Mix value.
2076 */
2077static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
2078{
2079 /* current */
2080 pPage->fCR3Mix = fCR3Mix;
2081
2082 /* before */
2083 int16_t idx = pPage->iMonitoredPrev;
2084 while (idx != NIL_PGMPOOL_IDX)
2085 {
2086 pPool->aPages[idx].fCR3Mix = fCR3Mix;
2087 idx = pPool->aPages[idx].iMonitoredPrev;
2088 }
2089
2090 /* after */
2091 idx = pPage->iMonitoredNext;
2092 while (idx != NIL_PGMPOOL_IDX)
2093 {
2094 pPool->aPages[idx].fCR3Mix = fCR3Mix;
2095 idx = pPool->aPages[idx].iMonitoredNext;
2096 }
2097}
2098
2099
2100/**
2101 * Installs or modifies monitoring of a CR3 page (special).
2102 *
2103 * We're pretending the CR3 page is shadowed by the pool so we can use the
2104 * generic mechanisms in detecting chained monitoring. (This also gives us a
2105 * tast of what code changes are required to really pool CR3 shadow pages.)
2106 *
2107 * @returns VBox status code.
2108 * @param pPool The pool.
2109 * @param idxRoot The CR3 (root) page index.
2110 * @param GCPhysCR3 The (new) CR3 value.
2111 */
2112int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
2113{
2114 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
2115 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
2116 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d} GCPhysCR3=%RGp\n",
2117 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
2118
2119 /*
2120 * The unlikely case where it already matches.
2121 */
2122 if (pPage->GCPhys == GCPhysCR3)
2123 {
2124 Assert(pPage->fMonitored);
2125 return VINF_SUCCESS;
2126 }
2127
2128 /*
2129 * Flush the current monitoring and remove it from the hash.
2130 */
2131 int rc = VINF_SUCCESS;
2132 if (pPage->fMonitored)
2133 {
2134 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
2135 rc = pgmPoolMonitorFlush(pPool, pPage);
2136 if (rc == VERR_PGM_POOL_CLEARED)
2137 rc = VINF_SUCCESS;
2138 else
2139 AssertFatalRC(rc);
2140 pgmPoolHashRemove(pPool, pPage);
2141 }
2142
2143 /*
2144 * Monitor the page at the new location and insert it into the hash.
2145 */
2146 pPage->GCPhys = GCPhysCR3;
2147 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
2148 if (rc2 != VERR_PGM_POOL_CLEARED)
2149 {
2150 AssertFatalRC(rc2);
2151 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
2152 rc = rc2;
2153 }
2154 pgmPoolHashInsert(pPool, pPage);
2155 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
2156 return rc;
2157}
2158
2159
2160/**
2161 * Removes the monitoring of a CR3 page (special).
2162 *
2163 * @returns VBox status code.
2164 * @param pPool The pool.
2165 * @param idxRoot The CR3 (root) page index.
2166 */
2167int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
2168{
2169 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
2170 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
2171 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d}\n",
2172 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
2173
2174 if (!pPage->fMonitored)
2175 return VINF_SUCCESS;
2176
2177 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
2178 int rc = pgmPoolMonitorFlush(pPool, pPage);
2179 if (rc != VERR_PGM_POOL_CLEARED)
2180 AssertFatalRC(rc);
2181 else
2182 rc = VINF_SUCCESS;
2183 pgmPoolHashRemove(pPool, pPage);
2184 Assert(!pPage->fMonitored);
2185 pPage->GCPhys = NIL_RTGCPHYS;
2186 return rc;
2187}
2188
2189# endif /* PGMPOOL_WITH_MIXED_PT_CR3 && !VBOX_WITH_PGMPOOL_PAGING_ONLY*/
2190
2191/**
2192 * Inserts the page into the list of modified pages.
2193 *
2194 * @param pPool The pool.
2195 * @param pPage The page.
2196 */
2197void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2198{
2199 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2200 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2201 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2202 && pPool->iModifiedHead != pPage->idx,
2203 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2204 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2205 pPool->iModifiedHead, pPool->cModifiedPages));
2206
2207 pPage->iModifiedNext = pPool->iModifiedHead;
2208 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2209 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2210 pPool->iModifiedHead = pPage->idx;
2211 pPool->cModifiedPages++;
2212#ifdef VBOX_WITH_STATISTICS
2213 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2214 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2215#endif
2216}
2217
2218
2219/**
2220 * Removes the page from the list of modified pages and resets the
2221 * moficiation counter.
2222 *
2223 * @param pPool The pool.
2224 * @param pPage The page which is believed to be in the list of modified pages.
2225 */
2226static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2227{
2228 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2229 if (pPool->iModifiedHead == pPage->idx)
2230 {
2231 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2232 pPool->iModifiedHead = pPage->iModifiedNext;
2233 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2234 {
2235 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2236 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2237 }
2238 pPool->cModifiedPages--;
2239 }
2240 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2241 {
2242 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2243 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2244 {
2245 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2246 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2247 }
2248 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2249 pPool->cModifiedPages--;
2250 }
2251 else
2252 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2253 pPage->cModifications = 0;
2254}
2255
2256
2257/**
2258 * Zaps the list of modified pages, resetting their modification counters in the process.
2259 *
2260 * @param pVM The VM handle.
2261 */
2262void pgmPoolMonitorModifiedClearAll(PVM pVM)
2263{
2264 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2265 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2266
2267 unsigned cPages = 0; NOREF(cPages);
2268 uint16_t idx = pPool->iModifiedHead;
2269 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2270 while (idx != NIL_PGMPOOL_IDX)
2271 {
2272 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2273 idx = pPage->iModifiedNext;
2274 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2275 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2276 pPage->cModifications = 0;
2277 Assert(++cPages);
2278 }
2279 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2280 pPool->cModifiedPages = 0;
2281}
2282
2283
2284#ifdef IN_RING3
2285/**
2286 * Clear all shadow pages and clear all modification counters.
2287 *
2288 * @param pVM The VM handle.
2289 * @remark Should only be used when monitoring is available, thus placed in
2290 * the PGMPOOL_WITH_MONITORING #ifdef.
2291 */
2292void pgmPoolClearAll(PVM pVM)
2293{
2294 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2295 STAM_PROFILE_START(&pPool->StatClearAll, c);
2296 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2297
2298 /*
2299 * Iterate all the pages until we've encountered all that in use.
2300 * This is simple but not quite optimal solution.
2301 */
2302 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2303 unsigned cLeft = pPool->cUsedPages;
2304 unsigned iPage = pPool->cCurPages;
2305 while (--iPage >= PGMPOOL_IDX_FIRST)
2306 {
2307 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2308 if (pPage->GCPhys != NIL_RTGCPHYS)
2309 {
2310 switch (pPage->enmKind)
2311 {
2312 /*
2313 * We only care about shadow page tables.
2314 */
2315 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2316 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2317 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2318 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2319 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2320 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2321 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2322 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2323 {
2324#ifdef PGMPOOL_WITH_USER_TRACKING
2325 if (pPage->cPresent)
2326#endif
2327 {
2328 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2329 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2330 ASMMemZeroPage(pvShw);
2331 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2332#ifdef PGMPOOL_WITH_USER_TRACKING
2333 pPage->cPresent = 0;
2334 pPage->iFirstPresent = ~0;
2335#endif
2336 }
2337 }
2338 /* fall thru */
2339
2340 default:
2341 Assert(!pPage->cModifications || ++cModifiedPages);
2342 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2343 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2344 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2345 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2346 pPage->cModifications = 0;
2347 break;
2348
2349 }
2350 if (!--cLeft)
2351 break;
2352 }
2353 }
2354
2355 /* swipe the special pages too. */
2356 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2357 {
2358 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2359 if (pPage->GCPhys != NIL_RTGCPHYS)
2360 {
2361 Assert(!pPage->cModifications || ++cModifiedPages);
2362 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2363 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2364 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2365 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2366 pPage->cModifications = 0;
2367 }
2368 }
2369
2370#ifndef DEBUG_michael
2371 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2372#endif
2373 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2374 pPool->cModifiedPages = 0;
2375
2376#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2377 /*
2378 * Clear all the GCPhys links and rebuild the phys ext free list.
2379 */
2380 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2381 pRam;
2382 pRam = pRam->CTX_SUFF(pNext))
2383 {
2384 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2385 while (iPage-- > 0)
2386 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2387 }
2388
2389 pPool->iPhysExtFreeHead = 0;
2390 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2391 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2392 for (unsigned i = 0; i < cMaxPhysExts; i++)
2393 {
2394 paPhysExts[i].iNext = i + 1;
2395 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2396 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2397 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2398 }
2399 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2400#endif
2401
2402
2403 pPool->cPresent = 0;
2404 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2405}
2406#endif /* IN_RING3 */
2407
2408
2409/**
2410 * Handle SyncCR3 pool tasks
2411 *
2412 * @returns VBox status code.
2413 * @retval VINF_SUCCESS if successfully added.
2414 * @retval VINF_PGM_SYNC_CR3 is it needs to be deferred to ring 3 (GC only)
2415 * @param pVM The VM handle.
2416 * @remark Should only be used when monitoring is available, thus placed in
2417 * the PGMPOOL_WITH_MONITORING #ifdef.
2418 */
2419int pgmPoolSyncCR3(PVM pVM)
2420{
2421 LogFlow(("pgmPoolSyncCR3\n"));
2422 /*
2423 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2424 * Occasionally we will have to clear all the shadow page tables because we wanted
2425 * to monitor a page which was mapped by too many shadowed page tables. This operation
2426 * sometimes refered to as a 'lightweight flush'.
2427 */
2428 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2429 pgmPoolMonitorModifiedClearAll(pVM);
2430 else
2431 {
2432# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2433 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2434 pgmPoolClearAll(pVM);
2435# else /* !IN_RING3 */
2436 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2437 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2438 return VINF_PGM_SYNC_CR3;
2439# endif /* !IN_RING3 */
2440 }
2441 return VINF_SUCCESS;
2442}
2443
2444#endif /* PGMPOOL_WITH_MONITORING */
2445#ifdef PGMPOOL_WITH_USER_TRACKING
2446
2447/**
2448 * Frees up at least one user entry.
2449 *
2450 * @returns VBox status code.
2451 * @retval VINF_SUCCESS if successfully added.
2452 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2453 * @param pPool The pool.
2454 * @param iUser The user index.
2455 */
2456static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2457{
2458 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2459#ifdef PGMPOOL_WITH_CACHE
2460 /*
2461 * Just free cached pages in a braindead fashion.
2462 */
2463 /** @todo walk the age list backwards and free the first with usage. */
2464 int rc = VINF_SUCCESS;
2465 do
2466 {
2467 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2468 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2469 rc = rc2;
2470 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2471 return rc;
2472#else
2473 /*
2474 * Lazy approach.
2475 */
2476 /* @todo incompatible with long mode paging (cr3 root will be flushed) */
2477 Assert(!CPUMIsGuestInLongMode(pVM));
2478 pgmPoolFlushAllInt(pPool);
2479 return VERR_PGM_POOL_FLUSHED;
2480#endif
2481}
2482
2483
2484/**
2485 * Inserts a page into the cache.
2486 *
2487 * This will create user node for the page, insert it into the GCPhys
2488 * hash, and insert it into the age list.
2489 *
2490 * @returns VBox status code.
2491 * @retval VINF_SUCCESS if successfully added.
2492 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2493 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
2494 * @param pPool The pool.
2495 * @param pPage The cached page.
2496 * @param GCPhys The GC physical address of the page we're gonna shadow.
2497 * @param iUser The user index.
2498 * @param iUserTable The user table index.
2499 */
2500DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2501{
2502 int rc = VINF_SUCCESS;
2503 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2504
2505 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2506
2507#ifdef VBOX_STRICT
2508 /*
2509 * Check that the entry doesn't already exists.
2510 */
2511 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2512 {
2513 uint16_t i = pPage->iUserHead;
2514 do
2515 {
2516 Assert(i < pPool->cMaxUsers);
2517 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2518 i = paUsers[i].iNext;
2519 } while (i != NIL_PGMPOOL_USER_INDEX);
2520 }
2521#endif
2522
2523 /*
2524 * Find free a user node.
2525 */
2526 uint16_t i = pPool->iUserFreeHead;
2527 if (i == NIL_PGMPOOL_USER_INDEX)
2528 {
2529 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2530 if (RT_FAILURE(rc))
2531 return rc;
2532 i = pPool->iUserFreeHead;
2533 }
2534
2535 /*
2536 * Unlink the user node from the free list,
2537 * initialize and insert it into the user list.
2538 */
2539 pPool->iUserFreeHead = paUsers[i].iNext;
2540 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2541 paUsers[i].iUser = iUser;
2542 paUsers[i].iUserTable = iUserTable;
2543 pPage->iUserHead = i;
2544
2545 /*
2546 * Insert into cache and enable monitoring of the guest page if enabled.
2547 *
2548 * Until we implement caching of all levels, including the CR3 one, we'll
2549 * have to make sure we don't try monitor & cache any recursive reuse of
2550 * a monitored CR3 page. Because all windows versions are doing this we'll
2551 * have to be able to do combined access monitoring, CR3 + PT and
2552 * PD + PT (guest PAE).
2553 *
2554 * Update:
2555 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2556 */
2557#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2558# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2559 const bool fCanBeMonitored = true;
2560# else
2561 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2562 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2563 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2564# endif
2565# ifdef PGMPOOL_WITH_CACHE
2566 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2567# endif
2568 if (fCanBeMonitored)
2569 {
2570# ifdef PGMPOOL_WITH_MONITORING
2571 rc = pgmPoolMonitorInsert(pPool, pPage);
2572 if (rc == VERR_PGM_POOL_CLEARED)
2573 {
2574 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2575# ifndef PGMPOOL_WITH_CACHE
2576 pgmPoolMonitorFlush(pPool, pPage);
2577 rc = VERR_PGM_POOL_FLUSHED;
2578# endif
2579 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2580 paUsers[i].iNext = pPool->iUserFreeHead;
2581 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2582 pPool->iUserFreeHead = i;
2583 }
2584 }
2585# endif
2586#endif /* PGMPOOL_WITH_MONITORING */
2587 return rc;
2588}
2589
2590
2591# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2592/**
2593 * Adds a user reference to a page.
2594 *
2595 * This will move the page to the head of the
2596 *
2597 * @returns VBox status code.
2598 * @retval VINF_SUCCESS if successfully added.
2599 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2600 * @param pPool The pool.
2601 * @param pPage The cached page.
2602 * @param iUser The user index.
2603 * @param iUserTable The user table.
2604 */
2605static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2606{
2607 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2608
2609 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2610# ifdef VBOX_STRICT
2611 /*
2612 * Check that the entry doesn't already exists.
2613 */
2614 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2615 {
2616 uint16_t i = pPage->iUserHead;
2617 do
2618 {
2619 Assert(i < pPool->cMaxUsers);
2620 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2621 i = paUsers[i].iNext;
2622 } while (i != NIL_PGMPOOL_USER_INDEX);
2623 }
2624# endif
2625
2626 /*
2627 * Allocate a user node.
2628 */
2629 uint16_t i = pPool->iUserFreeHead;
2630 if (i == NIL_PGMPOOL_USER_INDEX)
2631 {
2632 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2633 if (RT_FAILURE(rc))
2634 return rc;
2635 i = pPool->iUserFreeHead;
2636 }
2637 pPool->iUserFreeHead = paUsers[i].iNext;
2638
2639 /*
2640 * Initialize the user node and insert it.
2641 */
2642 paUsers[i].iNext = pPage->iUserHead;
2643 paUsers[i].iUser = iUser;
2644 paUsers[i].iUserTable = iUserTable;
2645 pPage->iUserHead = i;
2646
2647# ifdef PGMPOOL_WITH_CACHE
2648 /*
2649 * Tell the cache to update its replacement stats for this page.
2650 */
2651 pgmPoolCacheUsed(pPool, pPage);
2652# endif
2653 return VINF_SUCCESS;
2654}
2655# endif /* PGMPOOL_WITH_CACHE */
2656
2657
2658/**
2659 * Frees a user record associated with a page.
2660 *
2661 * This does not clear the entry in the user table, it simply replaces the
2662 * user record to the chain of free records.
2663 *
2664 * @param pPool The pool.
2665 * @param HCPhys The HC physical address of the shadow page.
2666 * @param iUser The shadow page pool index of the user table.
2667 * @param iUserTable The index into the user table (shadowed).
2668 */
2669static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2670{
2671 /*
2672 * Unlink and free the specified user entry.
2673 */
2674 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2675
2676 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2677 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2678 uint16_t i = pPage->iUserHead;
2679 if ( i != NIL_PGMPOOL_USER_INDEX
2680 && paUsers[i].iUser == iUser
2681 && paUsers[i].iUserTable == iUserTable)
2682 {
2683 pPage->iUserHead = paUsers[i].iNext;
2684
2685 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2686 paUsers[i].iNext = pPool->iUserFreeHead;
2687 pPool->iUserFreeHead = i;
2688 return;
2689 }
2690
2691 /* General: Linear search. */
2692 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2693 while (i != NIL_PGMPOOL_USER_INDEX)
2694 {
2695 if ( paUsers[i].iUser == iUser
2696 && paUsers[i].iUserTable == iUserTable)
2697 {
2698 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2699 paUsers[iPrev].iNext = paUsers[i].iNext;
2700 else
2701 pPage->iUserHead = paUsers[i].iNext;
2702
2703 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2704 paUsers[i].iNext = pPool->iUserFreeHead;
2705 pPool->iUserFreeHead = i;
2706 return;
2707 }
2708 iPrev = i;
2709 i = paUsers[i].iNext;
2710 }
2711
2712 /* Fatal: didn't find it */
2713 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2714 iUser, iUserTable, pPage->GCPhys));
2715}
2716
2717
2718/**
2719 * Gets the entry size of a shadow table.
2720 *
2721 * @param enmKind The kind of page.
2722 *
2723 * @returns The size of the entry in bytes. That is, 4 or 8.
2724 * @returns If the kind is not for a table, an assertion is raised and 0 is
2725 * returned.
2726 */
2727DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2728{
2729 switch (enmKind)
2730 {
2731 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2732 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2733 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2734#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2735 case PGMPOOLKIND_32BIT_PD:
2736 case PGMPOOLKIND_32BIT_PD_PHYS:
2737#else
2738 case PGMPOOLKIND_ROOT_32BIT_PD:
2739#endif
2740 return 4;
2741
2742 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2743 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2744 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2745 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2746 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2747 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2748 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2749 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2750 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2751 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2752 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2753 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2754 case PGMPOOLKIND_64BIT_PML4:
2755#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2756 case PGMPOOLKIND_ROOT_PAE_PD:
2757 case PGMPOOLKIND_ROOT_PDPT:
2758#endif
2759 case PGMPOOLKIND_PAE_PDPT:
2760 case PGMPOOLKIND_ROOT_NESTED:
2761 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2762 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2763 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2764 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2765 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2766 case PGMPOOLKIND_PAE_PD_PHYS:
2767 case PGMPOOLKIND_PAE_PDPT_PHYS:
2768 return 8;
2769
2770 default:
2771 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2772 }
2773}
2774
2775
2776/**
2777 * Gets the entry size of a guest table.
2778 *
2779 * @param enmKind The kind of page.
2780 *
2781 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2782 * @returns If the kind is not for a table, an assertion is raised and 0 is
2783 * returned.
2784 */
2785DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2786{
2787 switch (enmKind)
2788 {
2789 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2790 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2791#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2792 case PGMPOOLKIND_32BIT_PD:
2793#else
2794 case PGMPOOLKIND_ROOT_32BIT_PD:
2795#endif
2796 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2797 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2798 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2799 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2800 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2801 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2802 return 4;
2803
2804 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2805 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2806 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2807 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2808 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2809 case PGMPOOLKIND_64BIT_PML4:
2810#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2811 case PGMPOOLKIND_PAE_PDPT:
2812#else
2813 case PGMPOOLKIND_ROOT_PAE_PD:
2814 case PGMPOOLKIND_ROOT_PDPT:
2815#endif
2816 return 8;
2817
2818 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2819 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2820 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2821 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2822 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2823 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2824 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2825 case PGMPOOLKIND_ROOT_NESTED:
2826 case PGMPOOLKIND_PAE_PD_PHYS:
2827 case PGMPOOLKIND_PAE_PDPT_PHYS:
2828 case PGMPOOLKIND_32BIT_PD_PHYS:
2829 /** @todo can we return 0? (nobody is calling this...) */
2830 AssertFailed();
2831 return 0;
2832
2833 default:
2834 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2835 }
2836}
2837
2838#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2839
2840/**
2841 * Scans one shadow page table for mappings of a physical page.
2842 *
2843 * @param pVM The VM handle.
2844 * @param pPhysPage The guest page in question.
2845 * @param iShw The shadow page table.
2846 * @param cRefs The number of references made in that PT.
2847 */
2848static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2849{
2850 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2851 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2852
2853 /*
2854 * Assert sanity.
2855 */
2856 Assert(cRefs == 1);
2857 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2858 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2859
2860 /*
2861 * Then, clear the actual mappings to the page in the shadow PT.
2862 */
2863 switch (pPage->enmKind)
2864 {
2865 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2866 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2867 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2868 {
2869 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2870 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2871 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2872 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2873 {
2874 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2875 pPT->a[i].u = 0;
2876 cRefs--;
2877 if (!cRefs)
2878 return;
2879 }
2880#ifdef LOG_ENABLED
2881 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2882 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2883 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2884 {
2885 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2886 pPT->a[i].u = 0;
2887 }
2888#endif
2889 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2890 break;
2891 }
2892
2893 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2894 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2895 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2896 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2897 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2898 {
2899 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2900 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2901 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2902 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2903 {
2904 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2905 pPT->a[i].u = 0;
2906 cRefs--;
2907 if (!cRefs)
2908 return;
2909 }
2910#ifdef LOG_ENABLED
2911 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2912 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2913 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2914 {
2915 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2916 pPT->a[i].u = 0;
2917 }
2918#endif
2919 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
2920 break;
2921 }
2922
2923 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2924 {
2925 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2926 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2927 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2928 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2929 {
2930 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2931 pPT->a[i].u = 0;
2932 cRefs--;
2933 if (!cRefs)
2934 return;
2935 }
2936#ifdef LOG_ENABLED
2937 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2938 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2939 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2940 {
2941 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2942 pPT->a[i].u = 0;
2943 }
2944#endif
2945 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2946 break;
2947 }
2948
2949 default:
2950 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2951 }
2952}
2953
2954
2955/**
2956 * Scans one shadow page table for mappings of a physical page.
2957 *
2958 * @param pVM The VM handle.
2959 * @param pPhysPage The guest page in question.
2960 * @param iShw The shadow page table.
2961 * @param cRefs The number of references made in that PT.
2962 */
2963void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2964{
2965 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2966 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2967 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2968 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2969 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2970 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2971}
2972
2973
2974/**
2975 * Flushes a list of shadow page tables mapping the same physical page.
2976 *
2977 * @param pVM The VM handle.
2978 * @param pPhysPage The guest page in question.
2979 * @param iPhysExt The physical cross reference extent list to flush.
2980 */
2981void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2982{
2983 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2984 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2985 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt\n", pPhysPage->HCPhys, iPhysExt));
2986
2987 const uint16_t iPhysExtStart = iPhysExt;
2988 PPGMPOOLPHYSEXT pPhysExt;
2989 do
2990 {
2991 Assert(iPhysExt < pPool->cMaxPhysExts);
2992 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2993 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2994 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2995 {
2996 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2997 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2998 }
2999
3000 /* next */
3001 iPhysExt = pPhysExt->iNext;
3002 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3003
3004 /* insert the list into the free list and clear the ram range entry. */
3005 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3006 pPool->iPhysExtFreeHead = iPhysExtStart;
3007 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3008
3009 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3010}
3011
3012#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3013
3014/**
3015 * Scans all shadow page tables for mappings of a physical page.
3016 *
3017 * This may be slow, but it's most likely more efficient than cleaning
3018 * out the entire page pool / cache.
3019 *
3020 * @returns VBox status code.
3021 * @retval VINF_SUCCESS if all references has been successfully cleared.
3022 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3023 * a page pool cleaning.
3024 *
3025 * @param pVM The VM handle.
3026 * @param pPhysPage The guest page in question.
3027 */
3028int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3029{
3030 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3031 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3032 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
3033 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
3034
3035#if 1
3036 /*
3037 * There is a limit to what makes sense.
3038 */
3039 if (pPool->cPresent > 1024)
3040 {
3041 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3042 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3043 return VINF_PGM_GCPHYS_ALIASED;
3044 }
3045#endif
3046
3047 /*
3048 * Iterate all the pages until we've encountered all that in use.
3049 * This is simple but not quite optimal solution.
3050 */
3051 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3052 const uint32_t u32 = u64;
3053 unsigned cLeft = pPool->cUsedPages;
3054 unsigned iPage = pPool->cCurPages;
3055 while (--iPage >= PGMPOOL_IDX_FIRST)
3056 {
3057 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3058 if (pPage->GCPhys != NIL_RTGCPHYS)
3059 {
3060 switch (pPage->enmKind)
3061 {
3062 /*
3063 * We only care about shadow page tables.
3064 */
3065 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3066 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3067 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3068 {
3069 unsigned cPresent = pPage->cPresent;
3070 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3071 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3072 if (pPT->a[i].n.u1Present)
3073 {
3074 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3075 {
3076 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3077 pPT->a[i].u = 0;
3078 }
3079 if (!--cPresent)
3080 break;
3081 }
3082 break;
3083 }
3084
3085 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3086 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3087 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3088 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3089 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3090 {
3091 unsigned cPresent = pPage->cPresent;
3092 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3093 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3094 if (pPT->a[i].n.u1Present)
3095 {
3096 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3097 {
3098 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3099 pPT->a[i].u = 0;
3100 }
3101 if (!--cPresent)
3102 break;
3103 }
3104 break;
3105 }
3106 }
3107 if (!--cLeft)
3108 break;
3109 }
3110 }
3111
3112 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3113 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3114 return VINF_SUCCESS;
3115}
3116
3117
3118/**
3119 * Clears the user entry in a user table.
3120 *
3121 * This is used to remove all references to a page when flushing it.
3122 */
3123static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3124{
3125 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3126 Assert(pUser->iUser < pPool->cCurPages);
3127 uint32_t iUserTable = pUser->iUserTable;
3128
3129 /*
3130 * Map the user page.
3131 */
3132 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3133#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3134 if (pUserPage->enmKind == PGMPOOLKIND_ROOT_PAE_PD)
3135 {
3136 /* Must translate the fake 2048 entry PD to a 512 PD one since the R0 mapping is not linear. */
3137 Assert(pUser->iUser == PGMPOOL_IDX_PAE_PD);
3138 uint32_t iPdpt = iUserTable / X86_PG_PAE_ENTRIES;
3139 iUserTable %= X86_PG_PAE_ENTRIES;
3140 pUserPage = &pPool->aPages[PGMPOOL_IDX_PAE_PD_0 + iPdpt];
3141 Assert(pUserPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD);
3142 }
3143#endif
3144 union
3145 {
3146 uint64_t *pau64;
3147 uint32_t *pau32;
3148 } u;
3149 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3150
3151 /* Safety precaution in case we change the paging for other modes too in the future. */
3152 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
3153
3154#ifdef VBOX_STRICT
3155 /*
3156 * Some sanity checks.
3157 */
3158 switch (pUserPage->enmKind)
3159 {
3160# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3161 case PGMPOOLKIND_32BIT_PD:
3162 case PGMPOOLKIND_32BIT_PD_PHYS:
3163 Assert(iUserTable < X86_PG_ENTRIES);
3164 break;
3165# else
3166 case PGMPOOLKIND_ROOT_32BIT_PD:
3167 Assert(iUserTable < X86_PG_ENTRIES);
3168 Assert(!(u.pau32[iUserTable] & PGM_PDFLAGS_MAPPING));
3169 break;
3170# endif
3171# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3172 case PGMPOOLKIND_ROOT_PAE_PD:
3173 Assert(iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
3174 AssertMsg(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING), ("%llx %d\n", u.pau64[iUserTable], iUserTable));
3175 break;
3176# endif
3177# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3178 case PGMPOOLKIND_PAE_PDPT:
3179 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3180 case PGMPOOLKIND_PAE_PDPT_PHYS:
3181# else
3182 case PGMPOOLKIND_ROOT_PDPT:
3183# endif
3184 Assert(iUserTable < 4);
3185 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3186 break;
3187 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3188 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3189 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3190 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3191 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3192 case PGMPOOLKIND_PAE_PD_PHYS:
3193 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3194 break;
3195 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3196 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3197 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3198 break;
3199 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3200 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3201 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3202 break;
3203 case PGMPOOLKIND_64BIT_PML4:
3204 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3205 /* GCPhys >> PAGE_SHIFT is the index here */
3206 break;
3207 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3208 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3209 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3210 break;
3211
3212 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3213 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3214 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3215 break;
3216
3217 case PGMPOOLKIND_ROOT_NESTED:
3218 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3219 break;
3220
3221 default:
3222 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3223 break;
3224 }
3225#endif /* VBOX_STRICT */
3226
3227 /*
3228 * Clear the entry in the user page.
3229 */
3230 switch (pUserPage->enmKind)
3231 {
3232 /* 32-bit entries */
3233#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3234 case PGMPOOLKIND_32BIT_PD:
3235 case PGMPOOLKIND_32BIT_PD_PHYS:
3236#else
3237 case PGMPOOLKIND_ROOT_32BIT_PD:
3238#endif
3239 u.pau32[iUserTable] = 0;
3240 break;
3241
3242 /* 64-bit entries */
3243 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3244 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3245 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3246 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3247 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3248 case PGMPOOLKIND_PAE_PD_PHYS:
3249 case PGMPOOLKIND_PAE_PDPT_PHYS:
3250 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3251 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3252 case PGMPOOLKIND_64BIT_PML4:
3253 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3254 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3255# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3256 case PGMPOOLKIND_ROOT_PAE_PD:
3257#endif
3258#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3259 case PGMPOOLKIND_PAE_PDPT:
3260 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3261#else
3262 case PGMPOOLKIND_ROOT_PDPT:
3263#endif
3264 case PGMPOOLKIND_ROOT_NESTED:
3265 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3266 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3267 u.pau64[iUserTable] = 0;
3268 break;
3269
3270 default:
3271 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3272 }
3273}
3274
3275
3276/**
3277 * Clears all users of a page.
3278 */
3279static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3280{
3281 /*
3282 * Free all the user records.
3283 */
3284 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3285
3286 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3287 uint16_t i = pPage->iUserHead;
3288 while (i != NIL_PGMPOOL_USER_INDEX)
3289 {
3290 /* Clear enter in user table. */
3291 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3292
3293 /* Free it. */
3294 const uint16_t iNext = paUsers[i].iNext;
3295 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3296 paUsers[i].iNext = pPool->iUserFreeHead;
3297 pPool->iUserFreeHead = i;
3298
3299 /* Next. */
3300 i = iNext;
3301 }
3302 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3303}
3304
3305#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3306
3307/**
3308 * Allocates a new physical cross reference extent.
3309 *
3310 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3311 * @param pVM The VM handle.
3312 * @param piPhysExt Where to store the phys ext index.
3313 */
3314PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3315{
3316 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3317 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3318 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3319 {
3320 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3321 return NULL;
3322 }
3323 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3324 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3325 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3326 *piPhysExt = iPhysExt;
3327 return pPhysExt;
3328}
3329
3330
3331/**
3332 * Frees a physical cross reference extent.
3333 *
3334 * @param pVM The VM handle.
3335 * @param iPhysExt The extent to free.
3336 */
3337void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3338{
3339 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3340 Assert(iPhysExt < pPool->cMaxPhysExts);
3341 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3342 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3343 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3344 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3345 pPool->iPhysExtFreeHead = iPhysExt;
3346}
3347
3348
3349/**
3350 * Frees a physical cross reference extent.
3351 *
3352 * @param pVM The VM handle.
3353 * @param iPhysExt The extent to free.
3354 */
3355void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3356{
3357 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3358
3359 const uint16_t iPhysExtStart = iPhysExt;
3360 PPGMPOOLPHYSEXT pPhysExt;
3361 do
3362 {
3363 Assert(iPhysExt < pPool->cMaxPhysExts);
3364 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3365 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3366 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3367
3368 /* next */
3369 iPhysExt = pPhysExt->iNext;
3370 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3371
3372 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3373 pPool->iPhysExtFreeHead = iPhysExtStart;
3374}
3375
3376
3377/**
3378 * Insert a reference into a list of physical cross reference extents.
3379 *
3380 * @returns The new ram range flags (top 16-bits).
3381 *
3382 * @param pVM The VM handle.
3383 * @param iPhysExt The physical extent index of the list head.
3384 * @param iShwPT The shadow page table index.
3385 *
3386 */
3387static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3388{
3389 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3390 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3391
3392 /* special common case. */
3393 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3394 {
3395 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3396 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3397 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
3398 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3399 }
3400
3401 /* general treatment. */
3402 const uint16_t iPhysExtStart = iPhysExt;
3403 unsigned cMax = 15;
3404 for (;;)
3405 {
3406 Assert(iPhysExt < pPool->cMaxPhysExts);
3407 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3408 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3409 {
3410 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3411 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3412 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3413 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3414 }
3415 if (!--cMax)
3416 {
3417 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3418 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3419 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
3420 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3421 }
3422 }
3423
3424 /* add another extent to the list. */
3425 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3426 if (!pNew)
3427 {
3428 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3429 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3430 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3431 }
3432 pNew->iNext = iPhysExtStart;
3433 pNew->aidx[0] = iShwPT;
3434 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3435 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3436}
3437
3438
3439/**
3440 * Add a reference to guest physical page where extents are in use.
3441 *
3442 * @returns The new ram range flags (top 16-bits).
3443 *
3444 * @param pVM The VM handle.
3445 * @param u16 The ram range flags (top 16-bits).
3446 * @param iShwPT The shadow page table index.
3447 */
3448uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3449{
3450 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
3451 {
3452 /*
3453 * Convert to extent list.
3454 */
3455 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
3456 uint16_t iPhysExt;
3457 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3458 if (pPhysExt)
3459 {
3460 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
3461 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3462 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
3463 pPhysExt->aidx[1] = iShwPT;
3464 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3465 }
3466 else
3467 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3468 }
3469 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
3470 {
3471 /*
3472 * Insert into the extent list.
3473 */
3474 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
3475 }
3476 else
3477 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3478 return u16;
3479}
3480
3481
3482/**
3483 * Clear references to guest physical memory.
3484 *
3485 * @param pPool The pool.
3486 * @param pPage The page.
3487 * @param pPhysPage Pointer to the aPages entry in the ram range.
3488 */
3489void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3490{
3491 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
3492 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3493
3494 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
3495 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
3496 {
3497 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3498 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3499 do
3500 {
3501 Assert(iPhysExt < pPool->cMaxPhysExts);
3502
3503 /*
3504 * Look for the shadow page and check if it's all freed.
3505 */
3506 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3507 {
3508 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3509 {
3510 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3511
3512 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3513 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3514 {
3515 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3516 return;
3517 }
3518
3519 /* we can free the node. */
3520 PVM pVM = pPool->CTX_SUFF(pVM);
3521 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3522 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3523 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3524 {
3525 /* lonely node */
3526 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3527 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
3528 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3529 }
3530 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3531 {
3532 /* head */
3533 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
3534 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
3535 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
3536 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
3537 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3538 }
3539 else
3540 {
3541 /* in list */
3542 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3543 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3544 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3545 }
3546 iPhysExt = iPhysExtNext;
3547 return;
3548 }
3549 }
3550
3551 /* next */
3552 iPhysExtPrev = iPhysExt;
3553 iPhysExt = paPhysExts[iPhysExt].iNext;
3554 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3555
3556 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3557 }
3558 else /* nothing to do */
3559 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
3560}
3561
3562
3563/**
3564 * Clear references to guest physical memory.
3565 *
3566 * This is the same as pgmPoolTracDerefGCPhys except that the guest physical address
3567 * is assumed to be correct, so the linear search can be skipped and we can assert
3568 * at an earlier point.
3569 *
3570 * @param pPool The pool.
3571 * @param pPage The page.
3572 * @param HCPhys The host physical address corresponding to the guest page.
3573 * @param GCPhys The guest physical address corresponding to HCPhys.
3574 */
3575static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3576{
3577 /*
3578 * Walk range list.
3579 */
3580 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3581 while (pRam)
3582 {
3583 RTGCPHYS off = GCPhys - pRam->GCPhys;
3584 if (off < pRam->cb)
3585 {
3586 /* does it match? */
3587 const unsigned iPage = off >> PAGE_SHIFT;
3588 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3589#ifdef LOG_ENABLED
3590RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3591Log(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3592#endif
3593 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3594 {
3595 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3596 return;
3597 }
3598 break;
3599 }
3600 pRam = pRam->CTX_SUFF(pNext);
3601 }
3602 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3603}
3604
3605
3606/**
3607 * Clear references to guest physical memory.
3608 *
3609 * @param pPool The pool.
3610 * @param pPage The page.
3611 * @param HCPhys The host physical address corresponding to the guest page.
3612 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
3613 */
3614static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3615{
3616 /*
3617 * Walk range list.
3618 */
3619 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3620 while (pRam)
3621 {
3622 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3623 if (off < pRam->cb)
3624 {
3625 /* does it match? */
3626 const unsigned iPage = off >> PAGE_SHIFT;
3627 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3628 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3629 {
3630 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3631 return;
3632 }
3633 break;
3634 }
3635 pRam = pRam->CTX_SUFF(pNext);
3636 }
3637
3638 /*
3639 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3640 */
3641 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3642 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3643 while (pRam)
3644 {
3645 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3646 while (iPage-- > 0)
3647 {
3648 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3649 {
3650 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3651 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3652 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3653 return;
3654 }
3655 }
3656 pRam = pRam->CTX_SUFF(pNext);
3657 }
3658
3659 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3660}
3661
3662
3663/**
3664 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3665 *
3666 * @param pPool The pool.
3667 * @param pPage The page.
3668 * @param pShwPT The shadow page table (mapping of the page).
3669 * @param pGstPT The guest page table.
3670 */
3671DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3672{
3673 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3674 if (pShwPT->a[i].n.u1Present)
3675 {
3676 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3677 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3678 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3679 if (!--pPage->cPresent)
3680 break;
3681 }
3682}
3683
3684
3685/**
3686 * Clear references to guest physical memory in a PAE / 32-bit page table.
3687 *
3688 * @param pPool The pool.
3689 * @param pPage The page.
3690 * @param pShwPT The shadow page table (mapping of the page).
3691 * @param pGstPT The guest page table (just a half one).
3692 */
3693DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3694{
3695 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3696 if (pShwPT->a[i].n.u1Present)
3697 {
3698 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
3699 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3700 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3701 }
3702}
3703
3704
3705/**
3706 * Clear references to guest physical memory in a PAE / PAE page table.
3707 *
3708 * @param pPool The pool.
3709 * @param pPage The page.
3710 * @param pShwPT The shadow page table (mapping of the page).
3711 * @param pGstPT The guest page table.
3712 */
3713DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3714{
3715 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3716 if (pShwPT->a[i].n.u1Present)
3717 {
3718 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
3719 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3720 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3721 }
3722}
3723
3724
3725/**
3726 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3727 *
3728 * @param pPool The pool.
3729 * @param pPage The page.
3730 * @param pShwPT The shadow page table (mapping of the page).
3731 */
3732DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3733{
3734 RTGCPHYS GCPhys = pPage->GCPhys;
3735 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3736 if (pShwPT->a[i].n.u1Present)
3737 {
3738 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3739 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3740 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3741 }
3742}
3743
3744
3745/**
3746 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3747 *
3748 * @param pPool The pool.
3749 * @param pPage The page.
3750 * @param pShwPT The shadow page table (mapping of the page).
3751 */
3752DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3753{
3754 RTGCPHYS GCPhys = pPage->GCPhys;
3755 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3756 if (pShwPT->a[i].n.u1Present)
3757 {
3758 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3759 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3760 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3761 }
3762}
3763
3764#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3765
3766
3767#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3768/**
3769 * Clear references to shadowed pages in a 32 bits page directory.
3770 *
3771 * @param pPool The pool.
3772 * @param pPage The page.
3773 * @param pShwPD The shadow page directory (mapping of the page).
3774 */
3775DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
3776{
3777 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3778 {
3779 if ( pShwPD->a[i].n.u1Present
3780 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3781 )
3782 {
3783 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
3784 if (pSubPage)
3785 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3786 else
3787 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
3788 }
3789 }
3790}
3791#endif
3792
3793/**
3794 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
3795 *
3796 * @param pPool The pool.
3797 * @param pPage The page.
3798 * @param pShwPD The shadow page directory (mapping of the page).
3799 */
3800DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3801{
3802 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3803 {
3804 if ( pShwPD->a[i].n.u1Present
3805#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3806 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3807#endif
3808 )
3809 {
3810 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3811 if (pSubPage)
3812 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3813 else
3814 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3815 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3816 }
3817 }
3818}
3819
3820
3821/**
3822 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3823 *
3824 * @param pPool The pool.
3825 * @param pPage The page.
3826 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3827 */
3828DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3829{
3830 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3831 {
3832 if ( pShwPDPT->a[i].n.u1Present
3833#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3834 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
3835#endif
3836 )
3837 {
3838 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3839 if (pSubPage)
3840 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3841 else
3842 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3843 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3844 }
3845 }
3846}
3847
3848
3849/**
3850 * Clear references to shadowed pages in a 64-bit level 4 page table.
3851 *
3852 * @param pPool The pool.
3853 * @param pPage The page.
3854 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
3855 */
3856DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3857{
3858 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3859 {
3860 if (pShwPML4->a[i].n.u1Present)
3861 {
3862 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3863 if (pSubPage)
3864 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3865 else
3866 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3867 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3868 }
3869 }
3870}
3871
3872
3873/**
3874 * Clear references to shadowed pages in an EPT page table.
3875 *
3876 * @param pPool The pool.
3877 * @param pPage The page.
3878 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
3879 */
3880DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3881{
3882 RTGCPHYS GCPhys = pPage->GCPhys;
3883 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3884 if (pShwPT->a[i].n.u1Present)
3885 {
3886 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
3887 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
3888 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3889 }
3890}
3891
3892
3893/**
3894 * Clear references to shadowed pages in an EPT page directory.
3895 *
3896 * @param pPool The pool.
3897 * @param pPage The page.
3898 * @param pShwPD The shadow page directory (mapping of the page).
3899 */
3900DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3901{
3902 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3903 {
3904 if (pShwPD->a[i].n.u1Present)
3905 {
3906 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3907 if (pSubPage)
3908 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3909 else
3910 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3911 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3912 }
3913 }
3914}
3915
3916
3917/**
3918 * Clear references to shadowed pages in an EPT page directory pointer table.
3919 *
3920 * @param pPool The pool.
3921 * @param pPage The page.
3922 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3923 */
3924DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3925{
3926 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3927 {
3928 if (pShwPDPT->a[i].n.u1Present)
3929 {
3930 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3931 if (pSubPage)
3932 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3933 else
3934 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3935 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3936 }
3937 }
3938}
3939
3940
3941/**
3942 * Clears all references made by this page.
3943 *
3944 * This includes other shadow pages and GC physical addresses.
3945 *
3946 * @param pPool The pool.
3947 * @param pPage The page.
3948 */
3949static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3950{
3951 /*
3952 * Map the shadow page and take action according to the page kind.
3953 */
3954 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3955 switch (pPage->enmKind)
3956 {
3957#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3958 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3959 {
3960 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3961 void *pvGst;
3962 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3963 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3964 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3965 break;
3966 }
3967
3968 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3969 {
3970 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3971 void *pvGst;
3972 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3973 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3974 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3975 break;
3976 }
3977
3978 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3979 {
3980 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3981 void *pvGst;
3982 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3983 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3984 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3985 break;
3986 }
3987
3988 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3989 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3990 {
3991 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3992 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3993 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3994 break;
3995 }
3996
3997 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3998 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3999 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4000 {
4001 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4002 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4003 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4004 break;
4005 }
4006
4007#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4008 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4009 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4010 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4011 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4012 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4013 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4014 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4015 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4016 break;
4017#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4018
4019 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4020 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4021 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4022 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4023 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4024 case PGMPOOLKIND_PAE_PD_PHYS:
4025 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4026 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4027 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4028 break;
4029
4030#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4031 case PGMPOOLKIND_32BIT_PD_PHYS:
4032 case PGMPOOLKIND_32BIT_PD:
4033 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4034 break;
4035
4036 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4037 case PGMPOOLKIND_PAE_PDPT:
4038 case PGMPOOLKIND_PAE_PDPT_PHYS:
4039#endif
4040 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4041 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4042 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4043 break;
4044
4045 case PGMPOOLKIND_64BIT_PML4:
4046 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4047 break;
4048
4049 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4050 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4051 break;
4052
4053 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4054 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4055 break;
4056
4057 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4058 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4059 break;
4060
4061 default:
4062 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4063 }
4064
4065 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
4066 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4067 ASMMemZeroPage(pvShw);
4068 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4069 pPage->fZeroed = true;
4070}
4071
4072#endif /* PGMPOOL_WITH_USER_TRACKING */
4073
4074/**
4075 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
4076 *
4077 * @param pPool The pool.
4078 */
4079static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
4080{
4081#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4082 /* Start a subset so we won't run out of mapping space. */
4083 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4084 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4085#endif
4086
4087 /*
4088 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
4089 */
4090 Assert(NIL_PGMPOOL_IDX == 0);
4091 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
4092 {
4093 /*
4094 * Get the page address.
4095 */
4096 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4097 union
4098 {
4099 uint64_t *pau64;
4100 uint32_t *pau32;
4101 } u;
4102
4103 /*
4104 * Mark stuff not present.
4105 */
4106 switch (pPage->enmKind)
4107 {
4108#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4109 case PGMPOOLKIND_ROOT_32BIT_PD:
4110 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4111 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
4112 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
4113 u.pau32[iPage] = 0;
4114 break;
4115
4116 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4117 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4118 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
4119 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
4120 u.pau64[iPage] = 0;
4121 break;
4122
4123 case PGMPOOLKIND_ROOT_PDPT:
4124 /* Not root of shadowed pages currently, ignore it. */
4125 break;
4126#endif
4127
4128 case PGMPOOLKIND_ROOT_NESTED:
4129 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4130 ASMMemZero32(u.pau64, PAGE_SIZE);
4131 break;
4132 }
4133 }
4134
4135 /*
4136 * Paranoia (to be removed), flag a global CR3 sync.
4137 */
4138 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
4139
4140#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4141 /* Pop the subset. */
4142 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4143#endif
4144}
4145
4146
4147/**
4148 * Flushes the entire cache.
4149 *
4150 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4151 * and execute this CR3 flush.
4152 *
4153 * @param pPool The pool.
4154 */
4155static void pgmPoolFlushAllInt(PPGMPOOL pPool)
4156{
4157 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
4158 LogFlow(("pgmPoolFlushAllInt:\n"));
4159
4160 /*
4161 * If there are no pages in the pool, there is nothing to do.
4162 */
4163 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4164 {
4165 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4166 return;
4167 }
4168
4169 /*
4170 * Nuke the free list and reinsert all pages into it.
4171 */
4172 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4173 {
4174 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4175
4176#ifdef IN_RING3
4177 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMR3, pPage->pvPageR3));
4178#endif
4179#ifdef PGMPOOL_WITH_MONITORING
4180 if (pPage->fMonitored)
4181 pgmPoolMonitorFlush(pPool, pPage);
4182 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4183 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4184 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4185 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4186 pPage->cModifications = 0;
4187#endif
4188 pPage->GCPhys = NIL_RTGCPHYS;
4189 pPage->enmKind = PGMPOOLKIND_FREE;
4190 Assert(pPage->idx == i);
4191 pPage->iNext = i + 1;
4192 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4193 pPage->fSeenNonGlobal = false;
4194 pPage->fMonitored= false;
4195 pPage->fCached = false;
4196 pPage->fReusedFlushPending = false;
4197#ifdef PGMPOOL_WITH_USER_TRACKING
4198 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4199#else
4200 pPage->fCR3Mix = false;
4201#endif
4202#ifdef PGMPOOL_WITH_CACHE
4203 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4204 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4205#endif
4206 }
4207 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4208 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4209 pPool->cUsedPages = 0;
4210
4211#ifdef PGMPOOL_WITH_USER_TRACKING
4212 /*
4213 * Zap and reinitialize the user records.
4214 */
4215 pPool->cPresent = 0;
4216 pPool->iUserFreeHead = 0;
4217 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4218 const unsigned cMaxUsers = pPool->cMaxUsers;
4219 for (unsigned i = 0; i < cMaxUsers; i++)
4220 {
4221 paUsers[i].iNext = i + 1;
4222 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4223 paUsers[i].iUserTable = 0xfffffffe;
4224 }
4225 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4226#endif
4227
4228#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4229 /*
4230 * Clear all the GCPhys links and rebuild the phys ext free list.
4231 */
4232 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4233 pRam;
4234 pRam = pRam->CTX_SUFF(pNext))
4235 {
4236 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4237 while (iPage-- > 0)
4238 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
4239 }
4240
4241 pPool->iPhysExtFreeHead = 0;
4242 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4243 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4244 for (unsigned i = 0; i < cMaxPhysExts; i++)
4245 {
4246 paPhysExts[i].iNext = i + 1;
4247 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4248 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4249 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4250 }
4251 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4252#endif
4253
4254#ifdef PGMPOOL_WITH_MONITORING
4255 /*
4256 * Just zap the modified list.
4257 */
4258 pPool->cModifiedPages = 0;
4259 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4260#endif
4261
4262#ifdef PGMPOOL_WITH_CACHE
4263 /*
4264 * Clear the GCPhys hash and the age list.
4265 */
4266 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4267 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4268 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4269 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4270#endif
4271
4272 /*
4273 * Flush all the special root pages.
4274 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4275 */
4276 pgmPoolFlushAllSpecialRoots(pPool);
4277 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4278 {
4279 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4280 pPage->iNext = NIL_PGMPOOL_IDX;
4281#ifdef PGMPOOL_WITH_MONITORING
4282 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4283 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4284 pPage->cModifications = 0;
4285 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4286 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4287 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4288 if (pPage->fMonitored)
4289 {
4290 PVM pVM = pPool->CTX_SUFF(pVM);
4291 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4292 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4293 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4294 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4295 pPool->pszAccessHandler);
4296 AssertFatalRCSuccess(rc);
4297# ifdef PGMPOOL_WITH_CACHE
4298 pgmPoolHashInsert(pPool, pPage);
4299# endif
4300 }
4301#endif
4302#ifdef PGMPOOL_WITH_USER_TRACKING
4303 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4304#endif
4305#ifdef PGMPOOL_WITH_CACHE
4306 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4307 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4308#endif
4309 }
4310
4311 /*
4312 * Finally, assert the FF.
4313 */
4314 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
4315
4316 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4317}
4318
4319
4320/**
4321 * Flushes a pool page.
4322 *
4323 * This moves the page to the free list after removing all user references to it.
4324 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
4325 *
4326 * @returns VBox status code.
4327 * @retval VINF_SUCCESS on success.
4328 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
4329 * @param pPool The pool.
4330 * @param HCPhys The HC physical address of the shadow page.
4331 */
4332int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4333{
4334 int rc = VINF_SUCCESS;
4335 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4336 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4337 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4338
4339 /*
4340 * Quietly reject any attempts at flushing any of the special root pages.
4341 */
4342 if (pPage->idx < PGMPOOL_IDX_FIRST)
4343 {
4344 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4345 return VINF_SUCCESS;
4346 }
4347
4348 /*
4349 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4350 */
4351 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
4352 {
4353#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4354 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4355 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4356 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4357 || pPage->enmKind == PGMPOOLKIND_32BIT_PD,
4358 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4359#else
4360 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4,
4361 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4362#endif
4363 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4364 return VINF_SUCCESS;
4365 }
4366
4367#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4368 /* Start a subset so we won't run out of mapping space. */
4369 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4370 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4371#endif
4372
4373 /*
4374 * Mark the page as being in need of a ASMMemZeroPage().
4375 */
4376 pPage->fZeroed = false;
4377
4378#ifdef PGMPOOL_WITH_USER_TRACKING
4379 /*
4380 * Clear the page.
4381 */
4382 pgmPoolTrackClearPageUsers(pPool, pPage);
4383 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4384 pgmPoolTrackDeref(pPool, pPage);
4385 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4386#endif
4387
4388#ifdef PGMPOOL_WITH_CACHE
4389 /*
4390 * Flush it from the cache.
4391 */
4392 pgmPoolCacheFlushPage(pPool, pPage);
4393#endif /* PGMPOOL_WITH_CACHE */
4394
4395#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4396 /* Heavy stuff done. */
4397 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4398#endif
4399
4400#ifdef PGMPOOL_WITH_MONITORING
4401 /*
4402 * Deregistering the monitoring.
4403 */
4404 if (pPage->fMonitored)
4405 rc = pgmPoolMonitorFlush(pPool, pPage);
4406#endif
4407
4408 /*
4409 * Free the page.
4410 */
4411 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4412 pPage->iNext = pPool->iFreeHead;
4413 pPool->iFreeHead = pPage->idx;
4414 pPage->enmKind = PGMPOOLKIND_FREE;
4415 pPage->GCPhys = NIL_RTGCPHYS;
4416 pPage->fReusedFlushPending = false;
4417
4418 pPool->cUsedPages--;
4419 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4420 return rc;
4421}
4422
4423
4424/**
4425 * Frees a usage of a pool page.
4426 *
4427 * The caller is responsible to updating the user table so that it no longer
4428 * references the shadow page.
4429 *
4430 * @param pPool The pool.
4431 * @param HCPhys The HC physical address of the shadow page.
4432 * @param iUser The shadow page pool index of the user table.
4433 * @param iUserTable The index into the user table (shadowed).
4434 */
4435void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4436{
4437 STAM_PROFILE_START(&pPool->StatFree, a);
4438 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4439 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4440 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4441#ifdef PGMPOOL_WITH_USER_TRACKING
4442 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4443#endif
4444#ifdef PGMPOOL_WITH_CACHE
4445 if (!pPage->fCached)
4446#endif
4447 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
4448 STAM_PROFILE_STOP(&pPool->StatFree, a);
4449}
4450
4451
4452/**
4453 * Makes one or more free page free.
4454 *
4455 * @returns VBox status code.
4456 * @retval VINF_SUCCESS on success.
4457 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4458 *
4459 * @param pPool The pool.
4460 * @param iUser The user of the page.
4461 */
4462static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
4463{
4464 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4465
4466 /*
4467 * If the pool isn't full grown yet, expand it.
4468 */
4469 if (pPool->cCurPages < pPool->cMaxPages)
4470 {
4471 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4472#ifdef IN_RING3
4473 int rc = PGMR3PoolGrow(pPool->pVMR3);
4474#else
4475 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
4476#endif
4477 if (RT_FAILURE(rc))
4478 return rc;
4479 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4480 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4481 return VINF_SUCCESS;
4482 }
4483
4484#ifdef PGMPOOL_WITH_CACHE
4485 /*
4486 * Free one cached page.
4487 */
4488 return pgmPoolCacheFreeOne(pPool, iUser);
4489#else
4490 /*
4491 * Flush the pool.
4492 *
4493 * If we have tracking enabled, it should be possible to come up with
4494 * a cheap replacement strategy...
4495 */
4496 /* @todo incompatible with long mode paging (cr3 root will be flushed) */
4497 Assert(!CPUMIsGuestInLongMode(pVM));
4498 pgmPoolFlushAllInt(pPool);
4499 return VERR_PGM_POOL_FLUSHED;
4500#endif
4501}
4502
4503
4504/**
4505 * Allocates a page from the pool.
4506 *
4507 * This page may actually be a cached page and not in need of any processing
4508 * on the callers part.
4509 *
4510 * @returns VBox status code.
4511 * @retval VINF_SUCCESS if a NEW page was allocated.
4512 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4513 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4514 * @param pVM The VM handle.
4515 * @param GCPhys The GC physical address of the page we're gonna shadow.
4516 * For 4MB and 2MB PD entries, it's the first address the
4517 * shadow PT is covering.
4518 * @param enmKind The kind of mapping.
4519 * @param iUser The shadow page pool index of the user table.
4520 * @param iUserTable The index into the user table (shadowed).
4521 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4522 */
4523int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
4524{
4525 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4526 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4527 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4528 *ppPage = NULL;
4529 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4530 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4531 * Assert(!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4532
4533#ifdef PGMPOOL_WITH_CACHE
4534 if (pPool->fCacheEnabled)
4535 {
4536 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
4537 if (RT_SUCCESS(rc2))
4538 {
4539 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4540 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4541 return rc2;
4542 }
4543 }
4544#endif
4545
4546 /*
4547 * Allocate a new one.
4548 */
4549 int rc = VINF_SUCCESS;
4550 uint16_t iNew = pPool->iFreeHead;
4551 if (iNew == NIL_PGMPOOL_IDX)
4552 {
4553 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
4554 if (RT_FAILURE(rc))
4555 {
4556 if (rc != VERR_PGM_POOL_CLEARED)
4557 {
4558 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4559 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4560 return rc;
4561 }
4562 Log(("pgmPoolMakeMoreFreePages failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
4563 rc = VERR_PGM_POOL_FLUSHED;
4564 }
4565 iNew = pPool->iFreeHead;
4566 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4567 }
4568
4569 /* unlink the free head */
4570 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4571 pPool->iFreeHead = pPage->iNext;
4572 pPage->iNext = NIL_PGMPOOL_IDX;
4573
4574 /*
4575 * Initialize it.
4576 */
4577 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4578 pPage->enmKind = enmKind;
4579 pPage->GCPhys = GCPhys;
4580 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4581 pPage->fMonitored = false;
4582 pPage->fCached = false;
4583 pPage->fReusedFlushPending = false;
4584#ifdef PGMPOOL_WITH_MONITORING
4585 pPage->cModifications = 0;
4586 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4587 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4588#else
4589 pPage->fCR3Mix = false;
4590#endif
4591#ifdef PGMPOOL_WITH_USER_TRACKING
4592 pPage->cPresent = 0;
4593 pPage->iFirstPresent = ~0;
4594
4595 /*
4596 * Insert into the tracking and cache. If this fails, free the page.
4597 */
4598 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4599 if (RT_FAILURE(rc3))
4600 {
4601 if (rc3 != VERR_PGM_POOL_CLEARED)
4602 {
4603 pPool->cUsedPages--;
4604 pPage->enmKind = PGMPOOLKIND_FREE;
4605 pPage->GCPhys = NIL_RTGCPHYS;
4606 pPage->iNext = pPool->iFreeHead;
4607 pPool->iFreeHead = pPage->idx;
4608 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4609 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4610 return rc3;
4611 }
4612 Log(("pgmPoolTrackInsert failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4613 rc = VERR_PGM_POOL_FLUSHED;
4614 }
4615#endif /* PGMPOOL_WITH_USER_TRACKING */
4616
4617 /*
4618 * Commit the allocation, clear the page and return.
4619 */
4620#ifdef VBOX_WITH_STATISTICS
4621 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4622 pPool->cUsedPagesHigh = pPool->cUsedPages;
4623#endif
4624
4625 if (!pPage->fZeroed)
4626 {
4627 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4628 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4629 ASMMemZeroPage(pv);
4630 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4631 }
4632
4633 *ppPage = pPage;
4634 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4635 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4636 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4637 return rc;
4638}
4639
4640
4641/**
4642 * Frees a usage of a pool page.
4643 *
4644 * @param pVM The VM handle.
4645 * @param HCPhys The HC physical address of the shadow page.
4646 * @param iUser The shadow page pool index of the user table.
4647 * @param iUserTable The index into the user table (shadowed).
4648 */
4649void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4650{
4651 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4652 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4653 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4654}
4655
4656
4657/**
4658 * Gets a in-use page in the pool by it's physical address.
4659 *
4660 * @returns Pointer to the page.
4661 * @param pVM The VM handle.
4662 * @param HCPhys The HC physical address of the shadow page.
4663 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4664 */
4665PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4666{
4667 /** @todo profile this! */
4668 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4669 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4670 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%RHp -> %p:{.idx=%d .GCPhys=%RGp .enmKind=%s}\n",
4671 HCPhys, pPage, pPage->idx, pPage->GCPhys, pgmPoolPoolKindToStr(pPage->enmKind)));
4672 return pPage;
4673}
4674
4675
4676/**
4677 * Flushes the entire cache.
4678 *
4679 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4680 * and execute this CR3 flush.
4681 *
4682 * @param pPool The pool.
4683 */
4684void pgmPoolFlushAll(PVM pVM)
4685{
4686 LogFlow(("pgmPoolFlushAll:\n"));
4687 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4688}
4689
4690#ifdef LOG_ENABLED
4691static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
4692{
4693 switch(enmKind)
4694 {
4695 case PGMPOOLKIND_INVALID:
4696 return "PGMPOOLKIND_INVALID";
4697 case PGMPOOLKIND_FREE:
4698 return "PGMPOOLKIND_FREE";
4699 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4700 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4701 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4702 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4703 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4704 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4705 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4706 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4707 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4708 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4709 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4710 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
4711 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4712 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
4713 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4714 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
4715 case PGMPOOLKIND_32BIT_PD:
4716 return "PGMPOOLKIND_32BIT_PD";
4717 case PGMPOOLKIND_32BIT_PD_PHYS:
4718 return "PGMPOOLKIND_32BIT_PD_PHYS";
4719 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4720 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
4721 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4722 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
4723 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4724 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
4725 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4726 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
4727 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4728 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
4729 case PGMPOOLKIND_PAE_PD_PHYS:
4730 return "PGMPOOLKIND_PAE_PD_PHYS";
4731 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4732 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
4733 case PGMPOOLKIND_PAE_PDPT:
4734 return "PGMPOOLKIND_PAE_PDPT";
4735 case PGMPOOLKIND_PAE_PDPT_PHYS:
4736 return "PGMPOOLKIND_PAE_PDPT_PHYS";
4737 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4738 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
4739 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4740 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
4741 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4742 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
4743 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4744 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
4745 case PGMPOOLKIND_64BIT_PML4:
4746 return "PGMPOOLKIND_64BIT_PML4";
4747 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4748 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
4749 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4750 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
4751 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4752 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
4753#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4754 case PGMPOOLKIND_ROOT_32BIT_PD:
4755 return "PGMPOOLKIND_ROOT_32BIT_PD";
4756 case PGMPOOLKIND_ROOT_PAE_PD:
4757 return "PGMPOOLKIND_ROOT_PAE_PD";
4758 case PGMPOOLKIND_ROOT_PDPT:
4759 return "PGMPOOLKIND_ROOT_PDPT";
4760#endif
4761 case PGMPOOLKIND_ROOT_NESTED:
4762 return "PGMPOOLKIND_ROOT_NESTED";
4763 }
4764 return "Unknown kind!";
4765}
4766#endif
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette