VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@16376

Last change on this file since 16376 was 16376, checked in by vboxsync on 2009-01-29

Paging updates. Deal with mode switches and cr3 updates.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 155.6 KB
1/* $Id: PGMAllPool.cpp 16376 2009-01-29 16:46:31Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42
43
44/*******************************************************************************
45* Internal Functions *
46*******************************************************************************/
47__BEGIN_DECLS
48static void pgmPoolFlushAllInt(PPGMPOOL pPool);
49#ifdef PGMPOOL_WITH_USER_TRACKING
50DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
51DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#endif
54#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
55static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
56#endif
57#ifdef PGMPOOL_WITH_CACHE
58static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
59#endif
60#ifdef PGMPOOL_WITH_MONITORING
61static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
62#endif
63#ifndef IN_RING3
64DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
65#endif
66__END_DECLS
67
68
69/**
70 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
71 *
72 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
73 * @param enmKind The page kind.
74 */
75DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
76{
77 switch (enmKind)
78 {
79 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
82 return true;
83 default:
84 return false;
85 }
86}
87
88
89#if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
90/**
91 * Maps a pool page into the current context.
92 *
93 * @returns Pointer to the mapping.
94 * @param pPGM Pointer to the PGM instance data.
95 * @param pPage The page to map.
96 */
97void *pgmPoolMapPageFallback(PPGM pPGM, PPGMPOOLPAGE pPage)
98{
99 /* general pages are taken care of by the inlined part; it
100 only ends up here in case of failure. */
101 AssertReleaseReturn(pPage->idx < PGMPOOL_IDX_FIRST, NULL);
102
103/** @todo make sure HCPhys is valid for *all* indexes. */
104 /* special pages. */
105# ifdef IN_RC
106 switch (pPage->idx)
107 {
108# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
109 case PGMPOOL_IDX_PD:
110 case PGMPOOL_IDX_PDPT:
111 case PGMPOOL_IDX_AMD64_CR3:
112 return pPGM->pShwRootRC;
113# else
114 case PGMPOOL_IDX_PD:
115 return pPGM->pShw32BitPdRC;
116 case PGMPOOL_IDX_PAE_PD:
117 case PGMPOOL_IDX_PAE_PD_0:
118 return pPGM->apShwPaePDsRC[0];
119 case PGMPOOL_IDX_PAE_PD_1:
120 return pPGM->apShwPaePDsRC[1];
121 case PGMPOOL_IDX_PAE_PD_2:
122 return pPGM->apShwPaePDsRC[2];
123 case PGMPOOL_IDX_PAE_PD_3:
124 return pPGM->apShwPaePDsRC[3];
125 case PGMPOOL_IDX_PDPT:
126 return pPGM->pShwPaePdptRC;
127# endif
128 default:
129 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
130 return NULL;
131 }
132
133# else /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
134 RTHCPHYS HCPhys;
135 switch (pPage->idx)
136 {
137# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
138 case PGMPOOL_IDX_PD:
139 case PGMPOOL_IDX_PDPT:
140 case PGMPOOL_IDX_AMD64_CR3:
141 HCPhys = pPGM->HCPhysShwCR3;
142 break;
143
144 case PGMPOOL_IDX_NESTED_ROOT:
145 HCPhys = pPGM->HCPhysShwNestedRoot;
146 break;
147# else
148 case PGMPOOL_IDX_PD:
149 HCPhys = pPGM->HCPhysShw32BitPD;
150 break;
151 case PGMPOOL_IDX_PAE_PD_0:
152 HCPhys = pPGM->aHCPhysPaePDs[0];
153 break;
154 case PGMPOOL_IDX_PAE_PD_1:
155 HCPhys = pPGM->aHCPhysPaePDs[1];
156 break;
157 case PGMPOOL_IDX_PAE_PD_2:
158 HCPhys = pPGM->aHCPhysPaePDs[2];
159 break;
160 case PGMPOOL_IDX_PAE_PD_3:
161 HCPhys = pPGM->aHCPhysPaePDs[3];
162 break;
163 case PGMPOOL_IDX_PDPT:
164 HCPhys = pPGM->HCPhysShwPaePdpt;
165 break;
166 case PGMPOOL_IDX_NESTED_ROOT:
167 HCPhys = pPGM->HCPhysShwNestedRoot;
168 break;
169 case PGMPOOL_IDX_PAE_PD:
170 AssertReleaseMsgFailed(("PGMPOOL_IDX_PAE_PD is not usable in VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 context\n"));
171 return NULL;
172# endif
173 default:
174 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
175 return NULL;
176 }
177 AssertMsg(HCPhys && HCPhys != NIL_RTHCPHYS && !(PAGE_OFFSET_MASK & HCPhys), ("%RHp\n", HCPhys));
178
179 void *pv;
180 pgmR0DynMapHCPageInlined(pPGM, HCPhys, &pv);
181 return pv;
182# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
183}
184#endif /* IN_RC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
185
186
187#ifdef PGMPOOL_WITH_MONITORING
188/**
189 * Determine the size of a write instruction.
190 * @returns number of bytes written.
191 * @param pDis The disassembler state.
192 */
193static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
194{
195 /*
196 * This is very crude and possibly wrong for some opcodes,
197 * but since it's not really supposed to be called we can
198 * probably live with that.
199 */
200 return DISGetParamSize(pDis, &pDis->param1);
201}
202
203
204/**
205 * Flushes a chain of pages sharing the same access monitor.
206 *
207 * @returns VBox status code suitable for scheduling.
208 * @param pPool The pool.
209 * @param pPage A page in the chain.
210 */
211int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
212{
213 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
214
215 /*
216 * Find the list head.
217 */
218 uint16_t idx = pPage->idx;
219 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
220 {
221 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
222 {
223 idx = pPage->iMonitoredPrev;
224 Assert(idx != pPage->idx);
225 pPage = &pPool->aPages[idx];
226 }
227 }
228
229 /*
230 * Iterate the list flushing each shadow page.
231 */
232 int rc = VINF_SUCCESS;
233 for (;;)
234 {
235 idx = pPage->iMonitoredNext;
236 Assert(idx != pPage->idx);
237 if (pPage->idx >= PGMPOOL_IDX_FIRST)
238 {
239 int rc2 = pgmPoolFlushPage(pPool, pPage);
240 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
241 rc = VINF_PGM_SYNC_CR3;
242 }
243 /* next */
244 if (idx == NIL_PGMPOOL_IDX)
245 break;
246 pPage = &pPool->aPages[idx];
247 }
248 return rc;
249}
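/*
 * Annotation (not in the checked-in file): the monitor chain walked above is an
 * index-linked list -- pages refer to each other by pool index (iMonitoredPrev /
 * iMonitoredNext) rather than by pointer, so the links stay valid in R3, R0 and
 * RC without fixups. A minimal sketch of the forward traversal, assuming a valid
 * pPool and a NIL_PGMPOOL_IDX terminated chain:
 *
 *     uint16_t i = pPage->idx;
 *     while (i != NIL_PGMPOOL_IDX)
 *     {
 *         PPGMPOOLPAGE pCur = &pPool->aPages[i];
 *         // visit pCur here
 *         i = pCur->iMonitoredNext;
 *     }
 */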
250
251
252/**
253 * Wrapper for getting the current context pointer to the entry being modified.
254 *
255 * @returns Pointer to the current context mapping of the entry.
256 * @param pPool The pool.
257 * @param pvFault The fault virtual address.
258 * @param GCPhysFault The fault physical address.
259 * @param cbEntry The entry size.
260 */
261#ifdef IN_RING3
262DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
263#else
264DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
265#endif
266{
267#ifdef IN_RC
268 return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
269
270#elif defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
271 void *pvRet;
272 int rc = PGMDynMapGCPageOff(pPool->pVMR0, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
273 AssertFatalRCSuccess(rc);
274 return pvRet;
275
276#elif defined(IN_RING0)
277 void *pvRet;
278 int rc = pgmRamGCPhys2HCPtr(&pPool->pVMR0->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
279 AssertFatalRCSuccess(rc);
280 return pvRet;
281
282#elif defined(IN_RING3)
283 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
284#else
285# error "huh?"
286#endif
287}
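/*
 * Annotation (not in the checked-in file): every branch above rounds the faulting
 * address down to the start of the entry with "addr & ~(cbEntry - 1)", which is
 * only valid because cbEntry is a power of two (4 bytes for legacy, 8 for PAE).
 * A minimal worked example with an assumed 8-byte PAE entry:
 *
 *     RTGCUINTPTR uFault  = 0x00c01236;                         // write lands mid-entry
 *     unsigned    cbEntry = sizeof(X86PTEPAE);                  // 8
 *     RTGCUINTPTR uEntry  = uFault & ~(RTGCUINTPTR)(cbEntry - 1);
 *     // uEntry == 0x00c01230, the start of the 8-byte entry being written.
 */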
288
289
290/**
291 * Process shadow entries before they are changed by the guest.
292 *
293 * For PT entries we will clear them. For PD entries, we'll simply check
294 * for mapping conflicts and set the SyncCR3 FF if found.
295 *
296 * @param pPool The pool.
297 * @param pPage The head page.
298 * @param GCPhysFault The guest physical fault address.
299 * @param uAddress In R0 and GC this is the guest context fault address (flat).
300 * In R3 this is the host context 'fault' address.
301 * @param pCpu The disassembler state for figuring out the write size.
302 * This need not be specified if the caller knows we won't do cross entry accesses.
303 */
304#ifdef IN_RING3
305void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
306#else
307void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
308#endif
309{
310 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
311 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
312 const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
313
314 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));
315
316 for (;;)
317 {
318 union
319 {
320 void *pv;
321 PX86PT pPT;
322 PX86PTPAE pPTPae;
323 PX86PD pPD;
324 PX86PDPAE pPDPae;
325 PX86PDPT pPDPT;
326 PX86PML4 pPML4;
327 } uShw;
328
329 switch (pPage->enmKind)
330 {
331 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
332 {
333 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
334 const unsigned iShw = off / sizeof(X86PTE);
335 if (uShw.pPT->a[iShw].n.u1Present)
336 {
337# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
338 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
339 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
340 pgmPoolTracDerefGCPhysHint(pPool, pPage,
341 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
342 pGstPte->u & X86_PTE_PG_MASK);
343# endif
344 uShw.pPT->a[iShw].u = 0;
345 }
346 break;
347 }
348
349 /* page/2 sized */
350 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
351 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
352 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
353 {
354 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
355 if (uShw.pPTPae->a[iShw].n.u1Present)
356 {
357# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
358 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
359 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
360 pgmPoolTracDerefGCPhysHint(pPool, pPage,
361 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
362 pGstPte->u & X86_PTE_PG_MASK);
363# endif
364 uShw.pPTPae->a[iShw].u = 0;
365 }
366 }
367 break;
368
369 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
370 {
371 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
372 const unsigned iShw = off / sizeof(X86PTEPAE);
373 if (uShw.pPTPae->a[iShw].n.u1Present)
374 {
375# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
376 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
377 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
378 pgmPoolTracDerefGCPhysHint(pPool, pPage,
379 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
380 pGstPte->u & X86_PTE_PAE_PG_MASK);
381# endif
382 uShw.pPTPae->a[iShw].u = 0;
383 }
384
385 /* paranoia / a bit assumptive. */
386 if ( pCpu
387 && (off & 7)
388 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
389 {
390 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
391 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
392
393 if (uShw.pPTPae->a[iShw2].n.u1Present)
394 {
395# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
396 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
397 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
398 pgmPoolTracDerefGCPhysHint(pPool, pPage,
399 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
400 pGstPte->u & X86_PTE_PAE_PG_MASK);
401# endif
402 uShw.pPTPae->a[iShw2].u = 0;
403 }
404 }
405
406 break;
407 }
408
409# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
410 case PGMPOOLKIND_32BIT_PD:
411# else
412 case PGMPOOLKIND_ROOT_32BIT_PD:
413# endif
414 {
415 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
416 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
417 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
418 {
419 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
420 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
421 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
422 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
423 }
424 /* paranoia / a bit assumptive. */
425 else if ( pCpu
426 && (off & 3)
427 && (off & 3) + cbWrite > sizeof(X86PTE))
428 {
429 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
430 if ( iShw2 != iShw
431 && iShw2 < RT_ELEMENTS(uShw.pPD->a)
432 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
433 {
434 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
435 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
436 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
437 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
438 }
439 }
440#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
441 if ( uShw.pPD->a[iShw].n.u1Present
442 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
443 {
444 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
445# ifdef IN_RC /* TLB load - we're pushing things a bit... */
446 ASMProbeReadByte(pvAddress);
447# endif
448 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
449 uShw.pPD->a[iShw].u = 0;
450 }
451#endif
452 break;
453 }
454
455# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
456 case PGMPOOLKIND_ROOT_PAE_PD:
457 {
458 unsigned iGst = off / sizeof(X86PDE); // ASSUMING 32-bit guest paging!
459 unsigned iShwPdpt = iGst / 256;
460 unsigned iShw = (iGst % 256) * 2;
461 Assert(pPage->idx == PGMPOOL_IDX_PAE_PD);
462 PPGMPOOLPAGE pPage2 = pPage + 1 + iShwPdpt;
463 Assert(pPage2->idx == PGMPOOL_IDX_PAE_PD_0 + iShwPdpt);
464 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage2);
465 for (unsigned i = 0; i < 2; i++, iShw++)
466 {
467 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
468 {
469 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
470 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
471 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw));
472 }
473 /* paranoia / a bit assumptive. */
474 else if ( pCpu
475 && (off & 3)
476 && (off & 3) + cbWrite > 4)
477 {
478 const unsigned iShw2 = iShw + 2;
479 if ( iShw2 < RT_ELEMENTS(uShw.pPDPae->a) /** @todo was completely wrong, it's better now after #1865 but still wrong from cross PD. */
480 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
481 {
482 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
483 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
484 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
485 }
486 }
487#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
488 if ( uShw.pPDPae->a[iShw].n.u1Present
489 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
490 {
491 LogFlow(("pgmPoolMonitorChainChanging: iShwPdpt=%#x iShw=%#x: %RX64 -> freeing it!\n", iShwPdpt, iShw, uShw.pPDPae->a[iShw].u));
492# ifdef IN_RC /* TLB load - we're pushing things a bit... */
493 ASMProbeReadByte(pvAddress);
494# endif
495 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + iShwPdpt * X86_PG_PAE_ENTRIES);
496 uShw.pPDPae->a[iShw].u = 0;
497 }
498#endif
499 }
500 break;
501 }
502# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */
503
504 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
505 {
506 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
507 const unsigned iShw = off / sizeof(X86PDEPAE);
508 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
509 {
510 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
511 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
512 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
513 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
514 }
515#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
516 /*
517 * Causes trouble when the guest uses a PDE to refer to the whole page table level
518 * structure. (Invalidate here; faults later on when it tries to change the page
519 * table entries -> recheck; probably only applies to the RC case.)
520 */
521 else
522 {
523 if (uShw.pPDPae->a[iShw].n.u1Present)
524 {
525 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
526 pgmPoolFree(pPool->CTX_SUFF(pVM),
527 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
528 /* Note: hardcoded PAE implementation dependency */
529 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
530 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
531 uShw.pPDPae->a[iShw].u = 0;
532 }
533 }
534#endif
535 /* paranoia / a bit assumptive. */
536 if ( pCpu
537 && (off & 7)
538 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
539 {
540 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
541 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
542
543 if ( iShw2 != iShw
544 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
545 {
546 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
547 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
548 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
549 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
550 }
551#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
552 else if (uShw.pPDPae->a[iShw2].n.u1Present)
553 {
554 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
555 pgmPoolFree(pPool->CTX_SUFF(pVM),
556 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
557 /* Note: hardcoded PAE implementation dependency */
558 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
559 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
560 uShw.pPDPae->a[iShw2].u = 0;
561 }
562#endif
563 }
564 break;
565 }
566
567# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
568 case PGMPOOLKIND_PAE_PDPT:
569# else
570 case PGMPOOLKIND_ROOT_PDPT:
571# endif
572 {
573 /*
574 * Hopefully this doesn't happen very often:
575 * - touching unused parts of the page
576 * - messing with the bits of pd pointers without changing the physical address
577 */
578 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
579 const unsigned iShw = off / sizeof(X86PDPE);
580 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
581 {
582 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
583 {
584 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
585 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
586 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
587 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
588 }
589 /* paranoia / a bit assumptive. */
590 else if ( pCpu
591 && (off & 7)
592 && (off & 7) + cbWrite > sizeof(X86PDPE))
593 {
594 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
595 if ( iShw2 != iShw
596 && iShw2 < X86_PG_PAE_PDPE_ENTRIES
597 && uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
598 {
599 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
600 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
601 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
602 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
603 }
604 }
605 }
606 break;
607 }
608
609#ifndef IN_RC
610 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
611 {
612 Assert(pPage->enmKind == PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD);
613
614 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
615 const unsigned iShw = off / sizeof(X86PDEPAE);
616 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
617 {
618 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
619 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
620 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
621 }
622 else
623 {
624 if (uShw.pPDPae->a[iShw].n.u1Present)
625 {
626 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
627 pgmPoolFree(pPool->CTX_SUFF(pVM),
628 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
629 pPage->idx,
630 iShw);
631 uShw.pPDPae->a[iShw].u = 0;
632 }
633 }
634 /* paranoia / a bit assumptive. */
635 if ( pCpu
636 && (off & 7)
637 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
638 {
639 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
640 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
641
642 if ( iShw2 != iShw
643 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
644 {
645 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
646 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
647 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
648 }
649 else
650 if (uShw.pPDPae->a[iShw2].n.u1Present)
651 {
652 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
653 pgmPoolFree(pPool->CTX_SUFF(pVM),
654 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
655 pPage->idx,
656 iShw2);
657 uShw.pPDPae->a[iShw2].u = 0;
658 }
659 }
660 break;
661 }
662
663 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
664 {
665 /*
666 * Hopefully this doesn't happen very often:
667 * - messing with the bits of pd pointers without changing the physical address
668 */
669 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
670 {
671 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
672 const unsigned iShw = off / sizeof(X86PDPE);
673 if (uShw.pPDPT->a[iShw].n.u1Present)
674 {
675 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
676 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
677 uShw.pPDPT->a[iShw].u = 0;
678 }
679 /* paranoia / a bit assumptive. */
680 if ( pCpu
681 && (off & 7)
682 && (off & 7) + cbWrite > sizeof(X86PDPE))
683 {
684 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
685 if (uShw.pPDPT->a[iShw2].n.u1Present)
686 {
687 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
688 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
689 uShw.pPDPT->a[iShw2].u = 0;
690 }
691 }
692 }
693 break;
694 }
695
696 case PGMPOOLKIND_64BIT_PML4:
697 {
698 /*
699 * Hopefully this doesn't happen very often:
700 * - messing with the bits of pd pointers without changing the physical address
701 */
702 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
703 {
704 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
705 const unsigned iShw = off / sizeof(X86PDPE);
706 if (uShw.pPML4->a[iShw].n.u1Present)
707 {
708 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
709 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
710 uShw.pPML4->a[iShw].u = 0;
711 }
712 /* paranoia / a bit assumptive. */
713 if ( pCpu
714 && (off & 7)
715 && (off & 7) + cbWrite > sizeof(X86PDPE))
716 {
717 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
718 if (uShw.pPML4->a[iShw2].n.u1Present)
719 {
720 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
721 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
722 uShw.pPML4->a[iShw2].u = 0;
723 }
724 }
725 }
726 break;
727 }
728#endif /* !IN_RC */
729
730 default:
731 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
732 }
733
734 /* next */
735 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
736 return;
737 pPage = &pPool->aPages[pPage->iMonitoredNext];
738 }
739}
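/*
 * Annotation (not in the checked-in file): the "paranoia / a bit assumptive" blocks
 * above catch guest writes that straddle two shadow entries. A worked example for
 * an assumed 8-byte PAE entry:
 *
 *     unsigned off     = 0x7fc;                              // write offset into the page
 *     unsigned cbWrite = 8;                                  // misaligned 8-byte store
 *     unsigned iShw    = off / sizeof(X86PTEPAE);            // 2044 / 8 = 255
 *     if (   (off & 7)                                       // not entry aligned
 *         && (off & 7) + cbWrite > sizeof(X86PTEPAE))        // 4 + 8 > 8 -> spills over
 *     {
 *         unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);   // 2051 / 8 = 256
 *         // both a[iShw] and a[iShw2] have to be dealt with; the AssertReturnVoid
 *         // guards above keep iShw2 inside the shadow table.
 *     }
 */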
740
741
742# ifndef IN_RING3
743/**
744 * Checks if an access could be a fork operation in progress.
745 *
746 * Meaning that the guest is setting up the parent process for Copy-On-Write.
747 *
748 * @returns true if it's likely that we're forking, otherwise false.
749 * @param pPool The pool.
750 * @param pCpu The disassembled instruction.
751 * @param offFault The access offset.
752 */
753DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
754{
755 /*
756 * i386 linux is using btr to clear X86_PTE_RW.
757 * The functions involved are (2.6.16 source inspection):
758 * clear_bit
759 * ptep_set_wrprotect
760 * copy_one_pte
761 * copy_pte_range
762 * copy_pmd_range
763 * copy_pud_range
764 * copy_page_range
765 * dup_mmap
766 * dup_mm
767 * copy_mm
768 * copy_process
769 * do_fork
770 */
771 if ( pCpu->pCurInstr->opcode == OP_BTR
772 && !(offFault & 4)
773 /** @todo Validate that the bit index is X86_PTE_RW. */
774 )
775 {
776 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
777 return true;
778 }
779 return false;
780}
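/*
 * Annotation (not in the checked-in file): what the heuristic above matches is the
 * guest write-protecting a PTE with a locked btr, i.e. roughly the equivalent of
 * the following C on the guest's own page table entry (ptep_set_wrprotect() in the
 * Linux 2.6 sources referenced above):
 *
 *     pGstPte->u &= ~(uint64_t)X86_PTE_RW;    // clear bit 1, the R/W bit
 *
 * The "!(offFault & 4)" test only accepts faults on the low dword of the entry,
 * where the R/W bit lives; the bit operand itself is not validated (see the @todo).
 */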
781
782
783/**
784 * Determine whether the page is likely to have been reused.
785 *
786 * @returns true if we consider the page as being reused for a different purpose.
787 * @returns false if we consider it to still be a paging page.
788 * @param pVM VM Handle.
789 * @param pPage The page in question.
790 * @param pRegFrame Trap register frame.
791 * @param pCpu The disassembly info for the faulting instruction.
792 * @param pvFault The fault address.
793 *
794 * @remark The REP prefix check is left to the caller because of STOSD/W.
795 */
796DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
797{
798#ifndef IN_RC
799 /** @todo could make this general, faulting close to rsp should be safe reuse heuristic. */
800 if ( HWACCMHasPendingIrq(pVM)
801 && (pRegFrame->rsp - pvFault) < 32)
802 {
803 /* Fault caused by stack writes while trying to inject an interrupt event. */
804 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
805 return true;
806 }
807#else
808 NOREF(pVM); NOREF(pvFault);
809#endif
810
811 switch (pCpu->pCurInstr->opcode)
812 {
813 /* call implies the actual push of the return address faulted */
814 case OP_CALL:
815 Log4(("pgmPoolMonitorIsReused: CALL\n"));
816 return true;
817 case OP_PUSH:
818 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
819 return true;
820 case OP_PUSHF:
821 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
822 return true;
823 case OP_PUSHA:
824 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
825 return true;
826 case OP_FXSAVE:
827 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
828 return true;
829 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
830 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
831 return true;
832 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
833 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
834 return true;
835 case OP_MOVSWD:
836 case OP_STOSWD:
837 if ( pCpu->prefix == (PREFIX_REP|PREFIX_REX)
838 && pRegFrame->rcx >= 0x40
839 )
840 {
841 Assert(pCpu->mode == CPUMODE_64BIT);
842
843 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
844 return true;
845 }
846 return false;
847 }
848 if ( (pCpu->param1.flags & USE_REG_GEN32)
849 && (pCpu->param1.base.reg_gen == USE_REG_ESP))
850 {
851 Log4(("pgmPoolMonitorIsReused: ESP\n"));
852 return true;
853 }
854
855 //if (pPage->fCR3Mix)
856 // return false;
857 return false;
858}
859
860
861/**
862 * Flushes the page being accessed.
863 *
864 * @returns VBox status code suitable for scheduling.
865 * @param pVM The VM handle.
866 * @param pPool The pool.
867 * @param pPage The pool page (head).
868 * @param pCpu The disassembly of the write instruction.
869 * @param pRegFrame The trap register frame.
870 * @param GCPhysFault The fault address as guest physical address.
871 * @param pvFault The fault address.
872 */
873static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
874 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
875{
876 /*
877 * First, do the flushing.
878 */
879 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
880
881 /*
882 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
883 */
884 uint32_t cbWritten;
885 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
886 if (RT_SUCCESS(rc2))
887 pRegFrame->rip += pCpu->opsize;
888 else if (rc2 == VERR_EM_INTERPRETER)
889 {
890#ifdef IN_RC
891 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
892 {
893 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
894 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
895 rc = VINF_SUCCESS;
896 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
897 }
898 else
899#endif
900 {
901 rc = VINF_EM_RAW_EMULATE_INSTR;
902 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
903 }
904 }
905 else
906 rc = rc2;
907
908 /* See use in pgmPoolAccessHandlerSimple(). */
909 PGM_INVL_GUEST_TLBS();
910
911 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
912 return rc;
913
914}
915
916
917/**
918 * Handles the STOSD write accesses.
919 *
920 * @returns VBox status code suitable for scheduling.
921 * @param pVM The VM handle.
922 * @param pPool The pool.
923 * @param pPage The pool page (head).
924 * @param pCpu The disassembly of the write instruction.
925 * @param pRegFrame The trap register frame.
926 * @param GCPhysFault The fault address as guest physical address.
927 * @param pvFault The fault address.
928 */
929DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
930 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
931{
932 Assert(pCpu->mode == CPUMODE_32BIT);
933
934 /*
935 * Increment the modification counter and insert it into the list
936 * of modified pages the first time.
937 */
938 if (!pPage->cModifications++)
939 pgmPoolMonitorModifiedInsert(pPool, pPage);
940
941 /*
942 * Execute REP STOSD.
943 *
944 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
945 * write situation, meaning that it's safe to write here.
946 */
947#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
948 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
949#endif
950 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
951 while (pRegFrame->ecx)
952 {
953#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
954 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
955 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
956 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
957#else
958 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
959#endif
960#ifdef IN_RC
961 *(uint32_t *)pu32 = pRegFrame->eax;
962#else
963 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
964#endif
965 pu32 += 4;
966 GCPhysFault += 4;
967 pRegFrame->edi += 4;
968 pRegFrame->ecx--;
969 }
970 pRegFrame->rip += pCpu->opsize;
971
972 /* See use in pgmPoolAccessHandlerSimple(). */
973 PGM_INVL_GUEST_TLBS();
974
975 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
976 return VINF_SUCCESS;
977}
978
979
980/**
981 * Handles the simple write accesses.
982 *
983 * @returns VBox status code suitable for scheduling.
984 * @param pVM The VM handle.
985 * @param pPool The pool.
986 * @param pPage The pool page (head).
987 * @param pCpu The disassembly of the write instruction.
988 * @param pRegFrame The trap register frame.
989 * @param GCPhysFault The fault address as guest physical address.
990 * @param pvFault The fault address.
991 */
992DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
993 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
994{
995 /*
996 * Increment the modification counter and insert it into the list
997 * of modified pages the first time.
998 */
999 if (!pPage->cModifications++)
1000 pgmPoolMonitorModifiedInsert(pPool, pPage);
1001
1002 /*
1003 * Clear all the pages. ASSUMES that pvFault is readable.
1004 */
1005#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1006 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1007 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1008 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
1009 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1010#else
1011 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
1012#endif
1013
1014 /*
1015 * Interpret the instruction.
1016 */
1017 uint32_t cb;
1018 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
1019 if (RT_SUCCESS(rc))
1020 pRegFrame->rip += pCpu->opsize;
1021 else if (rc == VERR_EM_INTERPRETER)
1022 {
1023 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1024 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
1025 rc = VINF_EM_RAW_EMULATE_INSTR;
1026 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1027 }
1028
1029 /*
1030 * Quick hack, with logging enabled we're getting stale
1031 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
1032 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
1033 * have to be fixed to support this. But that'll have to wait till next week.
1034 *
1035 * An alternative is to keep track of the changed PTEs together with the
1036 * GCPhys from the guest PT. This may prove expensive though.
1037 *
1038 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
1039 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1040 */
1041 PGM_INVL_GUEST_TLBS();
1042
1043 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1044 return rc;
1045}
1046
1047
1048/**
1049 * \#PF Handler callback for PT write accesses.
1050 *
1051 * @returns VBox status code (appropriate for GC return).
1052 * @param pVM VM Handle.
1053 * @param uErrorCode CPU Error code.
1054 * @param pRegFrame Trap register frame.
1055 * NULL on DMA and other non CPU access.
1056 * @param pvFault The fault address (cr2).
1057 * @param GCPhysFault The GC physical address corresponding to pvFault.
1058 * @param pvUser User argument.
1059 */
1060DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1061{
1062 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1063 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1064 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1065 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1066
1067 /*
1068 * We should ALWAYS have the list head as user parameter. This
1069 * is because we use that page to record the changes.
1070 */
1071 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1072
1073 /*
1074 * Disassemble the faulting instruction.
1075 */
1076 DISCPUSTATE Cpu;
1077 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
1078 AssertRCReturn(rc, rc);
1079
1080 /*
1081 * Check if it's worth dealing with.
1082 */
1083 bool fReused = false;
1084 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1085 || pPage->fCR3Mix)
1086 && !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
1087 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
1088 {
1089 /*
1090 * Simple instructions, no REP prefix.
1091 */
1092 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
1093 {
1094 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1095 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1096 return rc;
1097 }
1098
1099 /*
1100 * Windows is frequently doing small memset() operations (netio test 4k+).
1101 * We have to deal with these or we'll kill the cache and performance.
1102 */
1103 if ( Cpu.pCurInstr->opcode == OP_STOSWD
1104 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
1105 && pRegFrame->ecx <= 0x20
1106 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1107 && !((uintptr_t)pvFault & 3)
1108 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1109 && Cpu.mode == CPUMODE_32BIT
1110 && Cpu.opmode == CPUMODE_32BIT
1111 && Cpu.addrmode == CPUMODE_32BIT
1112 && Cpu.prefix == PREFIX_REP
1113 && !pRegFrame->eflags.Bits.u1DF
1114 )
1115 {
1116 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1117 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1118 return rc;
1119 }
1120
1121 /* REP prefix, don't bother. */
1122 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1123 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1124 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
1125 }
1126
1127 /*
1128 * Not worth it, so flush it.
1129 *
1130 * If we considered it to be reused, don't go back to ring-3
1131 * to emulate failed instructions since we usually cannot
1132 * interpret them. This may be a bit risky, in which case
1133 * the reuse detection must be fixed.
1134 */
1135 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1136 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1137 rc = VINF_SUCCESS;
1138 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1139 return rc;
1140}
1141
1142# endif /* !IN_RING3 */
1143#endif /* PGMPOOL_WITH_MONITORING */
1144
1145#ifdef PGMPOOL_WITH_CACHE
1146
1147/**
1148 * Inserts a page into the GCPhys hash table.
1149 *
1150 * @param pPool The pool.
1151 * @param pPage The page.
1152 */
1153DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1154{
1155 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1156 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1157 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1158 pPage->iNext = pPool->aiHash[iHash];
1159 pPool->aiHash[iHash] = pPage->idx;
1160}
1161
1162
1163/**
1164 * Removes a page from the GCPhys hash table.
1165 *
1166 * @param pPool The pool.
1167 * @param pPage The page.
1168 */
1169DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1170{
1171 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1172 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1173 if (pPool->aiHash[iHash] == pPage->idx)
1174 pPool->aiHash[iHash] = pPage->iNext;
1175 else
1176 {
1177 uint16_t iPrev = pPool->aiHash[iHash];
1178 for (;;)
1179 {
1180 const int16_t i = pPool->aPages[iPrev].iNext;
1181 if (i == pPage->idx)
1182 {
1183 pPool->aPages[iPrev].iNext = pPage->iNext;
1184 break;
1185 }
1186 if (i == NIL_PGMPOOL_IDX)
1187 {
1188 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1189 break;
1190 }
1191 iPrev = i;
1192 }
1193 }
1194 pPage->iNext = NIL_PGMPOOL_IDX;
1195}
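/*
 * Annotation (not in the checked-in file): the two functions above maintain a simple
 * chained hash table -- aiHash[PGMPOOL_HASH(GCPhys)] holds the index of the first
 * page in the bucket and iNext links the rest. A minimal lookup sketch (the same
 * pattern pgmPoolCacheAlloc uses below), assuming the pool is otherwise consistent:
 *
 *     uint16_t i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
 *     while (i != NIL_PGMPOOL_IDX)
 *     {
 *         PPGMPOOLPAGE pPage = &pPool->aPages[i];
 *         if (pPage->GCPhys == GCPhys)
 *             return pPage;               // a shadow of this guest page is cached
 *         i = pPage->iNext;
 *     }
 *     return NULL;                        // not in the cache
 */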
1196
1197
1198/**
1199 * Frees up one cache page.
1200 *
1201 * @returns VBox status code.
1202 * @retval VINF_SUCCESS on success.
1203 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
1204 * @param pPool The pool.
1205 * @param iUser The user index.
1206 */
1207static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1208{
1209#ifndef IN_RC
1210 const PVM pVM = pPool->CTX_SUFF(pVM);
1211#endif
1212 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
1213 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1214
1215 /*
1216 * Select one page from the tail of the age list.
1217 */
1218 uint16_t iToFree = pPool->iAgeTail;
1219 if (iToFree == iUser)
1220 iToFree = pPool->aPages[iToFree].iAgePrev;
1221/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1222 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1223 {
1224 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1225 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1226 {
1227 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1228 continue;
1229 iToFree = i;
1230 break;
1231 }
1232 }
1233*/
1234
1235 Assert(iToFree != iUser);
1236 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1237
1238 PPGMPOOLPAGE pPage = &pPool->aPages[iToFree];
1239
1240 /*
1241 * Reject any attempts at flushing the currently active shadow CR3 mapping
1242 */
1243 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
1244 {
1245 /* Refresh the cr3 mapping by putting it at the head of the age list. */
1246 pgmPoolCacheUsed(pPool, pPage);
1247 return pgmPoolCacheFreeOne(pPool, iUser);
1248 }
1249
1250 int rc = pgmPoolFlushPage(pPool, pPage);
1251 if (rc == VINF_SUCCESS)
1252 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1253 return rc;
1254}
1255
1256
1257/**
1258 * Checks if a kind mismatch is really a page being reused
1259 * or if it's just a normal remapping.
1260 *
1261 * @returns true if reused and the cached page (enmKind1) should be flushed
1262 * @returns false if not reused.
1263 * @param enmKind1 The kind of the cached page.
1264 * @param enmKind2 The kind of the requested page.
1265 */
1266static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1267{
1268 switch (enmKind1)
1269 {
1270 /*
1271 * Never reuse them. There is no remapping in non-paging mode.
1272 */
1273 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1274 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1275 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1276 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1277 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1278 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1279 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1280 return true;
1281
1282 /*
1283 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1284 */
1285 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1286 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1287 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1288 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1289 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1290 switch (enmKind2)
1291 {
1292 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1293 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1294 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1295 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1296 case PGMPOOLKIND_64BIT_PML4:
1297 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1298 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1299 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1300 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1301 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1302 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1303 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1304 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1305 return true;
1306 default:
1307 return false;
1308 }
1309
1310 /*
1311 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1312 */
1313 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1314 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1315 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1316 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1317 case PGMPOOLKIND_64BIT_PML4:
1318 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1319 switch (enmKind2)
1320 {
1321 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1322 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1323 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1324 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1325 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1326 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1327 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1328 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1329 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1330 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1331 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1332 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1333 return true;
1334 default:
1335 return false;
1336 }
1337
1338 /*
1339 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1340 */
1341#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1342 case PGMPOOLKIND_ROOT_32BIT_PD:
1343 case PGMPOOLKIND_ROOT_PAE_PD:
1344 case PGMPOOLKIND_ROOT_PDPT:
1345#endif
1346 case PGMPOOLKIND_ROOT_NESTED:
1347 return false;
1348
1349 default:
1350 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1351 }
1352}
1353
1354
1355/**
1356 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1357 *
1358 * @returns VBox status code.
1359 * @retval VINF_PGM_CACHED_PAGE on success.
1360 * @retval VERR_FILE_NOT_FOUND if not found.
1361 * @param pPool The pool.
1362 * @param GCPhys The GC physical address of the page we're gonna shadow.
1363 * @param enmKind The kind of mapping.
1364 * @param iUser The shadow page pool index of the user table.
1365 * @param iUserTable The index into the user table (shadowed).
1366 * @param ppPage Where to store the pointer to the page.
1367 */
1368static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1369{
1370#ifndef IN_RC
1371 const PVM pVM = pPool->CTX_SUFF(pVM);
1372#endif
1373 /*
1374 * Look up the GCPhys in the hash.
1375 */
1376 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1377 Log3(("pgmPoolCacheAlloc: %RGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
1378 if (i != NIL_PGMPOOL_IDX)
1379 {
1380 do
1381 {
1382 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1383 Log3(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1384 if (pPage->GCPhys == GCPhys)
1385 {
1386 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
1387 {
1388 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1389 if (RT_SUCCESS(rc))
1390 {
1391 *ppPage = pPage;
1392 STAM_COUNTER_INC(&pPool->StatCacheHits);
1393 return VINF_PGM_CACHED_PAGE;
1394 }
1395 return rc;
1396 }
1397
1398 /*
1399 * The kind is different. In some cases we should now flush the page
1400 * as it has been reused, but in most cases this is normal remapping
1401 * of PDs as PT or big pages using the GCPhys field in a slightly
1402 * different way than the other kinds.
1403 */
1404 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1405 {
1406 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1407 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
1408 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1409 break;
1410 }
1411 }
1412
1413 /* next */
1414 i = pPage->iNext;
1415 } while (i != NIL_PGMPOOL_IDX);
1416 }
1417
1418 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
1419 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1420 return VERR_FILE_NOT_FOUND;
1421}
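/*
 * Annotation (not in the checked-in file): a hypothetical caller of the cache lookup
 * above would typically treat VERR_FILE_NOT_FOUND as "allocate a fresh page" rather
 * than as an error, along these lines:
 *
 *     PPGMPOOLPAGE pPage;
 *     int rc = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, &pPage);
 *     if (rc == VINF_PGM_CACHED_PAGE)
 *         return rc;                      // reuse the cached shadow page
 *     if (rc != VERR_FILE_NOT_FOUND)
 *         return rc;                      // user tracking failed, pass it up
 *     // fall through and allocate + shadow a new page
 */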
1422
1423
1424/**
1425 * Inserts a page into the cache.
1426 *
1427 * @param pPool The pool.
1428 * @param pPage The cached page.
1429 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1430 */
1431static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1432{
1433 /*
1434 * Insert into the GCPhys hash if the page is fit for that.
1435 */
1436 Assert(!pPage->fCached);
1437 if (fCanBeCached)
1438 {
1439 pPage->fCached = true;
1440 pgmPoolHashInsert(pPool, pPage);
1441 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1442 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1443 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1444 }
1445 else
1446 {
1447 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1448 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1449 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1450 }
1451
1452 /*
1453 * Insert at the head of the age list.
1454 */
1455 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1456 pPage->iAgeNext = pPool->iAgeHead;
1457 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1458 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1459 else
1460 pPool->iAgeTail = pPage->idx;
1461 pPool->iAgeHead = pPage->idx;
1462}
1463
1464
1465/**
1466 * Flushes a cached page.
1467 *
1468 * @param pPool The pool.
1469 * @param pPage The cached page.
1470 */
1471static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1472{
1473 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1474
1475 /*
1476 * Remove the page from the hash.
1477 */
1478 if (pPage->fCached)
1479 {
1480 pPage->fCached = false;
1481 pgmPoolHashRemove(pPool, pPage);
1482 }
1483 else
1484 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1485
1486 /*
1487 * Remove it from the age list.
1488 */
1489 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1490 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1491 else
1492 pPool->iAgeTail = pPage->iAgePrev;
1493 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1494 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1495 else
1496 pPool->iAgeHead = pPage->iAgeNext;
1497 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1498 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1499}
1500
1501#endif /* PGMPOOL_WITH_CACHE */
1502#ifdef PGMPOOL_WITH_MONITORING
1503
1504/**
1505 * Looks for pages sharing the monitor.
1506 *
1507 * @returns Pointer to the head page.
1508 * @returns NULL if not found.
1509 * @param pPool The Pool
1510 * @param pNewPage The page which is going to be monitored.
1511 */
1512static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1513{
1514#ifdef PGMPOOL_WITH_CACHE
1515 /*
1516 * Look up the GCPhys in the hash.
1517 */
1518 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1519 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1520 if (i == NIL_PGMPOOL_IDX)
1521 return NULL;
1522 do
1523 {
1524 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1525 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1526 && pPage != pNewPage)
1527 {
1528 switch (pPage->enmKind)
1529 {
1530 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1531 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1532 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1533 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1534 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1535 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1536 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1537 case PGMPOOLKIND_64BIT_PML4:
1538#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1539 case PGMPOOLKIND_32BIT_PD:
1540 case PGMPOOLKIND_PAE_PDPT:
1541#else
1542 case PGMPOOLKIND_ROOT_32BIT_PD:
1543 case PGMPOOLKIND_ROOT_PAE_PD:
1544 case PGMPOOLKIND_ROOT_PDPT:
1545#endif
1546 {
1547 /* find the head */
1548 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1549 {
1550 Assert(pPage->iMonitoredPrev != pPage->idx);
1551 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1552 }
1553 return pPage;
1554 }
1555
1556 /* ignore, no monitoring. */
1557 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1558 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1559 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1560 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1561 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1562 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1563 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1564 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1565 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1566 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1567 case PGMPOOLKIND_ROOT_NESTED:
1568 break;
1569 default:
1570 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1571 }
1572 }
1573
1574 /* next */
1575 i = pPage->iNext;
1576 } while (i != NIL_PGMPOOL_IDX);
1577#endif
1578 return NULL;
1579}
1580
1581
1582/**
1583 * Enables write monitoring of a guest page.
1584 *
1585 * @returns VBox status code.
1586 * @retval VINF_SUCCESS on success.
1587 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1588 * @param pPool The pool.
1589 * @param pPage The cached page.
1590 */
1591static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1592{
1593 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1594
1595 /*
1596 * Filter out the relevant kinds.
1597 */
1598 switch (pPage->enmKind)
1599 {
1600 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1601 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1602 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1603 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1604 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1605 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1606 case PGMPOOLKIND_64BIT_PML4:
1607#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1608 case PGMPOOLKIND_32BIT_PD:
1609 case PGMPOOLKIND_PAE_PDPT:
1610#else
1611 case PGMPOOLKIND_ROOT_PDPT:
1612#endif
1613 break;
1614
1615 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1616 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1617 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1618 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1619 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1620 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1621 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1622 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1623 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1624 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1625 case PGMPOOLKIND_ROOT_NESTED:
1626 /* Nothing to monitor here. */
1627 return VINF_SUCCESS;
1628
1629#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1630 case PGMPOOLKIND_ROOT_32BIT_PD:
1631 case PGMPOOLKIND_ROOT_PAE_PD:
1632#endif
1633#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1634 break;
1635#endif
1636 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1637 default:
1638 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1639 }
1640
1641 /*
1642 * Install handler.
1643 */
1644 int rc;
1645 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1646 if (pPageHead)
1647 {
1648 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1649 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1650 pPage->iMonitoredPrev = pPageHead->idx;
1651 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1652 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1653 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1654 pPageHead->iMonitoredNext = pPage->idx;
1655 rc = VINF_SUCCESS;
1656 }
1657 else
1658 {
1659 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1660 PVM pVM = pPool->CTX_SUFF(pVM);
1661 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1662 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1663 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1664 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1665 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1666 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1667 pPool->pszAccessHandler);
1668 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1669 * the heap size should suffice. */
1670 AssertFatalRC(rc);
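        /* If registering the handler flagged a pending pool clear, report it so the caller knows a lightweight flush is coming. */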
1671 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1672 rc = VERR_PGM_POOL_CLEARED;
1673 }
1674 pPage->fMonitored = true;
1675 return rc;
1676}
1677
1678
1679/**
1680 * Disables write monitoring of a guest page.
1681 *
1682 * @returns VBox status code.
1683 * @retval VINF_SUCCESS on success.
1684 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
1685 * @param pPool The pool.
1686 * @param pPage The cached page.
1687 */
1688static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1689{
1690 /*
1691 * Filter out the relevant kinds.
1692 */
1693 switch (pPage->enmKind)
1694 {
1695 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1696 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1697 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1698 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1699 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1700 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1701 case PGMPOOLKIND_64BIT_PML4:
1702#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1703 case PGMPOOLKIND_32BIT_PD:
1704 case PGMPOOLKIND_PAE_PDPT:
1705#else
1706 case PGMPOOLKIND_ROOT_PDPT:
1707#endif
1708 break;
1709
1710 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1711 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1712 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1713 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1714 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1715 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1716 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1717 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1718 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1719 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1720 case PGMPOOLKIND_ROOT_NESTED:
1721 /* Nothing to monitor here. */
1722 return VINF_SUCCESS;
1723
1724#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1725 case PGMPOOLKIND_ROOT_32BIT_PD:
1726 case PGMPOOLKIND_ROOT_PAE_PD:
1727#endif
1728#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1729 break;
1730#endif
1731 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1732 default:
1733 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1734 }
1735
1736 /*
1737 * Remove the page from the monitored list or uninstall it if last.
1738 */
1739 const PVM pVM = pPool->CTX_SUFF(pVM);
1740 int rc;
1741 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1742 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1743 {
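            /* This page heads the monitoring chain: promote the next page to head and re-point the handler callbacks at it. */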
1744 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1745 {
1746 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1747 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1748 pNewHead->fCR3Mix = pPage->fCR3Mix;
1749 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1750 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1751 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1752 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
1753 pPool->pszAccessHandler);
1754 AssertFatalRCSuccess(rc);
1755 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1756 }
1757 else
1758 {
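            /* Not the chain head: just unlink this page; the physical handler stays registered for the head. */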
1759 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1760 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1761 {
1762 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1763 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1764 }
1765 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1766 rc = VINF_SUCCESS;
1767 }
1768 }
1769 else
1770 {
1771 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1772 AssertFatalRC(rc);
1773 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1774 rc = VERR_PGM_POOL_CLEARED;
1775 }
1776 pPage->fMonitored = false;
1777
1778 /*
1779 * Remove it from the list of modified pages (if in it).
1780 */
1781 pgmPoolMonitorModifiedRemove(pPool, pPage);
1782
1783 return rc;
1784}
1785
1786# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1787
1788/**
1789 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1790 *
1791 * @param pPool The Pool.
1792 * @param pPage A page in the chain.
1793 * @param fCR3Mix The new fCR3Mix value.
1794 */
1795static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1796{
1797 /* current */
1798 pPage->fCR3Mix = fCR3Mix;
1799
1800 /* before */
1801 int16_t idx = pPage->iMonitoredPrev;
1802 while (idx != NIL_PGMPOOL_IDX)
1803 {
1804 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1805 idx = pPool->aPages[idx].iMonitoredPrev;
1806 }
1807
1808 /* after */
1809 idx = pPage->iMonitoredNext;
1810 while (idx != NIL_PGMPOOL_IDX)
1811 {
1812 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1813 idx = pPool->aPages[idx].iMonitoredNext;
1814 }
1815}
1816
1817
1818/**
1819 * Installs or modifies monitoring of a CR3 page (special).
1820 *
1821 * We're pretending the CR3 page is shadowed by the pool so we can use the
1822 * generic mechanisms for detecting chained monitoring. (This also gives us a
1823 * taste of what code changes are required to really pool CR3 shadow pages.)
1824 *
1825 * @returns VBox status code.
1826 * @param pPool The pool.
1827 * @param idxRoot The CR3 (root) page index.
1828 * @param GCPhysCR3 The (new) CR3 value.
1829 */
1830int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1831{
1832 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1833 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1834 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d} GCPhysCR3=%RGp\n",
1835 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1836
1837 /*
1838 * The unlikely case where it already matches.
1839 */
1840 if (pPage->GCPhys == GCPhysCR3)
1841 {
1842 Assert(pPage->fMonitored);
1843 return VINF_SUCCESS;
1844 }
1845
1846 /*
1847 * Flush the current monitoring and remove it from the hash.
1848 */
1849 int rc = VINF_SUCCESS;
1850 if (pPage->fMonitored)
1851 {
1852 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1853 rc = pgmPoolMonitorFlush(pPool, pPage);
1854 if (rc == VERR_PGM_POOL_CLEARED)
1855 rc = VINF_SUCCESS;
1856 else
1857 AssertFatalRC(rc);
1858 pgmPoolHashRemove(pPool, pPage);
1859 }
1860
1861 /*
1862 * Monitor the page at the new location and insert it into the hash.
1863 */
1864 pPage->GCPhys = GCPhysCR3;
1865 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1866 if (rc2 != VERR_PGM_POOL_CLEARED)
1867 {
1868 AssertFatalRC(rc2);
1869 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1870 rc = rc2;
1871 }
1872 pgmPoolHashInsert(pPool, pPage);
1873 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1874 return rc;
1875}
1876
1877
1878/**
1879 * Removes the monitoring of a CR3 page (special).
1880 *
1881 * @returns VBox status code.
1882 * @param pPool The pool.
1883 * @param idxRoot The CR3 (root) page index.
1884 */
1885int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1886{
1887 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1888 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1889 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d}\n",
1890 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1891
1892 if (!pPage->fMonitored)
1893 return VINF_SUCCESS;
1894
1895 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1896 int rc = pgmPoolMonitorFlush(pPool, pPage);
1897 if (rc != VERR_PGM_POOL_CLEARED)
1898 AssertFatalRC(rc);
1899 else
1900 rc = VINF_SUCCESS;
1901 pgmPoolHashRemove(pPool, pPage);
1902 Assert(!pPage->fMonitored);
1903 pPage->GCPhys = NIL_RTGCPHYS;
1904 return rc;
1905}
1906
1907# endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1908
1909/**
1910 * Inserts the page into the list of modified pages.
1911 *
1912 * @param pPool The pool.
1913 * @param pPage The page.
1914 */
1915void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1916{
1917 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1918 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1919 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1920 && pPool->iModifiedHead != pPage->idx,
1921 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1922 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1923 pPool->iModifiedHead, pPool->cModifiedPages));
1924
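    /* Push the page onto the head of the modified-page list. */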
1925 pPage->iModifiedNext = pPool->iModifiedHead;
1926 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1927 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1928 pPool->iModifiedHead = pPage->idx;
1929 pPool->cModifiedPages++;
1930#ifdef VBOX_WITH_STATISTICS
1931 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1932 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1933#endif
1934}
1935
1936
1937/**
1938 * Removes the page from the list of modified pages and resets the
1939 * modification counter.
1940 *
1941 * @param pPool The pool.
1942 * @param pPage The page which is believed to be in the list of modified pages.
1943 */
1944static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1945{
1946 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1947 if (pPool->iModifiedHead == pPage->idx)
1948 {
1949 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1950 pPool->iModifiedHead = pPage->iModifiedNext;
1951 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1952 {
1953 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1954 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1955 }
1956 pPool->cModifiedPages--;
1957 }
1958 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1959 {
1960 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1961 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1962 {
1963 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1964 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1965 }
1966 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1967 pPool->cModifiedPages--;
1968 }
1969 else
1970 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1971 pPage->cModifications = 0;
1972}
1973
1974
1975/**
1976 * Zaps the list of modified pages, resetting their modification counters in the process.
1977 *
1978 * @param pVM The VM handle.
1979 */
1980void pgmPoolMonitorModifiedClearAll(PVM pVM)
1981{
1982 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1983 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1984
1985 unsigned cPages = 0; NOREF(cPages);
1986 uint16_t idx = pPool->iModifiedHead;
1987 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
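    /* Walk the now-detached list, clearing each page's links and modification counter. */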
1988 while (idx != NIL_PGMPOOL_IDX)
1989 {
1990 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1991 idx = pPage->iModifiedNext;
1992 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1993 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1994 pPage->cModifications = 0;
1995 Assert(++cPages);
1996 }
1997 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1998 pPool->cModifiedPages = 0;
1999}
2000
2001
2002#ifdef IN_RING3
2003/**
2004 * Clear all shadow pages and clear all modification counters.
2005 *
2006 * @param pVM The VM handle.
2007 * @remark Should only be used when monitoring is available, thus placed in
2008 * the PGMPOOL_WITH_MONITORING #ifdef.
2009 */
2010void pgmPoolClearAll(PVM pVM)
2011{
2012 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2013 STAM_PROFILE_START(&pPool->StatClearAll, c);
2014 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2015
2016 /*
2017 * Iterate all the pages until we've encountered all that are in use.
2018 * This is a simple but not quite optimal solution.
2019 */
2020 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2021 unsigned cLeft = pPool->cUsedPages;
2022 unsigned iPage = pPool->cCurPages;
2023 while (--iPage >= PGMPOOL_IDX_FIRST)
2024 {
2025 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2026 if (pPage->GCPhys != NIL_RTGCPHYS)
2027 {
2028 switch (pPage->enmKind)
2029 {
2030 /*
2031 * We only care about shadow page tables.
2032 */
2033 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2034 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2035 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2036 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2037 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2038 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2039 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2040 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2041 {
2042#ifdef PGMPOOL_WITH_USER_TRACKING
2043 if (pPage->cPresent)
2044#endif
2045 {
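            /* Zero the shadow page table so it no longer maps anything, and reset the present-entry tracking. */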
2046 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2047 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2048 ASMMemZeroPage(pvShw);
2049 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2050#ifdef PGMPOOL_WITH_USER_TRACKING
2051 pPage->cPresent = 0;
2052 pPage->iFirstPresent = ~0;
2053#endif
2054 }
2055 }
2056 /* fall thru */
2057
2058 default:
2059 Assert(!pPage->cModifications || ++cModifiedPages);
2060 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2061 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2062 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2063 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2064 pPage->cModifications = 0;
2065 break;
2066
2067 }
2068 if (!--cLeft)
2069 break;
2070 }
2071 }
2072
2073 /* Sweep the special pages too. */
2074 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2075 {
2076 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2077 if (pPage->GCPhys != NIL_RTGCPHYS)
2078 {
2079 Assert(!pPage->cModifications || ++cModifiedPages);
2080 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2081 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2082 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2083 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2084 pPage->cModifications = 0;
2085 }
2086 }
2087
2088#ifndef DEBUG_michael
2089 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2090#endif
2091 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2092 pPool->cModifiedPages = 0;
2093
2094#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2095 /*
2096 * Clear all the GCPhys links and rebuild the phys ext free list.
2097 */
2098 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2099 pRam;
2100 pRam = pRam->CTX_SUFF(pNext))
2101 {
2102 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2103 while (iPage-- > 0)
2104 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2105 }
2106
2107 pPool->iPhysExtFreeHead = 0;
2108 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2109 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2110 for (unsigned i = 0; i < cMaxPhysExts; i++)
2111 {
2112 paPhysExts[i].iNext = i + 1;
2113 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2114 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2115 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2116 }
2117 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2118#endif
2119
2120
2121 pPool->cPresent = 0;
2122 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2123}
2124#endif /* IN_RING3 */
2125
2126
2127/**
2128 * Handle SyncCR3 pool tasks
2129 *
2130 * @returns VBox status code.
2131 * @retval VINF_SUCCESS on success.
2132 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2133 * @param pVM The VM handle.
2134 * @remark Should only be used when monitoring is available, thus placed in
2135 * the PGMPOOL_WITH_MONITORING #ifdef.
2136 */
2137int pgmPoolSyncCR3(PVM pVM)
2138{
2139 /*
2140 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2141 * Occasionally we will have to clear all the shadow page tables because we wanted
2142 * to monitor a page which was mapped by too many shadowed page tables. This operation
2143 * is sometimes referred to as a 'lightweight flush'.
2144 */
2145 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2146 pgmPoolMonitorModifiedClearAll(pVM);
2147 else
2148 {
2149# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2150 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2151 pgmPoolClearAll(pVM);
2152# else /* !IN_RING3 */
2153 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2154 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2155 return VINF_PGM_SYNC_CR3;
2156# endif /* !IN_RING3 */
2157 }
2158 return VINF_SUCCESS;
2159}
2160
2161#endif /* PGMPOOL_WITH_MONITORING */
2162#ifdef PGMPOOL_WITH_USER_TRACKING
2163
2164/**
2165 * Frees up at least one user entry.
2166 *
2167 * @returns VBox status code.
2168 * @retval VINF_SUCCESS on success.
2169 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2170 * @param pPool The pool.
2171 * @param iUser The user index.
2172 */
2173static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2174{
2175 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2176#ifdef PGMPOOL_WITH_CACHE
2177 /*
2178 * Just free cached pages in a braindead fashion.
2179 */
2180 /** @todo walk the age list backwards and free the first with usage. */
2181 int rc = VINF_SUCCESS;
2182 do
2183 {
2184 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2185 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2186 rc = rc2;
2187 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2188 return rc;
2189#else
2190 /*
2191 * Lazy approach.
2192 */
2193 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
2194 Assert(!CPUMIsGuestInLongMode(pVM));
2195 pgmPoolFlushAllInt(pPool);
2196 return VERR_PGM_POOL_FLUSHED;
2197#endif
2198}
2199
2200
2201/**
2202 * Inserts a page into the cache.
2203 *
2204 * This will create a user node for the page, insert it into the GCPhys
2205 * hash, and insert it into the age list.
2206 *
2207 * @returns VBox status code.
2208 * @retval VINF_SUCCESS if successfully added.
2209 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2210 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
2211 * @param pPool The pool.
2212 * @param pPage The cached page.
2213 * @param GCPhys The GC physical address of the page we're going to shadow.
2214 * @param iUser The user index.
2215 * @param iUserTable The user table index.
2216 */
2217DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2218{
2219 int rc = VINF_SUCCESS;
2220 PPGMPOOLUSER pUser = pPool->CTX_SUFF(paUsers);
2221
2222 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
2223
2224 /*
2225 * Find a free user node.
2226 */
2227 uint16_t i = pPool->iUserFreeHead;
2228 if (i == NIL_PGMPOOL_USER_INDEX)
2229 {
2230 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2231 if (RT_FAILURE(rc))
2232 return rc;
2233 i = pPool->iUserFreeHead;
2234 }
2235
2236 /*
2237 * Unlink the user node from the free list,
2238 * initialize and insert it into the user list.
2239 */
2240 pPool->iUserFreeHead = pUser[i].iNext;
2241 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
2242 pUser[i].iUser = iUser;
2243 pUser[i].iUserTable = iUserTable;
2244 pPage->iUserHead = i;
2245
2246 /*
2247 * Insert into cache and enable monitoring of the guest page if enabled.
2248 *
2249 * Until we implement caching of all levels, including the CR3 one, we'll
2250 * have to make sure we don't try to monitor & cache any recursive reuse of
2251 * a monitored CR3 page. Because all Windows versions are doing this we'll
2252 * have to be able to do combined access monitoring, CR3 + PT and
2253 * PD + PT (guest PAE).
2254 *
2255 * Update:
2256 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2257 */
2258#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2259# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2260 const bool fCanBeMonitored = true;
2261# else
2262 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2263 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2264 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2265# endif
2266# ifdef PGMPOOL_WITH_CACHE
2267 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2268# endif
2269 if (fCanBeMonitored)
2270 {
2271# ifdef PGMPOOL_WITH_MONITORING
2272 rc = pgmPoolMonitorInsert(pPool, pPage);
2273 if (rc == VERR_PGM_POOL_CLEARED)
2274 {
2275 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2276# ifndef PGMPOOL_WITH_CACHE
2277 pgmPoolMonitorFlush(pPool, pPage);
2278 rc = VERR_PGM_POOL_FLUSHED;
2279# endif
2280 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2281 pUser[i].iNext = pPool->iUserFreeHead;
2282 pUser[i].iUser = NIL_PGMPOOL_IDX;
2283 pPool->iUserFreeHead = i;
2284 }
2285 }
2286# endif
2287#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2288 return rc;
2289}
2290
2291
2292# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2293/**
2294 * Adds a user reference to a page.
2295 *
2296 * This will allocate a user record for the page, link it into the page's user
2297 * list, and move the page to the head of the cache age list.
2298 *
2299 * @returns VBox status code.
2300 * @retval VINF_SUCCESS if successfully added.
2301 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2302 * @param pPool The pool.
2303 * @param pPage The cached page.
2304 * @param iUser The user index.
2305 * @param iUserTable The user table.
2306 */
2307static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2308{
2309 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2310
2311 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
2312# ifdef VBOX_STRICT
2313 /*
2314 * Check that the entry doesn't already exist.
2315 */
2316 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2317 {
2318 uint16_t i = pPage->iUserHead;
2319 do
2320 {
2321 Assert(i < pPool->cMaxUsers);
2322 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2323 i = paUsers[i].iNext;
2324 } while (i != NIL_PGMPOOL_USER_INDEX);
2325 }
2326# endif
2327
2328 /*
2329 * Allocate a user node.
2330 */
2331 uint16_t i = pPool->iUserFreeHead;
2332 if (i == NIL_PGMPOOL_USER_INDEX)
2333 {
2334 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2335 if (RT_FAILURE(rc))
2336 return rc;
2337 i = pPool->iUserFreeHead;
2338 }
2339 pPool->iUserFreeHead = paUsers[i].iNext;
2340
2341 /*
2342 * Initialize the user node and insert it.
2343 */
2344 paUsers[i].iNext = pPage->iUserHead;
2345 paUsers[i].iUser = iUser;
2346 paUsers[i].iUserTable = iUserTable;
2347 pPage->iUserHead = i;
2348
2349# ifdef PGMPOOL_WITH_CACHE
2350 /*
2351 * Tell the cache to update its replacement stats for this page.
2352 */
2353 pgmPoolCacheUsed(pPool, pPage);
2354# endif
2355 return VINF_SUCCESS;
2356}
2357# endif /* PGMPOOL_WITH_CACHE */
2358
2359
2360/**
2361 * Frees a user record associated with a page.
2362 *
2363 * This does not clear the entry in the user table, it simply returns the
2364 * user record to the chain of free records.
2365 *
2366 * @param pPool The pool.
2367 * @param pPage The shadow page.
2368 * @param iUser The shadow page pool index of the user table.
2369 * @param iUserTable The index into the user table (shadowed).
2370 */
2371static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2372{
2373 /*
2374 * Unlink and free the specified user entry.
2375 */
2376 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2377
2378 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2379 uint16_t i = pPage->iUserHead;
2380 if ( i != NIL_PGMPOOL_USER_INDEX
2381 && paUsers[i].iUser == iUser
2382 && paUsers[i].iUserTable == iUserTable)
2383 {
2384 pPage->iUserHead = paUsers[i].iNext;
2385
2386 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2387 paUsers[i].iNext = pPool->iUserFreeHead;
2388 pPool->iUserFreeHead = i;
2389 return;
2390 }
2391
2392 /* General: Linear search. */
2393 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2394 while (i != NIL_PGMPOOL_USER_INDEX)
2395 {
2396 if ( paUsers[i].iUser == iUser
2397 && paUsers[i].iUserTable == iUserTable)
2398 {
2399 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2400 paUsers[iPrev].iNext = paUsers[i].iNext;
2401 else
2402 pPage->iUserHead = paUsers[i].iNext;
2403
2404 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2405 paUsers[i].iNext = pPool->iUserFreeHead;
2406 pPool->iUserFreeHead = i;
2407 return;
2408 }
2409 iPrev = i;
2410 i = paUsers[i].iNext;
2411 }
2412
2413 /* Fatal: didn't find it */
2414 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2415 iUser, iUserTable, pPage->GCPhys));
2416}
2417
2418
2419/**
2420 * Gets the entry size of a shadow table.
2421 *
2422 * @param enmKind The kind of page.
2423 *
2424 * @returns The size of the entry in bytes. That is, 4 or 8.
2425 * @returns If the kind is not for a table, an assertion is raised and 0 is
2426 * returned.
2427 */
2428DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2429{
2430 switch (enmKind)
2431 {
2432 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2433 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2434 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2435#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2436 case PGMPOOLKIND_32BIT_PD:
2437#else
2438 case PGMPOOLKIND_ROOT_32BIT_PD:
2439#endif
2440 return 4;
2441
2442 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2443 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2444 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2445 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2446 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2447 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2448 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2449 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2450 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2451 case PGMPOOLKIND_64BIT_PML4:
2452#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2453 case PGMPOOLKIND_ROOT_PAE_PD:
2454 case PGMPOOLKIND_ROOT_PDPT:
2455#endif
2456 case PGMPOOLKIND_PAE_PDPT:
2457 case PGMPOOLKIND_ROOT_NESTED:
2458 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2459 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2460 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2461 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2462 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2463 return 8;
2464
2465 default:
2466 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2467 }
2468}
2469
2470
2471/**
2472 * Gets the entry size of a guest table.
2473 *
2474 * @param enmKind The kind of page.
2475 *
2476 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2477 * @returns If the kind is not for a table, an assertion is raised and 0 is
2478 * returned.
2479 */
2480DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2481{
2482 switch (enmKind)
2483 {
2484 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2485 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2486#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2487 case PGMPOOLKIND_32BIT_PD:
2488#else
2489 case PGMPOOLKIND_ROOT_32BIT_PD:
2490#endif
2491 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2492 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2493 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2494 return 4;
2495
2496 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2497 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2498 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2499 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2500 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2501 case PGMPOOLKIND_64BIT_PML4:
2502#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2503 case PGMPOOLKIND_PAE_PDPT:
2504#else
2505 case PGMPOOLKIND_ROOT_PAE_PD:
2506 case PGMPOOLKIND_ROOT_PDPT:
2507#endif
2508 return 8;
2509
2510 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2511 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2512 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2513 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2514 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2515 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2516 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2517 case PGMPOOLKIND_ROOT_NESTED:
2518 /** @todo can we return 0? (nobody is calling this...) */
2519 AssertFailed();
2520 return 0;
2521
2522 default:
2523 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2524 }
2525}
2526
2527#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2528
2529/**
2530 * Scans one shadow page table for mappings of a physical page.
2531 *
2532 * @param pVM The VM handle.
2533 * @param pPhysPage The guest page in question.
2534 * @param iShw The shadow page table.
2535 * @param cRefs The number of references made in that PT.
2536 */
2537static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2538{
2539 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2540 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2541
2542 /*
2543 * Assert sanity.
2544 */
2545 Assert(cRefs == 1);
2546 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2547 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2548
2549 /*
2550 * Then, clear the actual mappings to the page in the shadow PT.
2551 */
2552 switch (pPage->enmKind)
2553 {
2554 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2555 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2556 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2557 {
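            /* Scan from the first present entry for PTEs mapping the guest page (HCPhys | P) and clear them; we're done once all cRefs references are cleared. */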
2558 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2559 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2560 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2561 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2562 {
2563 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2564 pPT->a[i].u = 0;
2565 cRefs--;
2566 if (!cRefs)
2567 return;
2568 }
2569#ifdef LOG_ENABLED
2570 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2571 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2572 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2573 {
2574 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2575 pPT->a[i].u = 0;
2576 }
2577#endif
2578 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2579 break;
2580 }
2581
2582 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2583 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2584 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2585 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2586 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2587 {
2588 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2589 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2590 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2591 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2592 {
2593 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2594 pPT->a[i].u = 0;
2595 cRefs--;
2596 if (!cRefs)
2597 return;
2598 }
2599#ifdef LOG_ENABLED
2600 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2601 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2602 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2603 {
2604 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2605 pPT->a[i].u = 0;
2606 }
2607#endif
2608 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
2609 break;
2610 }
2611
2612 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2613 {
2614 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2615 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2616 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2617 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2618 {
2619 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2620 pPT->a[i].u = 0;
2621 cRefs--;
2622 if (!cRefs)
2623 return;
2624 }
2625#ifdef LOG_ENABLED
2626 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2627 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2628 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2629 {
2630 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2631 pPT->a[i].u = 0;
2632 }
2633#endif
2634 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2635 break;
2636 }
2637
2638 default:
2639 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2640 }
2641}
2642
2643
2644/**
2645 * Scans one shadow page table for mappings of a physical page.
2646 *
2647 * @param pVM The VM handle.
2648 * @param pPhysPage The guest page in question.
2649 * @param iShw The shadow page table.
2650 * @param cRefs The number of references made in that PT.
2651 */
2652void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2653{
2654 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2655 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2656 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2657 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2658 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2659 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2660}
2661
2662
2663/**
2664 * Flushes a list of shadow page tables mapping the same physical page.
2665 *
2666 * @param pVM The VM handle.
2667 * @param pPhysPage The guest page in question.
2668 * @param iPhysExt The physical cross reference extent list to flush.
2669 */
2670void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2671{
2672 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2673 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2674 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt\n", pPhysPage->HCPhys, iPhysExt));
2675
2676 const uint16_t iPhysExtStart = iPhysExt;
2677 PPGMPOOLPHYSEXT pPhysExt;
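    /* Walk the extent chain, flushing every referenced shadow PT and marking each slot free as we go. */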
2678 do
2679 {
2680 Assert(iPhysExt < pPool->cMaxPhysExts);
2681 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2682 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2683 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2684 {
2685 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2686 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2687 }
2688
2689 /* next */
2690 iPhysExt = pPhysExt->iNext;
2691 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2692
2693 /* insert the list into the free list and clear the ram range entry. */
2694 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2695 pPool->iPhysExtFreeHead = iPhysExtStart;
2696 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2697
2698 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2699}
2700
2701#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2702
2703/**
2704 * Scans all shadow page tables for mappings of a physical page.
2705 *
2706 * This may be slow, but it's most likely more efficient than cleaning
2707 * out the entire page pool / cache.
2708 *
2709 * @returns VBox status code.
2710 * @retval VINF_SUCCESS if all references have been successfully cleared.
2711 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2712 * a page pool cleaning.
2713 *
2714 * @param pVM The VM handle.
2715 * @param pPhysPage The guest page in question.
2716 */
2717int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2718{
2719 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2720 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2721 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2722 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2723
2724#if 1
2725 /*
2726 * There is a limit to what makes sense.
2727 */
2728 if (pPool->cPresent > 1024)
2729 {
2730 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2731 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2732 return VINF_PGM_GCPHYS_ALIASED;
2733 }
2734#endif
2735
2736 /*
2737 * Iterate all the pages until we've encountered all that are in use.
2738 * This is a simple but not quite optimal solution.
2739 */
2740 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2741 const uint32_t u32 = u64;
2742 unsigned cLeft = pPool->cUsedPages;
2743 unsigned iPage = pPool->cCurPages;
2744 while (--iPage >= PGMPOOL_IDX_FIRST)
2745 {
2746 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2747 if (pPage->GCPhys != NIL_RTGCPHYS)
2748 {
2749 switch (pPage->enmKind)
2750 {
2751 /*
2752 * We only care about shadow page tables.
2753 */
2754 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2755 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2756 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2757 {
2758 unsigned cPresent = pPage->cPresent;
2759 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2760 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2761 if (pPT->a[i].n.u1Present)
2762 {
2763 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2764 {
2765 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2766 pPT->a[i].u = 0;
2767 }
2768 if (!--cPresent)
2769 break;
2770 }
2771 break;
2772 }
2773
2774 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2775 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2776 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2777 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2778 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2779 {
2780 unsigned cPresent = pPage->cPresent;
2781 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2782 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2783 if (pPT->a[i].n.u1Present)
2784 {
2785 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2786 {
2787 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2788 pPT->a[i].u = 0;
2789 }
2790 if (!--cPresent)
2791 break;
2792 }
2793 break;
2794 }
2795 }
2796 if (!--cLeft)
2797 break;
2798 }
2799 }
2800
2801 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2802 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2803 return VINF_SUCCESS;
2804}
2805
2806
2807/**
2808 * Clears the user entry in a user table.
2809 *
2810 * This is used to remove all references to a page when flushing it.
2811 */
2812static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2813{
2814 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2815 Assert(pUser->iUser < pPool->cCurPages);
2816 uint32_t iUserTable = pUser->iUserTable;
2817
2818 /*
2819 * Map the user page.
2820 */
2821 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2822#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2823 if (pUserPage->enmKind == PGMPOOLKIND_ROOT_PAE_PD)
2824 {
2825 /* Must translate the fake 2048 entry PD to a 512 PD one since the R0 mapping is not linear. */
2826 Assert(pUser->iUser == PGMPOOL_IDX_PAE_PD);
2827 uint32_t iPdpt = iUserTable / X86_PG_PAE_ENTRIES;
2828 iUserTable %= X86_PG_PAE_ENTRIES;
2829 pUserPage = &pPool->aPages[PGMPOOL_IDX_PAE_PD_0 + iPdpt];
2830 Assert(pUserPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD);
2831 }
2832#endif
2833 union
2834 {
2835 uint64_t *pau64;
2836 uint32_t *pau32;
2837 } u;
2838 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
2839
2840 /* Safety precaution in case we change the paging for other modes too in the future. */
2841 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
2842
2843#ifdef VBOX_STRICT
2844 /*
2845 * Some sanity checks.
2846 */
2847 switch (pUserPage->enmKind)
2848 {
2849# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2850 case PGMPOOLKIND_32BIT_PD:
2851 Assert(iUserTable < X86_PG_ENTRIES);
2852 break;
2853# else
2854 case PGMPOOLKIND_ROOT_32BIT_PD:
2855 Assert(iUserTable < X86_PG_ENTRIES);
2856 Assert(!(u.pau32[iUserTable] & PGM_PDFLAGS_MAPPING));
2857 break;
2858# endif
2859# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2860 case PGMPOOLKIND_ROOT_PAE_PD:
2861 Assert(iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2862 AssertMsg(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING), ("%llx %d\n", u.pau64[iUserTable], iUserTable));
2863 break;
2864# endif
2865# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2866 case PGMPOOLKIND_PAE_PDPT:
2867# else
2868 case PGMPOOLKIND_ROOT_PDPT:
2869# endif
2870 Assert(iUserTable < 4);
2871 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2872 break;
2873 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2874 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2875 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2876 break;
2877 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2878 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2879 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
2880 break;
2881 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2882 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2883 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2884 break;
2885 case PGMPOOLKIND_64BIT_PML4:
2886 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2887 /* GCPhys >> PAGE_SHIFT is the index here */
2888 break;
2889 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2890 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2891 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2892 break;
2893
2894 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2895 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2896 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2897 break;
2898
2899 case PGMPOOLKIND_ROOT_NESTED:
2900 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2901 break;
2902
2903 default:
2904 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2905 break;
2906 }
2907#endif /* VBOX_STRICT */
2908
2909 /*
2910 * Clear the entry in the user page.
2911 */
2912 switch (pUserPage->enmKind)
2913 {
2914 /* 32-bit entries */
2915#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2916 case PGMPOOLKIND_32BIT_PD:
2917#else
2918 case PGMPOOLKIND_ROOT_32BIT_PD:
2919#endif
2920 u.pau32[iUserTable] = 0;
2921 break;
2922
2923 /* 64-bit entries */
2924 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2925 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2926 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2927 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2928 case PGMPOOLKIND_64BIT_PML4:
2929 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2930 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2931# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2932 case PGMPOOLKIND_ROOT_PAE_PD:
2933#endif
2934#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2935 case PGMPOOLKIND_PAE_PDPT:
2936#else
2937 case PGMPOOLKIND_ROOT_PDPT:
2938#endif
2939 case PGMPOOLKIND_ROOT_NESTED:
2940 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2941 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2942 u.pau64[iUserTable] = 0;
2943 break;
2944
2945 default:
2946 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2947 }
2948}
2949
2950
2951/**
2952 * Clears all users of a page.
2953 */
2954static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2955{
2956 /*
2957 * Free all the user records.
2958 */
2959 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2960 uint16_t i = pPage->iUserHead;
2961 while (i != NIL_PGMPOOL_USER_INDEX)
2962 {
2963 /* Clear the entry in the user table. */
2964 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2965
2966 /* Free it. */
2967 const uint16_t iNext = paUsers[i].iNext;
2968 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2969 paUsers[i].iNext = pPool->iUserFreeHead;
2970 pPool->iUserFreeHead = i;
2971
2972 /* Next. */
2973 i = iNext;
2974 }
2975 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2976}
2977
2978#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2979
2980/**
2981 * Allocates a new physical cross reference extent.
2982 *
2983 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2984 * @param pVM The VM handle.
2985 * @param piPhysExt Where to store the phys ext index.
2986 */
2987PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2988{
2989 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
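    /* Take the first extent off the free list; fail if the list is empty. */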
2990 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2991 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2992 {
2993 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2994 return NULL;
2995 }
2996 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2997 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2998 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2999 *piPhysExt = iPhysExt;
3000 return pPhysExt;
3001}
3002
3003
3004/**
3005 * Frees a physical cross reference extent.
3006 *
3007 * @param pVM The VM handle.
3008 * @param iPhysExt The extent to free.
3009 */
3010void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3011{
3012 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3013 Assert(iPhysExt < pPool->cMaxPhysExts);
3014 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3015 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3016 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3017 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3018 pPool->iPhysExtFreeHead = iPhysExt;
3019}
3020
3021
3022/**
3023 * Frees a list of physical cross reference extents.
3024 *
3025 * @param pVM The VM handle.
3026 * @param iPhysExt The first extent in the list to free.
3027 */
3028void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3029{
3030 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3031
3032 const uint16_t iPhysExtStart = iPhysExt;
3033 PPGMPOOLPHYSEXT pPhysExt;
3034 do
3035 {
3036 Assert(iPhysExt < pPool->cMaxPhysExts);
3037 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3038 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3039 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3040
3041 /* next */
3042 iPhysExt = pPhysExt->iNext;
3043 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3044
3045 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3046 pPool->iPhysExtFreeHead = iPhysExtStart;
3047}
3048
3049
3050/**
3051 * Insert a reference into a list of physical cross reference extents.
3052 *
3053 * @returns The new ram range flags (top 16-bits).
3054 *
3055 * @param pVM The VM handle.
3056 * @param iPhysExt The physical extent index of the list head.
3057 * @param iShwPT The shadow page table index.
3058 *
3059 */
3060static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3061{
3062 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3063 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3064
3065 /* special common case. */
3066 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3067 {
3068 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3069 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3070 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
3071 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3072 }
3073
3074 /* general treatment. */
3075 const uint16_t iPhysExtStart = iPhysExt;
3076 unsigned cMax = 15;
3077 for (;;)
3078 {
3079 Assert(iPhysExt < pPool->cMaxPhysExts);
3080 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3081 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3082 {
3083 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3084 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3085 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3086 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3087 }
3088 if (!--cMax)
3089 {
3090 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3091 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3092 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
3093 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3094 }
3095 }
3096
3097 /* add another extent to the list. */
3098 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3099 if (!pNew)
3100 {
3101 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3102 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3103 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3104 }
3105 pNew->iNext = iPhysExtStart;
3106 pNew->aidx[0] = iShwPT;
3107 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3108 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3109}
3110
3111
3112/**
3113 * Add a reference to a guest physical page where extents are in use.
3114 *
3115 * @returns The new ram range flags (top 16-bits).
3116 *
3117 * @param pVM The VM handle.
3118 * @param u16 The ram range flags (top 16-bits).
3119 * @param iShwPT The shadow page table index.
3120 */
3121uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3122{
3123 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
3124 {
3125 /*
3126 * Convert to extent list.
3127 */
3128 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
3129 uint16_t iPhysExt;
3130 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3131 if (pPhysExt)
3132 {
3133 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
3134 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3135 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
3136 pPhysExt->aidx[1] = iShwPT;
3137 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3138 }
3139 else
3140 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3141 }
3142 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
3143 {
3144 /*
3145 * Insert into the extent list.
3146 */
3147 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
3148 }
3149 else
3150 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3151 return u16;
3152}
3153
3154
3155/**
3156 * Clear references to guest physical memory.
3157 *
3158 * @param pPool The pool.
3159 * @param pPage The page.
3160 * @param pPhysPage Pointer to the aPages entry in the ram range.
3161 */
3162void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3163{
3164 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
3165 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3166
3167 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
3168 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
3169 {
3170 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3171 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3172 do
3173 {
3174 Assert(iPhysExt < pPool->cMaxPhysExts);
3175
3176 /*
3177 * Look for the shadow page and check if it's all freed.
3178 */
3179 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3180 {
3181 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3182 {
3183 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3184
3185 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3186 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3187 {
3188 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3189 return;
3190 }
3191
3192 /* we can free the node. */
3193 PVM pVM = pPool->CTX_SUFF(pVM);
3194 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3195 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3196 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3197 {
3198 /* lonely node */
3199 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3200 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
3201 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3202 }
3203 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3204 {
3205 /* head */
3206 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
3207 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
3208 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
3209 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
3210 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3211 }
3212 else
3213 {
3214 /* in list */
3215 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3216 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3217 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3218 }
3219 iPhysExt = iPhysExtNext;
3220 return;
3221 }
3222 }
3223
3224 /* next */
3225 iPhysExtPrev = iPhysExt;
3226 iPhysExt = paPhysExts[iPhysExt].iNext;
3227 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3228
3229 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3230 }
3231 else /* nothing to do */
3232 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
3233}
3234
3235
3236/**
3237 * Clear references to guest physical memory.
3238 *
3239 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3240 * is assumed to be correct, so the linear search can be skipped and we can assert
3241 * at an earlier point.
3242 *
3243 * @param pPool The pool.
3244 * @param pPage The page.
3245 * @param HCPhys The host physical address corresponding to the guest page.
3246 * @param GCPhys The guest physical address corresponding to HCPhys.
3247 */
3248static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3249{
3250 /*
3251 * Walk range list.
3252 */
3253 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3254 while (pRam)
3255 {
3256 RTGCPHYS off = GCPhys - pRam->GCPhys;
3257 if (off < pRam->cb)
3258 {
3259 /* does it match? */
3260 const unsigned iPage = off >> PAGE_SHIFT;
3261 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3262#ifdef LOG_ENABLED
3263 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3264 Log(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3265#endif
3266 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3267 {
3268 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3269 return;
3270 }
3271 break;
3272 }
3273 pRam = pRam->CTX_SUFF(pNext);
3274 }
3275 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3276}
3277
3278
3279/**
3280 * Clear references to guest physical memory.
3281 *
3282 * @param pPool The pool.
3283 * @param pPage The page.
3284 * @param HCPhys The host physical address corresponding to the guest page.
3285 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
3286 */
3287static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3288{
3289 /*
3290 * Walk range list.
3291 */
3292 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3293 while (pRam)
3294 {
3295 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3296 if (off < pRam->cb)
3297 {
3298 /* does it match? */
3299 const unsigned iPage = off >> PAGE_SHIFT;
3300 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3301 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3302 {
3303 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3304 return;
3305 }
3306 break;
3307 }
3308 pRam = pRam->CTX_SUFF(pNext);
3309 }
3310
3311 /*
3312 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3313 */
3314 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3315 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3316 while (pRam)
3317 {
3318 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3319 while (iPage-- > 0)
3320 {
3321 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3322 {
3323 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3324 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3325 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3326 return;
3327 }
3328 }
3329 pRam = pRam->CTX_SUFF(pNext);
3330 }
3331
3332 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3333}
3334
3335
3336/**
3337 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3338 *
3339 * @param pPool The pool.
3340 * @param pPage The page.
3341 * @param pShwPT The shadow page table (mapping of the page).
3342 * @param pGstPT The guest page table.
3343 */
3344DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3345{
3346 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3347 if (pShwPT->a[i].n.u1Present)
3348 {
3349 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3350 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3351 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3352 if (!--pPage->cPresent)
3353 break;
3354 }
3355}
3356
3357
3358/**
3359 * Clear references to guest physical memory in a PAE / 32-bit page table.
3360 *
3361 * @param pPool The pool.
3362 * @param pPage The page.
3363 * @param pShwPT The shadow page table (mapping of the page).
3364 * @param pGstPT The guest page table (just a half one).
3365 */
3366DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3367{
3368 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3369 if (pShwPT->a[i].n.u1Present)
3370 {
3371 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3372 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3373 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3374 }
3375}
3376
3377
3378/**
3379 * Clear references to guest physical memory in a PAE / PAE page table.
3380 *
3381 * @param pPool The pool.
3382 * @param pPage The page.
3383 * @param pShwPT The shadow page table (mapping of the page).
3384 * @param pGstPT The guest page table.
3385 */
3386DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3387{
3388 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3389 if (pShwPT->a[i].n.u1Present)
3390 {
3391 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3392 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3393 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3394 }
3395}
3396
3397
3398/**
3399 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3400 *
3401 * @param pPool The pool.
3402 * @param pPage The page.
3403 * @param pShwPT The shadow page table (mapping of the page).
3404 */
3405DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3406{
3407 RTGCPHYS GCPhys = pPage->GCPhys;
3408 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3409 if (pShwPT->a[i].n.u1Present)
3410 {
3411 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3412 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3413 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3414 }
3415}
3416
3417
3418/**
3419 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3420 *
3421 * @param pPool The pool.
3422 * @param pPage The page.
3423 * @param pShwPT The shadow page table (mapping of the page).
3424 */
3425DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3426{
3427 RTGCPHYS GCPhys = pPage->GCPhys;
3428 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3429 if (pShwPT->a[i].n.u1Present)
3430 {
3431 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3432 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3433 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3434 }
3435}
3436
3437#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3438
3439
3440#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3441/**
3442 * Clear references to shadowed pages in a 32-bit page directory.
3443 *
3444 * @param pPool The pool.
3445 * @param pPage The page.
3446 * @param pShwPD The shadow page directory (mapping of the page).
3447 */
3448DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
3449{
3450 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3451 {
3452 if ( pShwPD->a[i].n.u1Present
3453 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3454 )
3455 {
3456 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
3457 if (pSubPage)
3458 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3459 else
3460 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
3461 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3462 }
3463 }
3464}
3465#endif
3466
3467/**
3468 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
3469 *
3470 * @param pPool The pool.
3471 * @param pPage The page.
3472 * @param pShwPD The shadow page directory (mapping of the page).
3473 */
3474DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3475{
3476 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3477 {
3478 if ( pShwPD->a[i].n.u1Present
3479#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3480 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3481#endif
3482 )
3483 {
3484 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3485 if (pSubPage)
3486 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3487 else
3488 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3489 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3490 }
3491 }
3492}
3493
3494
3495/**
3496 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3497 *
3498 * @param pPool The pool.
3499 * @param pPage The page.
3500 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3501 */
3502DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3503{
3504 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3505 {
3506 if ( pShwPDPT->a[i].n.u1Present
3507#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3508 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
3509#endif
3510 )
3511 {
3512 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3513 if (pSubPage)
3514 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3515 else
3516 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3517 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3518 }
3519 }
3520}
3521
3522
3523/**
3524 * Clear references to shadowed pages in a 64-bit level 4 page table.
3525 *
3526 * @param pPool The pool.
3527 * @param pPage The page.
3528 * @param pShwPML4 The shadow page map level 4 table (mapping of the page).
3529 */
3530DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3531{
3532 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3533 {
3534 if (pShwPML4->a[i].n.u1Present)
3535 {
3536 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3537 if (pSubPage)
3538 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3539 else
3540 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3541 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3542 }
3543 }
3544}
3545
3546
3547/**
3548 * Clear references to shadowed pages in an EPT page table.
3549 *
3550 * @param pPool The pool.
3551 * @param pPage The page.
3552 * @param pShwPT The shadow page table (mapping of the page).
3553 */
3554DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3555{
3556 RTGCPHYS GCPhys = pPage->GCPhys;
3557 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3558 if (pShwPT->a[i].n.u1Present)
3559 {
3560 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RGp\n",
3561 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3562 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3563 }
3564}
3565
3566
3567/**
3568 * Clear references to shadowed pages in an EPT page directory.
3569 *
3570 * @param pPool The pool.
3571 * @param pPage The page.
3572 * @param pShwPD The shadow page directory (mapping of the page).
3573 */
3574DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3575{
3576 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3577 {
3578 if (pShwPD->a[i].n.u1Present)
3579 {
3580 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3581 if (pSubPage)
3582 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3583 else
3584 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3585 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3586 }
3587 }
3588}
3589
3590
3591/**
3592 * Clear references to shadowed pages in an EPT page directory pointer table.
3593 *
3594 * @param pPool The pool.
3595 * @param pPage The page.
3596 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3597 */
3598DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3599{
3600 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3601 {
3602 if (pShwPDPT->a[i].n.u1Present)
3603 {
3604 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3605 if (pSubPage)
3606 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3607 else
3608 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3609 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3610 }
3611 }
3612}
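
/*
 * Common pattern in the directory-level deref routines above: each present
 * (and non-mapping) entry is translated back to the pool page it points at
 * by looking up the entry's host physical address in pPool->HCPhysTree via
 * RTAvloHCPhysGet(), after which pgmPoolTrackFreeUser(pPool, pSubPage,
 * pPage->idx, i) drops the "pPage uses pSubPage through slot i" record.
 */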
3613
3614
3615/**
3616 * Clears all references made by this page.
3617 *
3618 * This includes other shadow pages and GC physical addresses.
3619 *
3620 * @param pPool The pool.
3621 * @param pPage The page.
3622 */
3623static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3624{
3625 /*
3626 * Map the shadow page and take action according to the page kind.
3627 */
3628 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3629 switch (pPage->enmKind)
3630 {
3631#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3632 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3633 {
3634 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3635 void *pvGst;
3636 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3637 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3638 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3639 break;
3640 }
3641
3642 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3643 {
3644 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3645 void *pvGst;
3646 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3647 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3648 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3649 break;
3650 }
3651
3652 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3653 {
3654 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3655 void *pvGst;
3656 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3657 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3658 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3659 break;
3660 }
3661
3662 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3663 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3664 {
3665 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3666 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3667 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3668 break;
3669 }
3670
3671 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3672 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3673 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3674 {
3675 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3676 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3677 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3678 break;
3679 }
3680
3681#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3682 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3683 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3684 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3685 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3686 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3687 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3688 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3689 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3690 break;
3691#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3692
3693 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
3694 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3695 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3696 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3697 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3698 break;
3699
3700#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3701 case PGMPOOLKIND_32BIT_PD:
3702 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
3703 break;
3704
3705 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3706 case PGMPOOLKIND_PAE_PDPT:
3707#endif
3708 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3709 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3710 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3711 break;
3712
3713 case PGMPOOLKIND_64BIT_PML4:
3714 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3715 break;
3716
3717 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3718 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
3719 break;
3720
3721 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3722 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
3723 break;
3724
3725 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3726 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
3727 break;
3728
3729 default:
3730 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3731 }
3732
3733 /* Paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3734 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3735 ASMMemZeroPage(pvShw);
3736 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3737 pPage->fZeroed = true;
3738}
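
/*
 * In short, pgmPoolTrackDeref maps the shadow page, drops its references
 * according to the page kind (guest physical references for page tables,
 * pool page references for the directory and pointer-table levels), and then
 * zeroes the page so a later allocation can skip its own ASMMemZeroPage()
 * (hence pPage->fZeroed = true).
 */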
3739
3740#endif /* PGMPOOL_WITH_USER_TRACKING */
3741
3742/**
3743 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3744 *
3745 * @param pPool The pool.
3746 */
3747static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3748{
3749#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3750 /* Start a subset so we won't run out of mapping space. */
3751 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
3752 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3753#endif
3754
3755 /*
3756 * These special pages all occupy the indexes 1 .. PGMPOOL_IDX_FIRST - 1.
3757 */
3758 Assert(NIL_PGMPOOL_IDX == 0);
3759 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3760 {
3761 /*
3762 * Get the page address.
3763 */
3764 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3765 union
3766 {
3767 uint64_t *pau64;
3768 uint32_t *pau32;
3769 } u;
3770
3771 /*
3772 * Mark stuff not present.
3773 */
3774 switch (pPage->enmKind)
3775 {
3776#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
3777 case PGMPOOLKIND_ROOT_32BIT_PD:
3778 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3779 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3780 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3781 u.pau32[iPage] = 0;
3782 break;
3783
3784 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3785 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3786 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
3787 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3788 u.pau64[iPage] = 0;
3789 break;
3790
3791 case PGMPOOLKIND_ROOT_PDPT:
3792 /* Not root of shadowed pages currently, ignore it. */
3793 break;
3794#endif
3795
3796 case PGMPOOLKIND_ROOT_NESTED:
3797 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3798 ASMMemZero32(u.pau64, PAGE_SIZE);
3799 break;
3800 }
3801 }
3802
3803 /*
3804 * Paranoia (to be removed), flag a global CR3 sync.
3805 */
3806 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
3807
3808#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3809 /* Pop the subset. */
3810 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3811#endif
3812}
3813
3814
3815/**
3816 * Flushes the entire cache.
3817 *
3818 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3819 * and will execute the CR3 flush.
3820 *
3821 * @param pPool The pool.
3822 */
3823static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3824{
3825 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3826 LogFlow(("pgmPoolFlushAllInt:\n"));
3827
3828 /*
3829 * If there are no pages in the pool, there is nothing to do.
3830 */
3831 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3832 {
3833 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3834 return;
3835 }
3836
3837 /*
3838 * Nuke the free list and reinsert all pages into it.
3839 */
3840 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3841 {
3842 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3843
3844#ifdef IN_RING3
3845 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMR3, pPage->pvPageR3));
3846#endif
3847#ifdef PGMPOOL_WITH_MONITORING
3848 if (pPage->fMonitored)
3849 pgmPoolMonitorFlush(pPool, pPage);
3850 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3851 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3852 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3853 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3854 pPage->cModifications = 0;
3855#endif
3856 pPage->GCPhys = NIL_RTGCPHYS;
3857 pPage->enmKind = PGMPOOLKIND_FREE;
3858 Assert(pPage->idx == i);
3859 pPage->iNext = i + 1;
3860 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3861 pPage->fSeenNonGlobal = false;
3862 pPage->fMonitored = false;
3863 pPage->fCached = false;
3864 pPage->fReusedFlushPending = false;
3865 pPage->fCR3Mix = false;
3866#ifdef PGMPOOL_WITH_USER_TRACKING
3867 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3868#endif
3869#ifdef PGMPOOL_WITH_CACHE
3870 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3871 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3872#endif
3873 }
3874 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3875 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3876 pPool->cUsedPages = 0;
3877
3878#ifdef PGMPOOL_WITH_USER_TRACKING
3879 /*
3880 * Zap and reinitialize the user records.
3881 */
3882 pPool->cPresent = 0;
3883 pPool->iUserFreeHead = 0;
3884 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3885 const unsigned cMaxUsers = pPool->cMaxUsers;
3886 for (unsigned i = 0; i < cMaxUsers; i++)
3887 {
3888 paUsers[i].iNext = i + 1;
3889 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3890 paUsers[i].iUserTable = 0xfffffffe;
3891 }
3892 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3893#endif
3894
3895#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3896 /*
3897 * Clear all the GCPhys links and rebuild the phys ext free list.
3898 */
3899 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3900 pRam;
3901 pRam = pRam->CTX_SUFF(pNext))
3902 {
3903 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3904 while (iPage-- > 0)
3905 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3906 }
3907
3908 pPool->iPhysExtFreeHead = 0;
3909 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3910 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3911 for (unsigned i = 0; i < cMaxPhysExts; i++)
3912 {
3913 paPhysExts[i].iNext = i + 1;
3914 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3915 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3916 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3917 }
3918 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3919#endif
3920
3921#ifdef PGMPOOL_WITH_MONITORING
3922 /*
3923 * Just zap the modified list.
3924 */
3925 pPool->cModifiedPages = 0;
3926 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3927#endif
3928
3929#ifdef PGMPOOL_WITH_CACHE
3930 /*
3931 * Clear the GCPhys hash and the age list.
3932 */
3933 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
3934 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3935 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3936 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3937#endif
3938
3939 /*
3940 * Flush all the special root pages.
3941 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3942 */
3943 pgmPoolFlushAllSpecialRoots(pPool);
3944 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3945 {
3946 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3947 pPage->iNext = NIL_PGMPOOL_IDX;
3948#ifdef PGMPOOL_WITH_MONITORING
3949 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3950 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3951 pPage->cModifications = 0;
3952 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3953 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3954 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3955 if (pPage->fMonitored)
3956 {
3957 PVM pVM = pPool->CTX_SUFF(pVM);
3958 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3959 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3960 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3961 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
3962 pPool->pszAccessHandler);
3963 AssertFatalRCSuccess(rc);
3964# ifdef PGMPOOL_WITH_CACHE
3965 pgmPoolHashInsert(pPool, pPage);
3966# endif
3967 }
3968#endif
3969#ifdef PGMPOOL_WITH_USER_TRACKING
3970 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3971#endif
3972#ifdef PGMPOOL_WITH_CACHE
3973 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3974 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3975#endif
3976 }
3977
3978 /*
3979 * Finally, assert the FF.
3980 */
3981 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
3982
3983 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3984}
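
/*
 * Hypothetical caller sketch (names outside this file are assumptions): a
 * full pool flush leaves VM_FF_PGM_SYNC_CR3 set, so the caller is expected
 * to let the next CR3 sync rebuild the shadow structures, roughly:
 *
 *      pgmPoolFlushAll(pVM);
 *      Assert(VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
 *      ...                       (PGMSyncCR3 repopulates the pool on demand)
 */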
3985
3986
3987/**
3988 * Flushes a pool page.
3989 *
3990 * This moves the page to the free list after removing all user references to it.
3991 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3992 *
3993 * @returns VBox status code.
3994 * @retval VINF_SUCCESS on success.
3995 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
3996 * @param pPool The pool.
3997 * @param pPage The shadow page to flush.
3998 */
3999int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4000{
4001 int rc = VINF_SUCCESS;
4002 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4003 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%d, .GCPhys=%RGp}\n",
4004 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
4005
4006 /*
4007 * Quietly reject any attempts at flushing any of the special root pages.
4008 */
4009 if (pPage->idx < PGMPOOL_IDX_FIRST)
4010 {
4011 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4012 return VINF_SUCCESS;
4013 }
4014
4015 /*
4016 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping.
4017 */
4018 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
4019 {
4020#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4021 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4,
4022 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4023#endif
4024 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4025 return VINF_SUCCESS;
4026 }
4027
4028#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4029 /* Start a subset so we won't run out of mapping space. */
4030 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4031 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4032#endif
4033
4034 /*
4035 * Mark the page as being in need of an ASMMemZeroPage().
4036 */
4037 pPage->fZeroed = false;
4038
4039#ifdef PGMPOOL_WITH_USER_TRACKING
4040 /*
4041 * Clear the page.
4042 */
4043 pgmPoolTrackClearPageUsers(pPool, pPage);
4044 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4045 pgmPoolTrackDeref(pPool, pPage);
4046 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4047#endif
4048
4049#ifdef PGMPOOL_WITH_CACHE
4050 /*
4051 * Flush it from the cache.
4052 */
4053 pgmPoolCacheFlushPage(pPool, pPage);
4054#endif /* PGMPOOL_WITH_CACHE */
4055
4056#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4057 /* Heavy stuff done. */
4058 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4059#endif
4060
4061#ifdef PGMPOOL_WITH_MONITORING
4062 /*
4063 * Deregister the monitoring.
4064 */
4065 if (pPage->fMonitored)
4066 rc = pgmPoolMonitorFlush(pPool, pPage);
4067#endif
4068
4069 /*
4070 * Free the page.
4071 */
4072 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4073 pPage->iNext = pPool->iFreeHead;
4074 pPool->iFreeHead = pPage->idx;
4075 pPage->enmKind = PGMPOOLKIND_FREE;
4076 pPage->GCPhys = NIL_RTGCPHYS;
4077 pPage->fReusedFlushPending = false;
4078
4079 pPool->cUsedPages--;
4080 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4081 return rc;
4082}
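
/*
 * Note that pgmPoolFlushPage quietly returns VINF_SUCCESS both for the
 * special root pages and for the page backing the currently active shadow
 * CR3; anything that really must get rid of those has to go through the
 * full pgmPoolFlushAllInt() path instead.
 */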
4083
4084
4085/**
4086 * Frees a usage of a pool page.
4087 *
4088 * The caller is responsible for updating the user table so that it no longer
4089 * references the shadow page.
4090 *
4091 * @param pPool The pool.
4092 * @param pPage The shadow page.
4093 * @param iUser The shadow page pool index of the user table.
4094 * @param iUserTable The index into the user table (shadowed).
4095 */
4096void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4097{
4098 STAM_PROFILE_START(&pPool->StatFree, a);
4099 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
4100 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
4101 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4102#ifdef PGMPOOL_WITH_USER_TRACKING
4103 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4104#endif
4105#ifdef PGMPOOL_WITH_CACHE
4106 if (!pPage->fCached)
4107#endif
4108 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
4109 STAM_PROFILE_STOP(&pPool->StatFree, a);
4110}
4111
4112
4113/**
4114 * Makes one or more pages free.
4115 *
4116 * @returns VBox status code.
4117 * @retval VINF_SUCCESS on success.
4118 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4119 *
4120 * @param pPool The pool.
4121 * @param iUser The user of the page.
4122 */
4123static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
4124{
4125 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4126
4127 /*
4128 * If the pool isn't full grown yet, expand it.
4129 */
4130 if (pPool->cCurPages < pPool->cMaxPages)
4131 {
4132 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4133#ifdef IN_RING3
4134 int rc = PGMR3PoolGrow(pPool->pVMR3);
4135#else
4136 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
4137#endif
4138 if (RT_FAILURE(rc))
4139 return rc;
4140 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4141 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4142 return VINF_SUCCESS;
4143 }
4144
4145#ifdef PGMPOOL_WITH_CACHE
4146 /*
4147 * Free one cached page.
4148 */
4149 return pgmPoolCacheFreeOne(pPool, iUser);
4150#else
4151 /*
4152 * Flush the pool.
4153 *
4154 * If we have tracking enabled, it should be possible to come up with
4155 * a cheap replacement strategy...
4156 */
4157 /* @todo incompatible with long mode paging (cr3 root will be flushed) */
4158 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
4159 pgmPoolFlushAllInt(pPool);
4160 return VERR_PGM_POOL_FLUSHED;
4161#endif
4162}
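
/*
 * So pgmPoolMakeMoreFreePages prefers growing the pool (directly via
 * PGMR3PoolGrow in ring-3, via the VMMCALLHOST_PGM_POOL_GROW host call
 * elsewhere) and only resorts to evicting a cached page - or, when built
 * without PGMPOOL_WITH_CACHE, to flushing the whole pool and returning
 * VERR_PGM_POOL_FLUSHED.
 */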
4163
4164
4165/**
4166 * Allocates a page from the pool.
4167 *
4168 * This page may actually be a cached page and not in need of any processing
4169 * on the callers part.
4170 *
4171 * @returns VBox status code.
4172 * @retval VINF_SUCCESS if a NEW page was allocated.
4173 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4174 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4175 * @param pVM The VM handle.
4176 * @param GCPhys The GC physical address of the page we're going to shadow.
4177 * For 4MB and 2MB PD entries, it's the first address the
4178 * shadow PT is covering.
4179 * @param enmKind The kind of mapping.
4180 * @param iUser The shadow page pool index of the user table.
4181 * @param iUserTable The index into the user table (shadowed).
4182 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4183 */
4184int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
4185{
4186 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4187 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4188 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
4189 *ppPage = NULL;
4190 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4191 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4192 * Assert(!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4193
4194#ifdef PGMPOOL_WITH_CACHE
4195 if (pPool->fCacheEnabled)
4196 {
4197 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
4198 if (RT_SUCCESS(rc2))
4199 {
4200 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4201 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4202 return rc2;
4203 }
4204 }
4205#endif
4206
4207 /*
4208 * Allocate a new one.
4209 */
4210 int rc = VINF_SUCCESS;
4211 uint16_t iNew = pPool->iFreeHead;
4212 if (iNew == NIL_PGMPOOL_IDX)
4213 {
4214 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
4215 if (RT_FAILURE(rc))
4216 {
4217 if (rc != VERR_PGM_POOL_CLEARED)
4218 {
4219 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4220 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4221 return rc;
4222 }
4223 Log(("pgmPoolMakeMoreFreePages failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
4224 rc = VERR_PGM_POOL_FLUSHED;
4225 }
4226 iNew = pPool->iFreeHead;
4227 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4228 }
4229
4230 /* unlink the free head */
4231 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4232 pPool->iFreeHead = pPage->iNext;
4233 pPage->iNext = NIL_PGMPOOL_IDX;
4234
4235 /*
4236 * Initialize it.
4237 */
4238 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4239 pPage->enmKind = enmKind;
4240 pPage->GCPhys = GCPhys;
4241 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4242 pPage->fMonitored = false;
4243 pPage->fCached = false;
4244 pPage->fReusedFlushPending = false;
4245 pPage->fCR3Mix = false;
4246 pPage->iUser = iUser;
4247 pPage->iUserTable = iUserTable;
4248#ifdef PGMPOOL_WITH_MONITORING
4249 pPage->cModifications = 0;
4250 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4251 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4252#endif
4253#ifdef PGMPOOL_WITH_USER_TRACKING
4254 pPage->cPresent = 0;
4255 pPage->iFirstPresent = ~0;
4256
4257 /*
4258 * Insert into the tracking and cache. If this fails, free the page.
4259 */
4260 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4261 if (RT_FAILURE(rc3))
4262 {
4263 if (rc3 != VERR_PGM_POOL_CLEARED)
4264 {
4265 pPool->cUsedPages--;
4266 pPage->enmKind = PGMPOOLKIND_FREE;
4267 pPage->GCPhys = NIL_RTGCPHYS;
4268 pPage->iNext = pPool->iFreeHead;
4269 pPool->iFreeHead = pPage->idx;
4270 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4271 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4272 return rc3;
4273 }
4274 Log(("pgmPoolTrackInsert failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4275 rc = VERR_PGM_POOL_FLUSHED;
4276 }
4277#endif /* PGMPOOL_WITH_USER_TRACKING */
4278
4279 /*
4280 * Commit the allocation, clear the page and return.
4281 */
4282#ifdef VBOX_WITH_STATISTICS
4283 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4284 pPool->cUsedPagesHigh = pPool->cUsedPages;
4285#endif
4286
4287 if (!pPage->fZeroed)
4288 {
4289 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4290 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4291 ASMMemZeroPage(pv);
4292 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4293 }
4294
4295 *ppPage = pPage;
4296 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4297 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4298 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4299 return rc;
4300}
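
/*
 * Hypothetical caller sketch for pgmPoolAlloc / pgmPoolFreeByPage; iShwPDIdx
 * (the pool index of the shadow PD that will reference the new PT) and iPDE
 * (the PDE slot in it) are made-up placeholders, not taken from real call
 * sites:
 *
 *      PPGMPOOLPAGE pShwPage;
 *      int rc = pgmPoolAlloc(pVM, GCPhysGuestPT, PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT,
 *                            iShwPDIdx, iPDE, &pShwPage);
 *      if (rc == VINF_PGM_CACHED_PAGE)
 *          ...                       (existing shadow PT returned from the cache)
 *      else if (RT_SUCCESS(rc))
 *          ...                       (fresh, zeroed page - populate it from the guest PT)
 *      ...
 *      pgmPoolFreeByPage(pPool, pShwPage, iShwPDIdx, iPDE);
 */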
4301
4302
4303/**
4304 * Frees a usage of a pool page.
4305 *
4306 * @param pVM The VM handle.
4307 * @param HCPhys The HC physical address of the shadow page.
4308 * @param iUser The shadow page pool index of the user table.
4309 * @param iUserTable The index into the user table (shadowed).
4310 */
4311void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4312{
4313 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4314 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4315 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4316}
4317
4318
4319/**
4320 * Gets an in-use page in the pool by its physical address.
4321 *
4322 * @returns Pointer to the page.
4323 * @param pVM The VM handle.
4324 * @param HCPhys The HC physical address of the shadow page.
4325 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4326 */
4327PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4328{
4329 /** @todo profile this! */
4330 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4331 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4332 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%RHp -> %p:{.idx=%d .GCPhys=%RGp .enmKind=%d}\n",
4333 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
4334 return pPage;
4335}
4336
4337
4338/**
4339 * Flushes the entire cache.
4340 *
4341 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4342 * and will execute the CR3 flush.
4343 *
4344 * @param pPool The pool.
4345 */
4346void pgmPoolFlushAll(PVM pVM)
4347{
4348 LogFlow(("pgmPoolFlushAll:\n"));
4349 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4350}
4351