VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@15406

Last change on this file was r15406, checked in by vboxsync on 2008-12-12:

PGMPool: temporarily enabled the old code.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 149.8 KB
1/* $Id: PGMAllPool.cpp 15406 2008-12-12 22:56:44Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42
43
44/*******************************************************************************
45* Internal Functions *
46*******************************************************************************/
47__BEGIN_DECLS
48static void pgmPoolFlushAllInt(PPGMPOOL pPool);
49#ifdef PGMPOOL_WITH_USER_TRACKING
50DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
51DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#endif
54#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
55static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
56#endif
57#ifdef PGMPOOL_WITH_CACHE
58static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
59#endif
60#ifdef PGMPOOL_WITH_MONITORING
61static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
62#endif
63#ifndef IN_RING3
64DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
65#endif
66__END_DECLS
67
68
69/**
70 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
71 *
72 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
73 * @param enmKind The page kind.
74 */
75DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
76{
77 switch (enmKind)
78 {
79 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
82 return true;
83 default:
84 return false;
85 }
86}
87
88
89#if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
90/**
91 * Maps a pool page into the current context.
92 *
93 * @returns Pointer to the mapping.
94 * @param pPGM Pointer to the PGM instance data.
95 * @param pPage The page to map.
96 */
97void *pgmPoolMapPageFallback(PPGM pPGM, PPGMPOOLPAGE pPage)
98{
99 /* General pages are taken care of by the inlined part; it
100 only ends up here in case of failure. */
101 AssertReleaseReturn(pPage->idx < PGMPOOL_IDX_FIRST, NULL);
102
103/** @todo make sure HCPhys is valid for *all* indexes. */
104 /* special pages. */
105# ifdef IN_RC
106 switch (pPage->idx)
107 {
108 case PGMPOOL_IDX_PD:
109 return pPGM->pShw32BitPdRC;
110 case PGMPOOL_IDX_PAE_PD:
111 case PGMPOOL_IDX_PAE_PD_0:
112 return pPGM->apShwPaePDsRC[0];
113 case PGMPOOL_IDX_PAE_PD_1:
114 return pPGM->apShwPaePDsRC[1];
115 case PGMPOOL_IDX_PAE_PD_2:
116 return pPGM->apShwPaePDsRC[2];
117 case PGMPOOL_IDX_PAE_PD_3:
118 return pPGM->apShwPaePDsRC[3];
119 case PGMPOOL_IDX_PDPT:
120 return pPGM->pShwPaePdptRC;
121 default:
122 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
123 return NULL;
124 }
125
126# else /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
127 RTHCPHYS HCPhys;
128 switch (pPage->idx)
129 {
130 case PGMPOOL_IDX_PD:
131 HCPhys = pPGM->HCPhysShw32BitPD;
132 break;
133 case PGMPOOL_IDX_PAE_PD_0:
134 HCPhys = pPGM->aHCPhysPaePDs[0];
135 break;
136 case PGMPOOL_IDX_PAE_PD_1:
137 HCPhys = pPGM->aHCPhysPaePDs[1];
138 break;
139 case PGMPOOL_IDX_PAE_PD_2:
140 HCPhys = pPGM->aHCPhysPaePDs[2];
141 break;
142 case PGMPOOL_IDX_PAE_PD_3:
143 HCPhys = pPGM->aHCPhysPaePDs[3];
144 break;
145 case PGMPOOL_IDX_PDPT:
146 HCPhys = pPGM->HCPhysShwPaePdpt;
147 break;
148 case PGMPOOL_IDX_PAE_PD:
149 AssertReleaseMsgFailed(("PGMPOOL_IDX_PAE_PD is not usable in VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 context\n"));
150 return NULL;
151 default:
152 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
153 return NULL;
154 }
155 void *pv;
156 int rc = pgmR0DynMapHCPageInlined(pPGM, HCPhys, &pv);
157 AssertReleaseRCReturn(rc, NULL);
158 return pv;
159# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
160}
161#endif /* IN_RC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
162
163
164#ifdef PGMPOOL_WITH_MONITORING
165/**
166 * Determine the size of a write instruction.
167 * @returns number of bytes written.
168 * @param pDis The disassembler state.
169 */
170static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
171{
172 /*
173 * This is very crude and possibly wrong for some opcodes,
174 * but since it's not really supposed to be called we can
175 * probably live with that.
176 */
177 return DISGetParamSize(pDis, &pDis->param1);
178}
179
180
181/**
182 * Flushes a chain of pages sharing the same access monitor.
183 *
184 * @returns VBox status code suitable for scheduling.
185 * @param pPool The pool.
186 * @param pPage A page in the chain.
187 */
188int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
189{
190 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
191
192 /*
193 * Find the list head.
194 */
195 uint16_t idx = pPage->idx;
196 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
197 {
198 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
199 {
200 idx = pPage->iMonitoredPrev;
201 Assert(idx != pPage->idx);
202 pPage = &pPool->aPages[idx];
203 }
204 }
205
206 /*
207 * Iterate the list flushing each shadow page.
208 */
209 int rc = VINF_SUCCESS;
210 for (;;)
211 {
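        /* Note: fetch the next index before flushing, as flushing the page unlinks it from the monitor chain. */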
212 idx = pPage->iMonitoredNext;
213 Assert(idx != pPage->idx);
214 if (pPage->idx >= PGMPOOL_IDX_FIRST)
215 {
216 int rc2 = pgmPoolFlushPage(pPool, pPage);
217 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
218 rc = VINF_PGM_SYNC_CR3;
219 }
220 /* next */
221 if (idx == NIL_PGMPOOL_IDX)
222 break;
223 pPage = &pPool->aPages[idx];
224 }
225 return rc;
226}
227
228
229/**
230 * Wrapper for getting the current context pointer to the entry being modified.
231 *
232 * @returns Pointer to the current context mapping of the entry.
233 * @param pPool The pool.
234 * @param pvFault The fault virtual address.
235 * @param GCPhysFault The fault physical address.
236 * @param cbEntry The entry size.
237 */
238#ifdef IN_RING3
239DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
240#else
241DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
242#endif
243{
244#ifdef IN_RC
245 return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
246
247#elif defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
248 void *pvRet;
249 int rc = PGMDynMapGCPageOff(pPool->pVMR0, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
250 AssertFatalRCSuccess(rc);
251 return pvRet;
252
253#elif defined(IN_RING0)
254 void *pvRet;
255 int rc = pgmRamGCPhys2HCPtr(&pPool->pVMR0->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
256 AssertFatalRCSuccess(rc);
257 return pvRet;
258
259#elif defined(IN_RING3)
260 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
261#else
262# error "huh?"
263#endif
264}
265
266
267/**
268 * Process shadow entries before they are changed by the guest.
269 *
270 * For PT entries we will clear them. For PD entries, we'll simply check
271 * for mapping conflicts and set the SyncCR3 FF if found.
272 *
273 * @param pPool The pool.
274 * @param pPage The head page.
275 * @param GCPhysFault The guest physical fault address.
276 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
277 * In R3 this is the host context 'fault' address.
278 * @param pCpu The disassembler state for figuring out the write size.
279 * This need not be specified if the caller knows we won't do cross entry accesses.
280 */
281#ifdef IN_RING3
282void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
283#else
284void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
285#endif
286{
287 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
288 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
289 const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
290
291 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));
292
293 for (;;)
294 {
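        /* One pointer, viewed as whichever shadow table type this pool page actually holds. */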
295 union
296 {
297 void *pv;
298 PX86PT pPT;
299 PX86PTPAE pPTPae;
300 PX86PD pPD;
301 PX86PDPAE pPDPae;
302 PX86PDPT pPDPT;
303 PX86PML4 pPML4;
304 } uShw;
305
306 switch (pPage->enmKind)
307 {
308 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
309 {
310 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
311 const unsigned iShw = off / sizeof(X86PTE);
312 if (uShw.pPT->a[iShw].n.u1Present)
313 {
314# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
315 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
316 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
317 pgmPoolTracDerefGCPhysHint(pPool, pPage,
318 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
319 pGstPte->u & X86_PTE_PG_MASK);
320# endif
321 uShw.pPT->a[iShw].u = 0;
322 }
323 break;
324 }
325
326 /* page/2 sized */
327 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
328 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
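            /* A PAE PT shadows only half of a 32-bit guest PT (512 of its 1024 entries); the PAGE_SIZE/2 bit of
               pPage->GCPhys records which half, so only act when the write falls in that half. */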
329 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
330 {
331 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
332 if (uShw.pPTPae->a[iShw].n.u1Present)
333 {
334# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
335 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
336 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
337 pgmPoolTracDerefGCPhysHint(pPool, pPage,
338 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
339 pGstPte->u & X86_PTE_PG_MASK);
340# endif
341 uShw.pPTPae->a[iShw].u = 0;
342 }
343 }
344 break;
345
346 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
347 {
348 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
349 const unsigned iShw = off / sizeof(X86PTEPAE);
350 if (uShw.pPTPae->a[iShw].n.u1Present)
351 {
352# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
353 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
354 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
355 pgmPoolTracDerefGCPhysHint(pPool, pPage,
356 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
357 pGstPte->u & X86_PTE_PAE_PG_MASK);
358# endif
359 uShw.pPTPae->a[iShw].u = 0;
360 }
361
362 /* paranoia / a bit assumptive. */
363 if ( pCpu
364 && (off & 7)
365 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
366 {
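                /* An unaligned write that spills past this entry can also touch the next PTE;
                   derive the second index from the last byte written. */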
367 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
368 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
369
370 if (uShw.pPTPae->a[iShw2].n.u1Present)
371 {
372# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
373 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
374 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
375 pgmPoolTracDerefGCPhysHint(pPool, pPage,
376 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
377 pGstPte->u & X86_PTE_PAE_PG_MASK);
378# endif
379 uShw.pPTPae->a[iShw2].u = 0;
380 }
381 }
382
383 break;
384 }
385
386 case PGMPOOLKIND_ROOT_32BIT_PD:
387 {
388 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
389 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
390 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
391 {
392 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
393 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
394 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
395 }
396 /* paranoia / a bit assumptive. */
397 else if ( pCpu
398 && (off & 3)
399 && (off & 3) + cbWrite > sizeof(X86PTE))
400 {
401 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
402 if ( iShw2 != iShw
403 && iShw2 < RT_ELEMENTS(uShw.pPD->a)
404 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
405 {
406 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
407 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
408 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
409 }
410 }
411#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
412 if ( uShw.pPD->a[iShw].n.u1Present
413 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
414 {
415 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
416# ifdef IN_RC /* TLB load - we're pushing things a bit... */
417 ASMProbeReadByte(pvAddress);
418# endif
419 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
420 uShw.pPD->a[iShw].u = 0;
421 }
422#endif
423 break;
424 }
425
426 case PGMPOOLKIND_ROOT_PAE_PD:
427 {
428 unsigned iGst = off / sizeof(X86PDE); // ASSUMING 32-bit guest paging!
429 unsigned iShwPdpt = iGst / 256;
430 unsigned iShw = (iGst % 256) * 2;
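            /* One 32-bit guest PDE (4MB) corresponds to two PAE PDEs (2MB each), and each of the four shadow
               PAE PDs (512 entries) covers 256 guest PDEs, hence the /256 and *2. */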
431 Assert(pPage->idx == PGMPOOL_IDX_PAE_PD);
432 PPGMPOOLPAGE pPage2 = pPage + 1 + iShwPdpt;
433 Assert(pPage2->idx == PGMPOOL_IDX_PAE_PD_0 + iShwPdpt);
434 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage2);
435 for (unsigned i = 0; i < 2; i++, iShw++)
436 {
437 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
438 {
439 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
440 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
441 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw));
442 }
443 /* paranoia / a bit assumptive. */
444 else if ( pCpu
445 && (off & 3)
446 && (off & 3) + cbWrite > 4)
447 {
448 const unsigned iShw2 = iShw + 2;
449 if ( iShw2 < RT_ELEMENTS(uShw.pPDPae->a) /** @todo was completely wrong, it's better now after #1865 but still wrong from cross PD. */
450 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
451 {
452 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
453 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
454 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
455 }
456 }
457#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
458 if ( uShw.pPDPae->a[iShw].n.u1Present
459 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
460 {
461 LogFlow(("pgmPoolMonitorChainChanging: iShwPdpt=%#x iShw=%#x: %RX64 -> freeing it!\n", iShwPdpt, iShw, uShw.pPDPae->a[iShw].u));
462# ifdef IN_RC /* TLB load - we're pushing things a bit... */
463 ASMProbeReadByte(pvAddress);
464# endif
465 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + iShwPdpt * X86_PG_PAE_ENTRIES);
466 uShw.pPDPae->a[iShw].u = 0;
467 }
468#endif
469 }
470 break;
471 }
472
473 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
474 {
475 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
476 const unsigned iShw = off / sizeof(X86PDEPAE);
477 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
478 {
479 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
480 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
481 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
482 }
483#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
484 /*
485 * Causes trouble when the guest uses a PDE to refer to the whole page table level
486 * structure. (Invalidate here; faults later on when it tries to change the page
487 * table entries -> recheck; probably only applies to the RC case.)
488 */
489 else
490 {
491 if (uShw.pPDPae->a[iShw].n.u1Present)
492 {
493 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
494 pgmPoolFree(pPool->CTX_SUFF(pVM),
495 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
496 /* Note: hardcoded PAE implementation dependency */
497 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
498 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
499 uShw.pPDPae->a[iShw].u = 0;
500 }
501 }
502#endif
503 /* paranoia / a bit assumptive. */
504 if ( pCpu
505 && (off & 7)
506 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
507 {
508 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
509 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
510
511 if ( iShw2 != iShw
512 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
513 {
514 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
515 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
516 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
517 }
518#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
519 else if (uShw.pPDPae->a[iShw2].n.u1Present)
520 {
521 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
522 pgmPoolFree(pPool->CTX_SUFF(pVM),
523 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
524 /* Note: hardcoded PAE implementation dependency */
525 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
526 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
527 uShw.pPDPae->a[iShw2].u = 0;
528 }
529#endif
530 }
531 break;
532 }
533
534 case PGMPOOLKIND_ROOT_PDPT:
535 {
536 /*
537 * Hopefully this doesn't happen very often:
538 * - touching unused parts of the page
539 * - messing with the bits of pd pointers without changing the physical address
540 */
541 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
542 const unsigned iShw = off / sizeof(X86PDPE);
543 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
544 {
545 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
546 {
547 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
548 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
549 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
550 }
551 /* paranoia / a bit assumptive. */
552 else if ( pCpu
553 && (off & 7)
554 && (off & 7) + cbWrite > sizeof(X86PDPE))
555 {
556 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
557 if ( iShw2 != iShw
558 && iShw2 < X86_PG_PAE_PDPE_ENTRIES
559 && uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
560 {
561 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
562 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
563 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
564 }
565 }
566 }
567 break;
568 }
569
570#ifndef IN_RC
571 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
572 {
573 Assert(pPage->enmKind == PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD);
574
575 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
576 const unsigned iShw = off / sizeof(X86PDEPAE);
577 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
578 {
579 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
580 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
581 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
582 }
583 else
584 {
585 if (uShw.pPDPae->a[iShw].n.u1Present)
586 {
587 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
588 pgmPoolFree(pPool->CTX_SUFF(pVM),
589 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
590 pPage->idx,
591 iShw);
592 uShw.pPDPae->a[iShw].u = 0;
593 }
594 }
595 /* paranoia / a bit assumptive. */
596 if ( pCpu
597 && (off & 7)
598 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
599 {
600 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
601 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
602
603 if ( iShw2 != iShw
604 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
605 {
606 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
607 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
608 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
609 }
610 else
611 if (uShw.pPDPae->a[iShw2].n.u1Present)
612 {
613 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
614 pgmPoolFree(pPool->CTX_SUFF(pVM),
615 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
616 pPage->idx,
617 iShw2);
618 uShw.pPDPae->a[iShw2].u = 0;
619 }
620 }
621 break;
622 }
623
624 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
625 {
626 /*
627 * Hopefully this doesn't happen very often:
628 * - messing with the bits of pd pointers without changing the physical address
629 */
630 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
631 {
632 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
633 const unsigned iShw = off / sizeof(X86PDPE);
634 if (uShw.pPDPT->a[iShw].n.u1Present)
635 {
636 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
637 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
638 uShw.pPDPT->a[iShw].u = 0;
639 }
640 /* paranoia / a bit assumptive. */
641 if ( pCpu
642 && (off & 7)
643 && (off & 7) + cbWrite > sizeof(X86PDPE))
644 {
645 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
646 if (uShw.pPDPT->a[iShw2].n.u1Present)
647 {
648 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
649 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
650 uShw.pPDPT->a[iShw2].u = 0;
651 }
652 }
653 }
654 break;
655 }
656
657 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
658 {
659 /*
660 * Hopefully this doesn't happen very often:
661 * - messing with the bits of pd pointers without changing the physical address
662 */
663 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
664 {
665 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
666 const unsigned iShw = off / sizeof(X86PDPE);
667 if (uShw.pPML4->a[iShw].n.u1Present)
668 {
669 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
670 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
671 uShw.pPML4->a[iShw].u = 0;
672 }
673 /* paranoia / a bit assumptive. */
674 if ( pCpu
675 && (off & 7)
676 && (off & 7) + cbWrite > sizeof(X86PDPE))
677 {
678 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
679 if (uShw.pPML4->a[iShw2].n.u1Present)
680 {
681 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
682 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
683 uShw.pPML4->a[iShw2].u = 0;
684 }
685 }
686 }
687 break;
688 }
689#endif /* !IN_RC */
690
691 default:
692 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
693 }
694
695 /* next */
696 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
697 return;
698 pPage = &pPool->aPages[pPage->iMonitoredNext];
699 }
700}
701
702
703# ifndef IN_RING3
704/**
705 * Checks if an access could be a fork operation in progress.
706 *
707 * Meaning that the guest is setting up the parent process for Copy-On-Write.
708 *
709 * @returns true if it's likely that we're forking, otherwise false.
710 * @param pPool The pool.
711 * @param pCpu The disassembled instruction.
712 * @param offFault The access offset.
713 */
714DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
715{
716 /*
717 * i386 linux is using btr to clear X86_PTE_RW.
718 * The functions involved are (2.6.16 source inspection):
719 * clear_bit
720 * ptep_set_wrprotect
721 * copy_one_pte
722 * copy_pte_range
723 * copy_pmd_range
724 * copy_pud_range
725 * copy_page_range
726 * dup_mmap
727 * dup_mm
728 * copy_mm
729 * copy_process
730 * do_fork
731 */
732 if ( pCpu->pCurInstr->opcode == OP_BTR
733 && !(offFault & 4)
734 /** @todo Validate that the bit index is X86_PTE_RW. */
735 )
736 {
737 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
738 return true;
739 }
740 return false;
741}
742
743
744/**
745 * Determine whether the page is likely to have been reused.
746 *
747 * @returns true if we consider the page as being reused for a different purpose.
748 * @returns false if we consider it to still be a paging page.
749 * @param pVM VM Handle.
750 * @param pPage The page in question.
751 * @param pRegFrame Trap register frame.
752 * @param pCpu The disassembly info for the faulting instruction.
753 * @param pvFault The fault address.
754 *
755 * @remark The REP prefix check is left to the caller because of STOSD/W.
756 */
757DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
758{
759#ifndef IN_RC
760 /** @todo could make this general, faulting close to rsp should be safe reuse heuristic. */
761 if ( HWACCMHasPendingIrq(pVM)
762 && (pRegFrame->rsp - pvFault) < 32)
763 {
764 /* Fault caused by stack writes while trying to inject an interrupt event. */
765 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
766 return true;
767 }
768#else
769 NOREF(pVM); NOREF(pvFault);
770#endif
771
772 switch (pCpu->pCurInstr->opcode)
773 {
774 /* call implies the actual push of the return address faulted */
775 case OP_CALL:
776 Log4(("pgmPoolMonitorIsReused: CALL\n"));
777 return true;
778 case OP_PUSH:
779 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
780 return true;
781 case OP_PUSHF:
782 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
783 return true;
784 case OP_PUSHA:
785 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
786 return true;
787 case OP_FXSAVE:
788 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
789 return true;
790 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
791 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
792 return true;
793 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
794 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
795 return true;
796 case OP_MOVSWD:
797 case OP_STOSWD:
798 if ( pCpu->prefix == (PREFIX_REP|PREFIX_REX)
799 && pRegFrame->rcx >= 0x40
800 )
801 {
802 Assert(pCpu->mode == CPUMODE_64BIT);
803
804 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
805 return true;
806 }
807 return false;
808 }
809 if ( (pCpu->param1.flags & USE_REG_GEN32)
810 && (pCpu->param1.base.reg_gen == USE_REG_ESP))
811 {
812 Log4(("pgmPoolMonitorIsReused: ESP\n"));
813 return true;
814 }
815
816 //if (pPage->fCR3Mix)
817 // return false;
818 return false;
819}
820
821
822/**
823 * Flushes the page being accessed.
824 *
825 * @returns VBox status code suitable for scheduling.
826 * @param pVM The VM handle.
827 * @param pPool The pool.
828 * @param pPage The pool page (head).
829 * @param pCpu The disassembly of the write instruction.
830 * @param pRegFrame The trap register frame.
831 * @param GCPhysFault The fault address as guest physical address.
832 * @param pvFault The fault address.
833 */
834static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
835 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
836{
837 /*
838 * First, do the flushing.
839 */
840 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
841
842 /*
843 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
844 */
845 uint32_t cbWritten;
846 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
847 if (RT_SUCCESS(rc2))
848 pRegFrame->rip += pCpu->opsize;
849 else if (rc2 == VERR_EM_INTERPRETER)
850 {
851#ifdef IN_RC
852 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
853 {
854 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
855 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
856 rc = VINF_SUCCESS;
857 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
858 }
859 else
860#endif
861 {
862 rc = VINF_EM_RAW_EMULATE_INSTR;
863 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
864 }
865 }
866 else
867 rc = rc2;
868
869 /* See use in pgmPoolAccessHandlerSimple(). */
870 PGM_INVL_GUEST_TLBS();
871
872 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
873 return rc;
874
875}
876
877
878/**
879 * Handles the STOSD write accesses.
880 *
881 * @returns VBox status code suitable for scheduling.
882 * @param pVM The VM handle.
883 * @param pPool The pool.
884 * @param pPage The pool page (head).
885 * @param pCpu The disassembly of the write instruction.
886 * @param pRegFrame The trap register frame.
887 * @param GCPhysFault The fault address as guest physical address.
888 * @param pvFault The fault address.
889 */
890DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
891 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
892{
893 Assert(pCpu->mode == CPUMODE_32BIT);
894
895 /*
896 * Increment the modification counter and insert it into the list
897 * of modified pages the first time.
898 */
899 if (!pPage->cModifications++)
900 pgmPoolMonitorModifiedInsert(pPool, pPage);
901
902 /*
903 * Execute REP STOSD.
904 *
905 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
906 * write situation, meaning that it's safe to write here.
907 */
908 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
909 while (pRegFrame->ecx)
910 {
911 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
912#ifdef IN_RC
913 *(uint32_t *)pu32 = pRegFrame->eax;
914#else
915 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
916#endif
917 pu32 += 4;
918 GCPhysFault += 4;
919 pRegFrame->edi += 4;
920 pRegFrame->ecx--;
921 }
922 pRegFrame->rip += pCpu->opsize;
923
924 /* See use in pgmPoolAccessHandlerSimple(). */
925 PGM_INVL_GUEST_TLBS();
926
927 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
928 return VINF_SUCCESS;
929}
930
931
932/**
933 * Handles the simple write accesses.
934 *
935 * @returns VBox status code suitable for scheduling.
936 * @param pVM The VM handle.
937 * @param pPool The pool.
938 * @param pPage The pool page (head).
939 * @param pCpu The disassembly of the write instruction.
940 * @param pRegFrame The trap register frame.
941 * @param GCPhysFault The fault address as guest physical address.
942 * @param pvFault The fault address.
943 */
944DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
945 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
946{
947 /*
948 * Increment the modification counter and insert it into the list
949 * of modified pages the first time.
950 */
951 if (!pPage->cModifications++)
952 pgmPoolMonitorModifiedInsert(pPool, pPage);
953
954 /*
955 * Clear all the pages. ASSUMES that pvFault is readable.
956 */
957 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
958
959 /*
960 * Interpret the instruction.
961 */
962 uint32_t cb;
963 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
964 if (RT_SUCCESS(rc))
965 pRegFrame->rip += pCpu->opsize;
966 else if (rc == VERR_EM_INTERPRETER)
967 {
968 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
969 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
970 rc = VINF_EM_RAW_EMULATE_INSTR;
971 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
972 }
973
974 /*
975 * Quick hack: with logging enabled we're getting stale
976 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
977 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
978 * have to be fixed to support this. But that'll have to wait till next week.
979 *
980 * An alternative is to keep track of the changed PTEs together with the
981 * GCPhys from the guest PT. This may prove expensive though.
982 *
983 * At the moment, it's VITAL that it's done AFTER interpreting the instruction
984 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
985 */
986 PGM_INVL_GUEST_TLBS();
987
988 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
989 return rc;
990}
991
992
993/**
994 * \#PF Handler callback for PT write accesses.
995 *
996 * @returns VBox status code (appropriate for GC return).
997 * @param pVM VM Handle.
998 * @param uErrorCode CPU Error code.
999 * @param pRegFrame Trap register frame.
1000 * NULL on DMA and other non CPU access.
1001 * @param pvFault The fault address (cr2).
1002 * @param GCPhysFault The GC physical address corresponding to pvFault.
1003 * @param pvUser User argument.
1004 */
1005DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1006{
1007 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1008 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1009 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1010 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1011
1012 /*
1013 * We should ALWAYS have the list head as user parameter. This
1014 * is because we use that page to record the changes.
1015 */
1016 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1017
1018 /*
1019 * Disassemble the faulting instruction.
1020 */
1021 DISCPUSTATE Cpu;
1022 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
1023 AssertRCReturn(rc, rc);
1024
1025 /*
1026 * Check if it's worth dealing with.
1027 */
1028 bool fReused = false;
1029 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1030 || pPage->fCR3Mix)
1031 && !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
1032 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
1033 {
1034 /*
1035 * Simple instructions, no REP prefix.
1036 */
1037 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
1038 {
1039 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1040 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1041 return rc;
1042 }
1043
1044 /*
1045 * Windows is frequently doing small memset() operations (netio test 4k+).
1046 * We have to deal with these or we'll kill the cache and performance.
1047 */
1048 if ( Cpu.pCurInstr->opcode == OP_STOSWD
1049 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
1050 && pRegFrame->ecx <= 0x20
1051 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1052 && !((uintptr_t)pvFault & 3)
1053 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1054 && Cpu.mode == CPUMODE_32BIT
1055 && Cpu.opmode == CPUMODE_32BIT
1056 && Cpu.addrmode == CPUMODE_32BIT
1057 && Cpu.prefix == PREFIX_REP
1058 && !pRegFrame->eflags.Bits.u1DF
1059 )
1060 {
1061 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1062 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1063 return rc;
1064 }
1065
1066 /* REP prefix, don't bother. */
1067 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1068 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1069 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
1070 }
1071
1072 /*
1073 * Not worth it, so flush it.
1074 *
1075 * If we considered it to be reused, don't go back to ring-3
1076 * to emulate failed instructions since we usually cannot
1077 * interpret them. This may be a bit risky, in which case
1078 * the reuse detection must be fixed.
1079 */
1080 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1081 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1082 rc = VINF_SUCCESS;
1083 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1084 return rc;
1085}
1086
1087# endif /* !IN_RING3 */
1088#endif /* PGMPOOL_WITH_MONITORING */
1089
1090#ifdef PGMPOOL_WITH_CACHE
1091
1092/**
1093 * Inserts a page into the GCPhys hash table.
1094 *
1095 * @param pPool The pool.
1096 * @param pPage The page.
1097 */
1098DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1099{
1100 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1101 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1102 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1103 pPage->iNext = pPool->aiHash[iHash];
1104 pPool->aiHash[iHash] = pPage->idx;
1105}
1106
1107
1108/**
1109 * Removes a page from the GCPhys hash table.
1110 *
1111 * @param pPool The pool.
1112 * @param pPage The page.
1113 */
1114DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1115{
1116 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1117 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1118 if (pPool->aiHash[iHash] == pPage->idx)
1119 pPool->aiHash[iHash] = pPage->iNext;
1120 else
1121 {
1122 uint16_t iPrev = pPool->aiHash[iHash];
1123 for (;;)
1124 {
1125 const int16_t i = pPool->aPages[iPrev].iNext;
1126 if (i == pPage->idx)
1127 {
1128 pPool->aPages[iPrev].iNext = pPage->iNext;
1129 break;
1130 }
1131 if (i == NIL_PGMPOOL_IDX)
1132 {
1133 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1134 break;
1135 }
1136 iPrev = i;
1137 }
1138 }
1139 pPage->iNext = NIL_PGMPOOL_IDX;
1140}
1141
1142
1143/**
1144 * Frees up one cache page.
1145 *
1146 * @returns VBox status code.
1147 * @retval VINF_SUCCESS on success.
1148 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
1149 * @param pPool The pool.
1150 * @param iUser The user index.
1151 */
1152static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1153{
1154#ifndef IN_RC
1155 const PVM pVM = pPool->CTX_SUFF(pVM);
1156#endif
1157 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1158 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1159
1160 /*
1161 * Select one page from the tail of the age list.
1162 */
1163 uint16_t iToFree = pPool->iAgeTail;
1164 if (iToFree == iUser)
1165 iToFree = pPool->aPages[iToFree].iAgePrev;
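    /* Don't evict the page that is about to reference the new allocation (iUser); take the next oldest entry instead. */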
1166/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1167 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1168 {
1169 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1170 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1171 {
1172 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1173 continue;
1174 iToFree = i;
1175 break;
1176 }
1177 }
1178*/
1179
1180 Assert(iToFree != iUser);
1181 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1182
1183 PPGMPOOLPAGE pPage = &pPool->aPages[iToFree];
1184
1185 /*
1186 * Reject any attempts at flushing the currently active shadow CR3 mapping
1187 */
1188 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
1189 {
1190 /* Refresh the cr3 mapping by putting it at the head of the age list. */
1191 pgmPoolCacheUsed(pPool, pPage);
1192 return pgmPoolCacheFreeOne(pPool, iUser);
1193 }
1194
1195 int rc = pgmPoolFlushPage(pPool, pPage);
1196 if (rc == VINF_SUCCESS)
1197 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1198 return rc;
1199}
1200
1201
1202/**
1203 * Checks if a kind mismatch is really a page being reused
1204 * or just a normal remapping.
1205 *
1206 * @returns true if reused and the cached page (enmKind1) should be flushed
1207 * @returns false if not reused.
1208 * @param enmKind1 The kind of the cached page.
1209 * @param enmKind2 The kind of the requested page.
1210 */
1211static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1212{
1213 switch (enmKind1)
1214 {
1215 /*
1216 * Never reuse them. There is no remapping in non-paging mode.
1217 */
1218 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1219 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1220 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1221 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1222 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1223 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1224 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1225 return true;
1226
1227 /*
1228 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1229 */
1230 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1231 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1232 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1233 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1234 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1235 switch (enmKind2)
1236 {
1237 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1238 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1239 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1240 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1241 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1242 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1243 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1244 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1245 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1246 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1247 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1248 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1249 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1250 return true;
1251 default:
1252 return false;
1253 }
1254
1255 /*
1256 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1257 */
1258 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1259 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1260 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1261 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1262 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1263 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1264 switch (enmKind2)
1265 {
1266 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1267 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1268 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1269 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1270 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1271 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1272 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1273 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1274 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1275 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1276 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1277 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1278 return true;
1279 default:
1280 return false;
1281 }
1282
1283 /*
1284 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1285 */
1286 case PGMPOOLKIND_ROOT_32BIT_PD:
1287 case PGMPOOLKIND_ROOT_PAE_PD:
1288 case PGMPOOLKIND_ROOT_PDPT:
1289 case PGMPOOLKIND_ROOT_NESTED:
1290 return false;
1291
1292 default:
1293 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1294 }
1295}
1296
1297
1298/**
1299 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1300 *
1301 * @returns VBox status code.
1302 * @retval VINF_PGM_CACHED_PAGE on success.
1303 * @retval VERR_FILE_NOT_FOUND if not found.
1304 * @param pPool The pool.
1305 * @param GCPhys The GC physical address of the page we're going to shadow.
1306 * @param enmKind The kind of mapping.
1307 * @param iUser The shadow page pool index of the user table.
1308 * @param iUserTable The index into the user table (shadowed).
1309 * @param ppPage Where to store the pointer to the page.
1310 */
1311static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1312{
1313#ifndef IN_RC
1314 const PVM pVM = pPool->CTX_SUFF(pVM);
1315#endif
1316 /*
1317 * Look up the GCPhys in the hash.
1318 */
1319 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1320 Log3(("pgmPoolCacheAlloc: %RGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
1321 if (i != NIL_PGMPOOL_IDX)
1322 {
1323 do
1324 {
1325 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1326 Log3(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1327 if (pPage->GCPhys == GCPhys)
1328 {
1329 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
1330 {
1331 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1332 if (RT_SUCCESS(rc))
1333 {
1334 *ppPage = pPage;
1335 STAM_COUNTER_INC(&pPool->StatCacheHits);
1336 return VINF_PGM_CACHED_PAGE;
1337 }
1338 return rc;
1339 }
1340
1341 /*
1342 * The kind is different. In some cases we should now flush the page
1343 * as it has been reused, but in most cases this is normal remapping
1344 * of PDs as PT or big pages using the GCPhys field in a slightly
1345 * different way than the other kinds.
1346 */
1347 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1348 {
1349 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1350 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
1351 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1352 break;
1353 }
1354 }
1355
1356 /* next */
1357 i = pPage->iNext;
1358 } while (i != NIL_PGMPOOL_IDX);
1359 }
1360
1361 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
1362 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1363 return VERR_FILE_NOT_FOUND;
1364}
1365
1366
1367/**
1368 * Inserts a page into the cache.
1369 *
1370 * @param pPool The pool.
1371 * @param pPage The cached page.
1372 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1373 */
1374static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1375{
1376 /*
1377 * Insert into the GCPhys hash if the page is fit for that.
1378 */
1379 Assert(!pPage->fCached);
1380 if (fCanBeCached)
1381 {
1382 pPage->fCached = true;
1383 pgmPoolHashInsert(pPool, pPage);
1384 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1385 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1386 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1387 }
1388 else
1389 {
1390 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1391 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1392 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1393 }
1394
1395 /*
1396 * Insert at the head of the age list.
1397 */
1398 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1399 pPage->iAgeNext = pPool->iAgeHead;
1400 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1401 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1402 else
1403 pPool->iAgeTail = pPage->idx;
1404 pPool->iAgeHead = pPage->idx;
1405}
1406
1407
1408/**
1409 * Flushes a cached page.
1410 *
1411 * @param pPool The pool.
1412 * @param pPage The cached page.
1413 */
1414static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1415{
1416 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1417
1418 /*
1419 * Remove the page from the hash.
1420 */
1421 if (pPage->fCached)
1422 {
1423 pPage->fCached = false;
1424 pgmPoolHashRemove(pPool, pPage);
1425 }
1426 else
1427 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1428
1429 /*
1430 * Remove it from the age list.
1431 */
1432 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1433 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1434 else
1435 pPool->iAgeTail = pPage->iAgePrev;
1436 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1437 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1438 else
1439 pPool->iAgeHead = pPage->iAgeNext;
1440 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1441 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1442}
1443
1444#endif /* PGMPOOL_WITH_CACHE */
1445#ifdef PGMPOOL_WITH_MONITORING
1446
1447/**
1448 * Looks for pages sharing the monitor.
1449 *
1450 * @returns Pointer to the head page.
1451 * @returns NULL if not found.
1452 * @param pPool The Pool
1453 * @param pNewPage The page which is going to be monitored.
1454 */
1455static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1456{
1457#ifdef PGMPOOL_WITH_CACHE
1458 /*
1459 * Look up the GCPhys in the hash.
1460 */
1461 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1462 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1463 if (i == NIL_PGMPOOL_IDX)
1464 return NULL;
1465 do
1466 {
1467 PPGMPOOLPAGE pPage = &pPool->aPages[i];
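        /* Unsigned wrap trick: the subtraction matches any pool page whose GCPhys lies within the same (page-aligned) guest page as GCPhys. */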
1468 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1469 && pPage != pNewPage)
1470 {
1471 switch (pPage->enmKind)
1472 {
1473 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1474 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1475 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1476 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1477 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1478 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1479 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1480 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1481 case PGMPOOLKIND_ROOT_32BIT_PD:
1482 case PGMPOOLKIND_ROOT_PAE_PD:
1483 case PGMPOOLKIND_ROOT_PDPT:
1484 {
1485 /* find the head */
1486 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1487 {
1488 Assert(pPage->iMonitoredPrev != pPage->idx);
1489 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1490 }
1491 return pPage;
1492 }
1493
1494 /* ignore, no monitoring. */
1495 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1496 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1497 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1498 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1499 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1500 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1501 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1502 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1503 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1504 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1505 case PGMPOOLKIND_ROOT_NESTED:
1506 break;
1507 default:
1508 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1509 }
1510 }
1511
1512 /* next */
1513 i = pPage->iNext;
1514 } while (i != NIL_PGMPOOL_IDX);
1515#endif
1516 return NULL;
1517}
1518
1519
1520/**
1521 * Enables write monitoring of a guest page.
1522 *
1523 * @returns VBox status code.
1524 * @retval VINF_SUCCESS on success.
1525 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1526 * @param pPool The pool.
1527 * @param pPage The cached page.
1528 */
1529static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1530{
1531 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1532
1533 /*
1534 * Filter out the relevant kinds.
1535 */
1536 switch (pPage->enmKind)
1537 {
1538 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1539 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1540 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1541 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1542 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1543 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1544 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1545 case PGMPOOLKIND_ROOT_PDPT:
1546 break;
1547
1548 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1549 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1550 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1551 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1552 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1553 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1554 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1555 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1556 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1557 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1558 case PGMPOOLKIND_ROOT_NESTED:
1559 /* Nothing to monitor here. */
1560 return VINF_SUCCESS;
1561
1562 case PGMPOOLKIND_ROOT_32BIT_PD:
1563 case PGMPOOLKIND_ROOT_PAE_PD:
1564#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1565 break;
1566#endif
1567 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1568 default:
1569 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1570 }
1571
1572 /*
1573 * Install handler.
1574 */
1575 int rc;
1576 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1577 if (pPageHead)
1578 {
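        /* Another pool page already monitors this guest page; link this page into the chain right after the head
           so the existing physical handler covers it as well. */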
1579 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1580 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1581 pPage->iMonitoredPrev = pPageHead->idx;
1582 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1583 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1584 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1585 pPageHead->iMonitoredNext = pPage->idx;
1586 rc = VINF_SUCCESS;
1587 }
1588 else
1589 {
1590 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1591 PVM pVM = pPool->CTX_SUFF(pVM);
1592 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1593 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1594 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1595 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1596 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1597 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1598 pPool->pszAccessHandler);
1599 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1600 * the heap size should suffice. */
1601 AssertFatalRC(rc);
1602 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1603 rc = VERR_PGM_POOL_CLEARED;
1604 }
1605 pPage->fMonitored = true;
1606 return rc;
1607}
1608
1609
1610/**
1611 * Disables write monitoring of a guest page.
1612 *
1613 * @returns VBox status code.
1614 * @retval VINF_SUCCESS on success.
1615 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1616 * @param pPool The pool.
1617 * @param pPage The cached page.
1618 */
1619static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1620{
1621 /*
1622 * Filter out the relevant kinds.
1623 */
1624 switch (pPage->enmKind)
1625 {
1626 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1627 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1628 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1629 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1630 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1631 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1632 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1633 case PGMPOOLKIND_ROOT_PDPT:
1634 break;
1635
1636 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1637 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1638 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1639 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1640 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1641 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1642 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1643 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1644 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1645 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1646 case PGMPOOLKIND_ROOT_NESTED:
1647 /* Nothing to monitor here. */
1648 return VINF_SUCCESS;
1649
1650 case PGMPOOLKIND_ROOT_32BIT_PD:
1651 case PGMPOOLKIND_ROOT_PAE_PD:
1652#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1653 break;
1654#endif
1655 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1656 default:
1657 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1658 }
1659
1660 /*
1661 * Remove the page from the monitored list or uninstall it if last.
1662 */
1663 const PVM pVM = pPool->CTX_SUFF(pVM);
1664 int rc;
1665 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1666 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1667 {
1668 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1669 {
1670 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1671 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1672 pNewHead->fCR3Mix = pPage->fCR3Mix;
1673 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1674 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1675 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1676 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
1677 pPool->pszAccessHandler);
1678 AssertFatalRCSuccess(rc);
1679 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1680 }
1681 else
1682 {
1683 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1684 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1685 {
1686 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1687 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1688 }
1689 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1690 rc = VINF_SUCCESS;
1691 }
1692 }
1693 else
1694 {
1695 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1696 AssertFatalRC(rc);
1697 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1698 rc = VERR_PGM_POOL_CLEARED;
1699 }
1700 pPage->fMonitored = false;
1701
1702 /*
1703 * Remove it from the list of modified pages (if in it).
1704 */
1705 pgmPoolMonitorModifiedRemove(pPool, pPage);
1706
1707 return rc;
1708}
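
/*
 * Illustrative aside (not part of PGMAllPool.cpp): the monitored-page chain
 * manipulated by pgmPoolMonitorInsert/pgmPoolMonitorFlush above is an
 * index-linked doubly linked list over a flat page array with a NIL sentinel.
 * The standalone sketch below shows the same three-way unlink: middle/tail
 * nodes are bypassed, while removing the head reports a new head so the
 * caller can hand the physical access handler over to it. All names here
 * (Node, NIL_IDX, chainUnlink) are invented for the sketch.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define NIL_IDX UINT16_C(0xffff)

typedef struct Node
{
    uint16_t iNext;   /* index of the next node in the chain, or NIL_IDX */
    uint16_t iPrev;   /* index of the previous node, or NIL_IDX */
} Node;

static Node g_aNodes[8];

/* Unlink node i. Returns the index of the new chain head when i was the head
   (the caller would move the access handler to it), otherwise NIL_IDX. */
static uint16_t chainUnlink(uint16_t i)
{
    uint16_t iNewHead = NIL_IDX;
    if (g_aNodes[i].iPrev == NIL_IDX)               /* head: successor becomes head */
        iNewHead = g_aNodes[i].iNext;
    else                                            /* middle/tail: bypass the node */
        g_aNodes[g_aNodes[i].iPrev].iNext = g_aNodes[i].iNext;
    if (g_aNodes[i].iNext != NIL_IDX)
        g_aNodes[g_aNodes[i].iNext].iPrev = g_aNodes[i].iPrev;
    g_aNodes[i].iNext = g_aNodes[i].iPrev = NIL_IDX;
    return iNewHead;
}

int main(void)
{
    /* Build the chain 0 <-> 1 <-> 2. */
    g_aNodes[0] = (Node){ 1, NIL_IDX };
    g_aNodes[1] = (Node){ 2, 0 };
    g_aNodes[2] = (Node){ NIL_IDX, 1 };

    assert(chainUnlink(1) == NIL_IDX);   /* middle node: no head change */
    assert(chainUnlink(0) == 2);         /* head: node 2 inherits the handler */
    printf("chain unlink sketch OK\n");
    return 0;
}
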
1709
1710# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1711
1712/**
1713 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1714 *
1715 * @param pPool The Pool.
1716 * @param pPage A page in the chain.
1717 * @param fCR3Mix The new fCR3Mix value.
1718 */
1719static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1720{
1721 /* current */
1722 pPage->fCR3Mix = fCR3Mix;
1723
1724 /* before */
1725 int16_t idx = pPage->iMonitoredPrev;
1726 while (idx != NIL_PGMPOOL_IDX)
1727 {
1728 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1729 idx = pPool->aPages[idx].iMonitoredPrev;
1730 }
1731
1732 /* after */
1733 idx = pPage->iMonitoredNext;
1734 while (idx != NIL_PGMPOOL_IDX)
1735 {
1736 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1737 idx = pPool->aPages[idx].iMonitoredNext;
1738 }
1739}
1740
1741
1742/**
1743 * Installs or modifies monitoring of a CR3 page (special).
1744 *
1745 * We're pretending the CR3 page is shadowed by the pool so we can use the
1746 * generic mechanisms in detecting chained monitoring. (This also gives us a
 1747 * taste of what code changes are required to really pool CR3 shadow pages.)
1748 *
1749 * @returns VBox status code.
1750 * @param pPool The pool.
1751 * @param idxRoot The CR3 (root) page index.
1752 * @param GCPhysCR3 The (new) CR3 value.
1753 */
1754int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1755{
1756 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1757 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1758 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d} GCPhysCR3=%RGp\n",
1759 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1760
1761 /*
1762 * The unlikely case where it already matches.
1763 */
1764 if (pPage->GCPhys == GCPhysCR3)
1765 {
1766 Assert(pPage->fMonitored);
1767 return VINF_SUCCESS;
1768 }
1769
1770 /*
1771 * Flush the current monitoring and remove it from the hash.
1772 */
1773 int rc = VINF_SUCCESS;
1774 if (pPage->fMonitored)
1775 {
1776 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1777 rc = pgmPoolMonitorFlush(pPool, pPage);
1778 if (rc == VERR_PGM_POOL_CLEARED)
1779 rc = VINF_SUCCESS;
1780 else
1781 AssertFatalRC(rc);
1782 pgmPoolHashRemove(pPool, pPage);
1783 }
1784
1785 /*
1786 * Monitor the page at the new location and insert it into the hash.
1787 */
1788 pPage->GCPhys = GCPhysCR3;
1789 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1790 if (rc2 != VERR_PGM_POOL_CLEARED)
1791 {
1792 AssertFatalRC(rc2);
1793 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1794 rc = rc2;
1795 }
1796 pgmPoolHashInsert(pPool, pPage);
1797 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1798 return rc;
1799}
1800
1801
1802/**
1803 * Removes the monitoring of a CR3 page (special).
1804 *
1805 * @returns VBox status code.
1806 * @param pPool The pool.
1807 * @param idxRoot The CR3 (root) page index.
1808 */
1809int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1810{
1811 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1812 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1813 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d}\n",
1814 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1815
1816 if (!pPage->fMonitored)
1817 return VINF_SUCCESS;
1818
1819 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1820 int rc = pgmPoolMonitorFlush(pPool, pPage);
1821 if (rc != VERR_PGM_POOL_CLEARED)
1822 AssertFatalRC(rc);
1823 else
1824 rc = VINF_SUCCESS;
1825 pgmPoolHashRemove(pPool, pPage);
1826 Assert(!pPage->fMonitored);
1827 pPage->GCPhys = NIL_RTGCPHYS;
1828 return rc;
1829}
1830
1831# endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
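
/*
 * Illustrative aside (not part of PGMAllPool.cpp): pgmPoolMonitorMonitorCR3
 * above re-keys a page that is looked up by its guest physical address, which
 * means removing it from the GCPhys hash, updating the key and inserting it
 * again. The trivial open-addressing table below (unlike the pool's real
 * chained hash) is only meant to show that remove/update/reinsert pattern;
 * every name in it is invented and it assumes the table never fills up.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define HASH_SLOTS 16
#define EMPTY_KEY  UINT64_MAX

static uint64_t g_aHash[HASH_SLOTS];

static unsigned hashSlot(uint64_t key)
{
    /* Linear probing; terminates because the table is never full here. */
    for (unsigned i = (unsigned)(key >> 12) % HASH_SLOTS; ; i = (i + 1) % HASH_SLOTS)
        if (g_aHash[i] == key || g_aHash[i] == EMPTY_KEY)
            return i;
}

static void hashInsert(uint64_t key)   { g_aHash[hashSlot(key)] = key; }
static void hashRemove(uint64_t key)   { g_aHash[hashSlot(key)] = EMPTY_KEY; }
static int  hashContains(uint64_t key) { return g_aHash[hashSlot(key)] == key; }

/* Change the key of a tracked page: remove, update, reinsert. */
static uint64_t rekey(uint64_t oldKey, uint64_t newKey)
{
    hashRemove(oldKey);
    hashInsert(newKey);
    return newKey;
}

int main(void)
{
    memset(g_aHash, 0xff, sizeof(g_aHash));   /* mark all slots empty */
    hashInsert(0x1000);
    uint64_t key = rekey(0x1000, 0x5000);     /* the CR3 moved to a new page */
    assert(!hashContains(0x1000) && hashContains(key));
    printf("re-key sketch OK\n");
    return 0;
}
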
1832
1833/**
1834 * Inserts the page into the list of modified pages.
1835 *
1836 * @param pPool The pool.
1837 * @param pPage The page.
1838 */
1839void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1840{
1841 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1842 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1843 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1844 && pPool->iModifiedHead != pPage->idx,
1845 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1846 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1847 pPool->iModifiedHead, pPool->cModifiedPages));
1848
1849 pPage->iModifiedNext = pPool->iModifiedHead;
1850 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1851 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1852 pPool->iModifiedHead = pPage->idx;
1853 pPool->cModifiedPages++;
1854#ifdef VBOX_WITH_STATISTICS
1855 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1856 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1857#endif
1858}
1859
1860
1861/**
1862 * Removes the page from the list of modified pages and resets the
 1863 * modification counter.
1864 *
1865 * @param pPool The pool.
1866 * @param pPage The page which is believed to be in the list of modified pages.
1867 */
1868static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1869{
1870 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1871 if (pPool->iModifiedHead == pPage->idx)
1872 {
1873 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1874 pPool->iModifiedHead = pPage->iModifiedNext;
1875 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1876 {
1877 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1878 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1879 }
1880 pPool->cModifiedPages--;
1881 }
1882 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1883 {
1884 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1885 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1886 {
1887 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1888 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1889 }
1890 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1891 pPool->cModifiedPages--;
1892 }
1893 else
1894 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1895 pPage->cModifications = 0;
1896}
1897
1898
1899/**
1900 * Zaps the list of modified pages, resetting their modification counters in the process.
1901 *
1902 * @param pVM The VM handle.
1903 */
1904void pgmPoolMonitorModifiedClearAll(PVM pVM)
1905{
1906 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1907 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1908
1909 unsigned cPages = 0; NOREF(cPages);
1910 uint16_t idx = pPool->iModifiedHead;
1911 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1912 while (idx != NIL_PGMPOOL_IDX)
1913 {
1914 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1915 idx = pPage->iModifiedNext;
1916 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1917 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1918 pPage->cModifications = 0;
1919 Assert(++cPages);
1920 }
1921 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1922 pPool->cModifiedPages = 0;
1923}
1924
1925
1926#ifdef IN_RING3
1927/**
1928 * Clear all shadow pages and clear all modification counters.
1929 *
1930 * @param pVM The VM handle.
1931 * @remark Should only be used when monitoring is available, thus placed in
1932 * the PGMPOOL_WITH_MONITORING #ifdef.
1933 */
1934void pgmPoolClearAll(PVM pVM)
1935{
1936 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1937 STAM_PROFILE_START(&pPool->StatClearAll, c);
1938 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1939
1940 /*
 1941 * Iterate all the pages until we've encountered all those in use.
 1942 * This is a simple but not quite optimal solution.
1943 */
1944 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1945 unsigned cLeft = pPool->cUsedPages;
1946 unsigned iPage = pPool->cCurPages;
1947 while (--iPage >= PGMPOOL_IDX_FIRST)
1948 {
1949 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1950 if (pPage->GCPhys != NIL_RTGCPHYS)
1951 {
1952 switch (pPage->enmKind)
1953 {
1954 /*
1955 * We only care about shadow page tables.
1956 */
1957 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1958 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1959 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1960 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1961 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1962 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1963 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1964 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1965 {
1966#ifdef PGMPOOL_WITH_USER_TRACKING
1967 if (pPage->cPresent)
1968#endif
1969 {
1970 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1971 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1972 ASMMemZeroPage(pvShw);
1973 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1974#ifdef PGMPOOL_WITH_USER_TRACKING
1975 pPage->cPresent = 0;
1976 pPage->iFirstPresent = ~0;
1977#endif
1978 }
1979 }
1980 /* fall thru */
1981
1982 default:
1983 Assert(!pPage->cModifications || ++cModifiedPages);
1984 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1985 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1986 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1987 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1988 pPage->cModifications = 0;
1989 break;
1990
1991 }
1992 if (!--cLeft)
1993 break;
1994 }
1995 }
1996
 1997    /* sweep the special pages too. */
1998 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1999 {
2000 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2001 if (pPage->GCPhys != NIL_RTGCPHYS)
2002 {
2003 Assert(!pPage->cModifications || ++cModifiedPages);
2004 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2005 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2006 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2007 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2008 pPage->cModifications = 0;
2009 }
2010 }
2011
2012#ifndef DEBUG_michael
2013 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2014#endif
2015 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2016 pPool->cModifiedPages = 0;
2017
2018#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2019 /*
2020 * Clear all the GCPhys links and rebuild the phys ext free list.
2021 */
2022 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2023 pRam;
2024 pRam = pRam->CTX_SUFF(pNext))
2025 {
2026 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2027 while (iPage-- > 0)
2028 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2029 }
2030
2031 pPool->iPhysExtFreeHead = 0;
2032 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2033 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2034 for (unsigned i = 0; i < cMaxPhysExts; i++)
2035 {
2036 paPhysExts[i].iNext = i + 1;
2037 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2038 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2039 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2040 }
2041 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2042#endif
2043
2044
2045 pPool->cPresent = 0;
2046 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2047}
2048#endif /* IN_RING3 */
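
/*
 * Illustrative aside (not part of PGMAllPool.cpp): pgmPoolClearAll above does
 * not visit every page slot; it keeps a countdown of how many in-use pages
 * remain (cLeft) and stops as soon as all of them have been seen. The
 * standalone sketch below shows that early-exit scan with invented names
 * (poolScanUsed, g_afInUse) and a toy pool of 16 slots.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define POOL_SLOTS 16

static bool g_afInUse[POOL_SLOTS];

/* Scans from the top of the array down; returns how many slots were visited. */
static unsigned poolScanUsed(unsigned cUsed)
{
    unsigned cVisited = 0;
    unsigned cLeft    = cUsed;
    unsigned iSlot    = POOL_SLOTS;
    while (iSlot-- > 0 && cLeft > 0)
    {
        cVisited++;
        if (g_afInUse[iSlot])
        {
            /* ... per-page work would go here (zeroing, resetting counters) ... */
            cLeft--;
        }
    }
    return cVisited;
}

int main(void)
{
    /* Two in-use pages near the top of the array: the scan stops early. */
    g_afInUse[15] = g_afInUse[12] = true;
    unsigned cVisited = poolScanUsed(2);
    assert(cVisited == 4);                    /* slots 15,14,13,12 - not all 16 */
    printf("visited %u of %d slots\n", cVisited, POOL_SLOTS);
    return 0;
}
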
2049
2050
2051/**
2052 * Handle SyncCR3 pool tasks
2053 *
2054 * @returns VBox status code.
 2055 * @retval VINF_SUCCESS if successfully handled.
 2056 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2057 * @param pVM The VM handle.
2058 * @remark Should only be used when monitoring is available, thus placed in
2059 * the PGMPOOL_WITH_MONITORING #ifdef.
2060 */
2061int pgmPoolSyncCR3(PVM pVM)
2062{
2063 /*
2064 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2065 * Occasionally we will have to clear all the shadow page tables because we wanted
 2066 * to monitor a page which was mapped by too many shadowed page tables. This operation is
 2067 * sometimes referred to as a 'lightweight flush'.
2068 */
2069 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2070 pgmPoolMonitorModifiedClearAll(pVM);
2071 else
2072 {
2073# ifndef IN_RC //def IN_RING3 - fixing properly in a bit...
2074 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2075 pgmPoolClearAll(pVM);
2076# else /* !IN_RING3 */
2077 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2078 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2079 return VINF_PGM_SYNC_CR3;
2080# endif /* !IN_RING3 */
2081 }
2082 return VINF_SUCCESS;
2083}
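
/*
 * Illustrative aside (not part of PGMAllPool.cpp): the shape of the decision
 * pgmPoolSyncCR3 makes above - do the cheap counter reset in place, but only
 * perform the full "lightweight flush" where that is safe, otherwise raise a
 * force-action flag and return a status that bounces the caller to ring-3.
 * The flags, status values and fInRing3 parameter below are invented; this is
 * a sketch of the pattern, not the real PGM interface.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define SK_SYNC_CLEAR_POOL  0x01u   /* "a full pool clear has been requested" */
#define SK_FF_SYNC_CR3      0x02u   /* force flag: redo the CR3 sync later    */

enum { SK_OK = 0, SK_DEFER_TO_RING3 = 1 };

static int poolSyncCR3(unsigned *pfSyncFlags, unsigned *pfForceFlags, bool fInRing3)
{
    if (!(*pfSyncFlags & SK_SYNC_CLEAR_POOL))
        return SK_OK;                          /* cheap path: reset modification counters */
    if (fInRing3)
    {
        *pfSyncFlags &= ~SK_SYNC_CLEAR_POOL;   /* safe here: clear the whole pool */
        return SK_OK;
    }
    *pfForceFlags |= SK_FF_SYNC_CR3;           /* not safe: request another sync ... */
    return SK_DEFER_TO_RING3;                  /* ... and bounce to ring-3 */
}

int main(void)
{
    unsigned fSync = SK_SYNC_CLEAR_POOL, fForce = 0;
    assert(poolSyncCR3(&fSync, &fForce, false) == SK_DEFER_TO_RING3 && (fForce & SK_FF_SYNC_CR3));
    assert(poolSyncCR3(&fSync, &fForce, true) == SK_OK && !(fSync & SK_SYNC_CLEAR_POOL));
    printf("SyncCR3 deferral sketch OK\n");
    return 0;
}
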
2084
2085#endif /* PGMPOOL_WITH_MONITORING */
2086#ifdef PGMPOOL_WITH_USER_TRACKING
2087
2088/**
2089 * Frees up at least one user entry.
2090 *
2091 * @returns VBox status code.
 2092 * @retval VINF_SUCCESS if a user entry was successfully freed up.
2093 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2094 * @param pPool The pool.
2095 * @param iUser The user index.
2096 */
2097static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2098{
2099 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2100#ifdef PGMPOOL_WITH_CACHE
2101 /*
2102 * Just free cached pages in a braindead fashion.
2103 */
2104 /** @todo walk the age list backwards and free the first with usage. */
2105 int rc = VINF_SUCCESS;
2106 do
2107 {
2108 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2109 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2110 rc = rc2;
2111 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2112 return rc;
2113#else
2114 /*
2115 * Lazy approach.
2116 */
2117 /* @todo incompatible with long mode paging (cr3 root will be flushed) */
2118 Assert(!CPUMIsGuestInLongMode(pVM));
2119 pgmPoolFlushAllInt(pPool);
2120 return VERR_PGM_POOL_FLUSHED;
2121#endif
2122}
2123
2124
2125/**
2126 * Inserts a page into the cache.
2127 *
 2128 * This will create a user node for the page, insert it into the GCPhys
2129 * hash, and insert it into the age list.
2130 *
2131 * @returns VBox status code.
2132 * @retval VINF_SUCCESS if successfully added.
2133 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2134 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
2135 * @param pPool The pool.
2136 * @param pPage The cached page.
2137 * @param GCPhys The GC physical address of the page we're gonna shadow.
2138 * @param iUser The user index.
2139 * @param iUserTable The user table index.
2140 */
2141DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2142{
2143 int rc = VINF_SUCCESS;
2144 PPGMPOOLUSER pUser = pPool->CTX_SUFF(paUsers);
2145
2146 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
2147
2148 /*
 2149 * Find a free user node.
2150 */
2151 uint16_t i = pPool->iUserFreeHead;
2152 if (i == NIL_PGMPOOL_USER_INDEX)
2153 {
2154 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2155 if (RT_FAILURE(rc))
2156 return rc;
2157 i = pPool->iUserFreeHead;
2158 }
2159
2160 /*
2161 * Unlink the user node from the free list,
2162 * initialize and insert it into the user list.
2163 */
2164 pPool->iUserFreeHead = pUser[i].iNext;
2165 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
2166 pUser[i].iUser = iUser;
2167 pUser[i].iUserTable = iUserTable;
2168 pPage->iUserHead = i;
2169
2170 /*
2171 * Insert into cache and enable monitoring of the guest page if enabled.
2172 *
2173 * Until we implement caching of all levels, including the CR3 one, we'll
2174 * have to make sure we don't try monitor & cache any recursive reuse of
2175 * a monitored CR3 page. Because all windows versions are doing this we'll
2176 * have to be able to do combined access monitoring, CR3 + PT and
2177 * PD + PT (guest PAE).
2178 *
2179 * Update:
2180 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2181 */
2182#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2183# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2184 const bool fCanBeMonitored = true;
2185# else
2186 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2187 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2188 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2189# endif
2190# ifdef PGMPOOL_WITH_CACHE
2191 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2192# endif
2193 if (fCanBeMonitored)
2194 {
2195# ifdef PGMPOOL_WITH_MONITORING
2196 rc = pgmPoolMonitorInsert(pPool, pPage);
2197 if (rc == VERR_PGM_POOL_CLEARED)
2198 {
2199 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2200# ifndef PGMPOOL_WITH_CACHE
2201 pgmPoolMonitorFlush(pPool, pPage);
2202 rc = VERR_PGM_POOL_FLUSHED;
2203# endif
2204 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2205 pUser[i].iNext = pPool->iUserFreeHead;
2206 pUser[i].iUser = NIL_PGMPOOL_IDX;
2207 pPool->iUserFreeHead = i;
2208 }
2209 }
2210# endif
2211#endif /* PGMPOOL_WITH_MONITORING */
2212 return rc;
2213}
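
/*
 * Illustrative aside (not part of PGMAllPool.cpp): the user records handled by
 * pgmPoolTrackInsert/pgmPoolTrackAddUser above all live in one array; records
 * not in use are chained through iNext from a free-list head, and records in
 * use are chained from the owning page's iUserHead. The standalone sketch
 * below shows the pop-from-free-list / push-onto-page-chain step with invented
 * names (UserRec, userAdd, NIL_USER_IDX).
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define NIL_USER_IDX UINT16_C(0xffff)
#define MAX_USERS    8

typedef struct UserRec
{
    uint16_t iNext;       /* next record in the free list or in a page's user chain */
    uint16_t iUser;       /* index of the page table referencing us (illustrative)  */
    uint32_t iUserTable;  /* entry within that page table                           */
} UserRec;

static UserRec  g_aUsers[MAX_USERS];
static uint16_t g_iUserFreeHead;

static void userInit(void)
{
    for (uint16_t i = 0; i < MAX_USERS; i++)
        g_aUsers[i].iNext = (uint16_t)(i + 1);
    g_aUsers[MAX_USERS - 1].iNext = NIL_USER_IDX;
    g_iUserFreeHead = 0;
}

/* Pop a record from the free list, fill it in and push it onto the page's user
   chain (piUserHead). Returns the record index, or NIL_USER_IDX when the free
   list is empty (the real code would free one up first). */
static uint16_t userAdd(uint16_t *piUserHead, uint16_t iUser, uint32_t iUserTable)
{
    uint16_t i = g_iUserFreeHead;
    if (i == NIL_USER_IDX)
        return NIL_USER_IDX;
    g_iUserFreeHead        = g_aUsers[i].iNext;
    g_aUsers[i].iUser      = iUser;
    g_aUsers[i].iUserTable = iUserTable;
    g_aUsers[i].iNext      = *piUserHead;      /* push onto the page's chain */
    *piUserHead            = i;
    return i;
}

int main(void)
{
    userInit();
    uint16_t iPageUserHead = NIL_USER_IDX;
    assert(userAdd(&iPageUserHead, 3, 17) == 0);
    assert(userAdd(&iPageUserHead, 5, 42) == 1);
    assert(iPageUserHead == 1 && g_aUsers[1].iNext == 0);
    printf("user free-list sketch OK\n");
    return 0;
}
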
2214
2215
2216# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2217/**
2218 * Adds a user reference to a page.
2219 *
 2220 * This will add a user record for the page and tell the cache to update
 2221 * its replacement stats for it (moving the page to the head of the age list).
 2222 *
2223 * @returns VBox status code.
2224 * @retval VINF_SUCCESS if successfully added.
2225 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2226 * @param pPool The pool.
2227 * @param pPage The cached page.
2228 * @param iUser The user index.
2229 * @param iUserTable The user table.
2230 */
2231static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2232{
2233 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2234
2235 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
2236# ifdef VBOX_STRICT
2237 /*
 2238 * Check that the entry doesn't already exist.
2239 */
2240 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2241 {
2242 uint16_t i = pPage->iUserHead;
2243 do
2244 {
2245 Assert(i < pPool->cMaxUsers);
2246 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2247 i = paUsers[i].iNext;
2248 } while (i != NIL_PGMPOOL_USER_INDEX);
2249 }
2250# endif
2251
2252 /*
2253 * Allocate a user node.
2254 */
2255 uint16_t i = pPool->iUserFreeHead;
2256 if (i == NIL_PGMPOOL_USER_INDEX)
2257 {
2258 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2259 if (RT_FAILURE(rc))
2260 return rc;
2261 i = pPool->iUserFreeHead;
2262 }
2263 pPool->iUserFreeHead = paUsers[i].iNext;
2264
2265 /*
2266 * Initialize the user node and insert it.
2267 */
2268 paUsers[i].iNext = pPage->iUserHead;
2269 paUsers[i].iUser = iUser;
2270 paUsers[i].iUserTable = iUserTable;
2271 pPage->iUserHead = i;
2272
2273# ifdef PGMPOOL_WITH_CACHE
2274 /*
2275 * Tell the cache to update its replacement stats for this page.
2276 */
2277 pgmPoolCacheUsed(pPool, pPage);
2278# endif
2279 return VINF_SUCCESS;
2280}
2281# endif /* PGMPOOL_WITH_CACHE */
2282
2283
2284/**
2285 * Frees a user record associated with a page.
2286 *
 2287 * This does not clear the entry in the user table; it simply returns the
 2288 * user record to the chain of free records.
2289 *
2290 * @param pPool The pool.
 2291 * @param pPage The shadow page whose user record is being freed.
2292 * @param iUser The shadow page pool index of the user table.
2293 * @param iUserTable The index into the user table (shadowed).
2294 */
2295static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2296{
2297 /*
2298 * Unlink and free the specified user entry.
2299 */
2300 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2301
2302 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2303 uint16_t i = pPage->iUserHead;
2304 if ( i != NIL_PGMPOOL_USER_INDEX
2305 && paUsers[i].iUser == iUser
2306 && paUsers[i].iUserTable == iUserTable)
2307 {
2308 pPage->iUserHead = paUsers[i].iNext;
2309
2310 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2311 paUsers[i].iNext = pPool->iUserFreeHead;
2312 pPool->iUserFreeHead = i;
2313 return;
2314 }
2315
2316 /* General: Linear search. */
2317 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2318 while (i != NIL_PGMPOOL_USER_INDEX)
2319 {
2320 if ( paUsers[i].iUser == iUser
2321 && paUsers[i].iUserTable == iUserTable)
2322 {
2323 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2324 paUsers[iPrev].iNext = paUsers[i].iNext;
2325 else
2326 pPage->iUserHead = paUsers[i].iNext;
2327
2328 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2329 paUsers[i].iNext = pPool->iUserFreeHead;
2330 pPool->iUserFreeHead = i;
2331 return;
2332 }
2333 iPrev = i;
2334 i = paUsers[i].iNext;
2335 }
2336
2337 /* Fatal: didn't find it */
2338 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2339 iUser, iUserTable, pPage->GCPhys));
2340}
2341
2342
2343/**
2344 * Gets the entry size of a shadow table.
2345 *
2346 * @param enmKind The kind of page.
2347 *
2348 * @returns The size of the entry in bytes. That is, 4 or 8.
2349 * @returns If the kind is not for a table, an assertion is raised and 0 is
2350 * returned.
2351 */
2352DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2353{
2354 switch (enmKind)
2355 {
2356 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2357 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2358 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2359 case PGMPOOLKIND_ROOT_32BIT_PD:
2360 return 4;
2361
2362 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2363 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2364 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2365 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2366 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2367 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2368 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2369 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2370 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2371 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2372 case PGMPOOLKIND_ROOT_PAE_PD:
2373 case PGMPOOLKIND_ROOT_PDPT:
2374 case PGMPOOLKIND_ROOT_NESTED:
2375 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2376 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2377 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2378 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2379 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2380 return 8;
2381
2382 default:
2383 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2384 }
2385}
2386
2387
2388/**
2389 * Gets the entry size of a guest table.
2390 *
2391 * @param enmKind The kind of page.
2392 *
2393 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2394 * @returns If the kind is not for a table, an assertion is raised and 0 is
2395 * returned.
2396 */
2397DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2398{
2399 switch (enmKind)
2400 {
2401 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2402 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2403 case PGMPOOLKIND_ROOT_32BIT_PD:
2404 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2405 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2406 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2407 return 4;
2408
2409 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2410 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2411 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2412 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2413 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2414 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2415 case PGMPOOLKIND_ROOT_PAE_PD:
2416 case PGMPOOLKIND_ROOT_PDPT:
2417 return 8;
2418
2419 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2420 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2421 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2422 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2423 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2424 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2425 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2426 case PGMPOOLKIND_ROOT_NESTED:
2427 /** @todo can we return 0? (nobody is calling this...) */
2428 AssertFailed();
2429 return 0;
2430
2431 default:
2432 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2433 }
2434}
2435
2436#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2437
2438/**
2439 * Scans one shadow page table for mappings of a physical page.
2440 *
2441 * @param pVM The VM handle.
2442 * @param pPhysPage The guest page in question.
2443 * @param iShw The shadow page table.
2444 * @param cRefs The number of references made in that PT.
2445 */
2446static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2447{
2448 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2449 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2450
2451 /*
2452 * Assert sanity.
2453 */
2454 Assert(cRefs == 1);
2455 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2456 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2457
2458 /*
2459 * Then, clear the actual mappings to the page in the shadow PT.
2460 */
2461 switch (pPage->enmKind)
2462 {
2463 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2464 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2465 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2466 {
2467 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2468 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2469 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2470 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2471 {
2472 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2473 pPT->a[i].u = 0;
2474 cRefs--;
2475 if (!cRefs)
2476 return;
2477 }
2478#ifdef LOG_ENABLED
2479 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2480 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2481 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2482 {
2483 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2484 pPT->a[i].u = 0;
2485 }
2486#endif
2487 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2488 break;
2489 }
2490
2491 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2492 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2493 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2494 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2495 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2496 {
2497 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2498 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2499 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2500 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2501 {
2502 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2503 pPT->a[i].u = 0;
2504 cRefs--;
2505 if (!cRefs)
2506 return;
2507 }
2508#ifdef LOG_ENABLED
2509 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2510 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2511 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2512 {
2513 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2514 pPT->a[i].u = 0;
2515 }
2516#endif
2517 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
2518 break;
2519 }
2520
2521 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2522 {
2523 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2524 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2525 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2526 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2527 {
2528 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2529 pPT->a[i].u = 0;
2530 cRefs--;
2531 if (!cRefs)
2532 return;
2533 }
2534#ifdef LOG_ENABLED
2535 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2536 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2537 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2538 {
2539 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2540 pPT->a[i].u = 0;
2541 }
2542#endif
2543 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2544 break;
2545 }
2546
2547 default:
2548 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2549 }
2550}
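
/*
 * Illustrative aside (not part of PGMAllPool.cpp): the scan performed by
 * pgmPoolTrackFlushGCPhysPTInt above treats a shadow page table as an array
 * of 64-bit entries, zeroes every entry whose present bit and physical
 * address match the target, and stops once the expected number of references
 * has been cleared. The constants and names below (PTE_P, PTE_ADDR_MASK,
 * flushRefs) are invented for this standalone sketch.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PT_ENTRIES    512
#define PTE_P         UINT64_C(0x0000000000000001)
#define PTE_ADDR_MASK UINT64_C(0x000ffffffffff000)

/* Clears up to cRefs entries mapping HCPhys; returns how many were cleared. */
static unsigned flushRefs(uint64_t *paPte, unsigned iFirst, uint64_t HCPhys, unsigned cRefs)
{
    const uint64_t u64 = (HCPhys & PTE_ADDR_MASK) | PTE_P;   /* what a matching PTE looks like */
    unsigned cCleared = 0;
    for (unsigned i = iFirst; i < PT_ENTRIES && cCleared < cRefs; i++)
        if ((paPte[i] & (PTE_ADDR_MASK | PTE_P)) == u64)
        {
            paPte[i] = 0;                                     /* drop the mapping */
            cCleared++;
        }
    return cCleared;
}

int main(void)
{
    static uint64_t aPte[PT_ENTRIES];
    aPte[7]  = UINT64_C(0x1234000) | PTE_P;   /* the mapping we want gone */
    aPte[9]  = UINT64_C(0x9999000) | PTE_P;   /* unrelated mapping, must survive */
    aPte[20] = UINT64_C(0x1234000);           /* same address but not present */

    assert(flushRefs(aPte, 7, UINT64_C(0x1234000), 1) == 1);
    assert(aPte[7] == 0 && aPte[9] != 0 && aPte[20] != 0);
    printf("shadow PT flush sketch OK\n");
    return 0;
}
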
2551
2552
2553/**
2554 * Scans one shadow page table for mappings of a physical page.
2555 *
2556 * @param pVM The VM handle.
2557 * @param pPhysPage The guest page in question.
2558 * @param iShw The shadow page table.
2559 * @param cRefs The number of references made in that PT.
2560 */
2561void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2562{
2563 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2564 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2565 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2566 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2567 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2568 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2569}
2570
2571
2572/**
2573 * Flushes a list of shadow page tables mapping the same physical page.
2574 *
2575 * @param pVM The VM handle.
2576 * @param pPhysPage The guest page in question.
2577 * @param iPhysExt The physical cross reference extent list to flush.
2578 */
2579void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2580{
2581 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2582 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
 2583    LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%u\n", pPhysPage->HCPhys, iPhysExt));
2584
2585 const uint16_t iPhysExtStart = iPhysExt;
2586 PPGMPOOLPHYSEXT pPhysExt;
2587 do
2588 {
2589 Assert(iPhysExt < pPool->cMaxPhysExts);
2590 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2591 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2592 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2593 {
2594 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2595 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2596 }
2597
2598 /* next */
2599 iPhysExt = pPhysExt->iNext;
2600 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2601
2602 /* insert the list into the free list and clear the ram range entry. */
2603 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2604 pPool->iPhysExtFreeHead = iPhysExtStart;
2605 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2606
2607 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2608}
2609
2610#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2611
2612/**
2613 * Scans all shadow page tables for mappings of a physical page.
2614 *
2615 * This may be slow, but it's most likely more efficient than cleaning
2616 * out the entire page pool / cache.
2617 *
2618 * @returns VBox status code.
 2619 * @retval VINF_SUCCESS if all references have been successfully cleared.
2620 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2621 * a page pool cleaning.
2622 *
2623 * @param pVM The VM handle.
2624 * @param pPhysPage The guest page in question.
2625 */
2626int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2627{
2628 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2629 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2630 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2631 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2632
2633#if 1
2634 /*
2635 * There is a limit to what makes sense.
2636 */
2637 if (pPool->cPresent > 1024)
2638 {
2639 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2640 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2641 return VINF_PGM_GCPHYS_ALIASED;
2642 }
2643#endif
2644
2645 /*
 2646 * Iterate all the pages until we've encountered all those in use.
 2647 * This is a simple but not quite optimal solution.
2648 */
2649 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2650 const uint32_t u32 = u64;
2651 unsigned cLeft = pPool->cUsedPages;
2652 unsigned iPage = pPool->cCurPages;
2653 while (--iPage >= PGMPOOL_IDX_FIRST)
2654 {
2655 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2656 if (pPage->GCPhys != NIL_RTGCPHYS)
2657 {
2658 switch (pPage->enmKind)
2659 {
2660 /*
2661 * We only care about shadow page tables.
2662 */
2663 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2664 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2665 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2666 {
2667 unsigned cPresent = pPage->cPresent;
2668 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2669 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2670 if (pPT->a[i].n.u1Present)
2671 {
2672 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2673 {
2674 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2675 pPT->a[i].u = 0;
2676 }
2677 if (!--cPresent)
2678 break;
2679 }
2680 break;
2681 }
2682
2683 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2684 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2685 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2686 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2687 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2688 {
2689 unsigned cPresent = pPage->cPresent;
2690 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2691 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2692 if (pPT->a[i].n.u1Present)
2693 {
2694 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2695 {
2696 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2697 pPT->a[i].u = 0;
2698 }
2699 if (!--cPresent)
2700 break;
2701 }
2702 break;
2703 }
2704 }
2705 if (!--cLeft)
2706 break;
2707 }
2708 }
2709
2710 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2711 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2712 return VINF_SUCCESS;
2713}
2714
2715
2716/**
2717 * Clears the user entry in a user table.
2718 *
2719 * This is used to remove all references to a page when flushing it.
2720 */
2721static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2722{
2723 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2724 Assert(pUser->iUser < pPool->cCurPages);
2725 uint32_t iUserTable = pUser->iUserTable;
2726
2727 /*
2728 * Map the user page.
2729 */
2730 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2731#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2732 if (pUserPage->enmKind == PGMPOOLKIND_ROOT_PAE_PD)
2733 {
 2734        /* Must translate the fake 2048-entry PD to a 512-entry one since the R0 mapping is not linear. */
2735 Assert(pUser->iUser == PGMPOOL_IDX_PAE_PD);
2736 uint32_t iPdpt = iUserTable / X86_PG_PAE_ENTRIES;
2737 iUserTable %= X86_PG_PAE_ENTRIES;
2738 pUserPage = &pPool->aPages[PGMPOOL_IDX_PAE_PD_0 + iPdpt];
2739 Assert(pUserPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD);
2740 }
2741#endif
2742 union
2743 {
2744 uint64_t *pau64;
2745 uint32_t *pau32;
2746 } u;
2747 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
2748
2749 /* Safety precaution in case we change the paging for other modes too in the future. */
2750 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
2751
2752#ifdef VBOX_STRICT
2753 /*
2754 * Some sanity checks.
2755 */
2756 switch (pUserPage->enmKind)
2757 {
2758 case PGMPOOLKIND_ROOT_32BIT_PD:
2759 Assert(iUserTable < X86_PG_ENTRIES);
2760 Assert(!(u.pau32[iUserTable] & PGM_PDFLAGS_MAPPING));
2761 break;
2762# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2763 case PGMPOOLKIND_ROOT_PAE_PD:
2764 Assert(iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2765 AssertMsg(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING), ("%llx %d\n", u.pau64[iUserTable], iUserTable));
2766 break;
2767# endif
2768 case PGMPOOLKIND_ROOT_PDPT:
2769 Assert(iUserTable < 4);
2770 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2771 break;
2772 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2773 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2774 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2775 break;
2776 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2777 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2778 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
2779 break;
2780 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2781 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2782 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2783 break;
2784 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2785 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2786 /* GCPhys >> PAGE_SHIFT is the index here */
2787 break;
2788 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2789 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2790 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2791 break;
2792
2793 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2794 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2795 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2796 break;
2797
2798 case PGMPOOLKIND_ROOT_NESTED:
2799 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2800 break;
2801
2802 default:
2803 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2804 break;
2805 }
2806#endif /* VBOX_STRICT */
2807
2808 /*
2809 * Clear the entry in the user page.
2810 */
2811 switch (pUserPage->enmKind)
2812 {
2813 /* 32-bit entries */
2814 case PGMPOOLKIND_ROOT_32BIT_PD:
2815 u.pau32[iUserTable] = 0;
2816 break;
2817
2818 /* 64-bit entries */
2819 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2820 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2821 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2822 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2823 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2824 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2825 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2826#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2827 case PGMPOOLKIND_ROOT_PAE_PD:
2828#endif
2829 case PGMPOOLKIND_ROOT_PDPT:
2830 case PGMPOOLKIND_ROOT_NESTED:
2831 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2832 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2833 u.pau64[iUserTable] = 0;
2834 break;
2835
2836 default:
2837 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2838 }
2839}
2840
2841
2842/**
2843 * Clears all users of a page.
2844 */
2845static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2846{
2847 /*
2848 * Free all the user records.
2849 */
2850 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2851 uint16_t i = pPage->iUserHead;
2852 while (i != NIL_PGMPOOL_USER_INDEX)
2853 {
 2854        /* Clear the entry in the user table. */
2855 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2856
2857 /* Free it. */
2858 const uint16_t iNext = paUsers[i].iNext;
2859 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2860 paUsers[i].iNext = pPool->iUserFreeHead;
2861 pPool->iUserFreeHead = i;
2862
2863 /* Next. */
2864 i = iNext;
2865 }
2866 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2867}
2868
2869#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2870
2871/**
2872 * Allocates a new physical cross reference extent.
2873 *
2874 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2875 * @param pVM The VM handle.
2876 * @param piPhysExt Where to store the phys ext index.
2877 */
2878PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2879{
2880 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2881 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2882 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2883 {
2884 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2885 return NULL;
2886 }
2887 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2888 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2889 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2890 *piPhysExt = iPhysExt;
2891 return pPhysExt;
2892}
2893
2894
2895/**
2896 * Frees a physical cross reference extent.
2897 *
2898 * @param pVM The VM handle.
2899 * @param iPhysExt The extent to free.
2900 */
2901void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2902{
2903 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2904 Assert(iPhysExt < pPool->cMaxPhysExts);
2905 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2906 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2907 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2908 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2909 pPool->iPhysExtFreeHead = iPhysExt;
2910}
2911
2912
2913/**
 2914 * Frees a list of physical cross reference extents.
 2915 *
 2916 * @param pVM The VM handle.
 2917 * @param iPhysExt The index of the first extent in the list to free.
2918 */
2919void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2920{
2921 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2922
2923 const uint16_t iPhysExtStart = iPhysExt;
2924 PPGMPOOLPHYSEXT pPhysExt;
2925 do
2926 {
2927 Assert(iPhysExt < pPool->cMaxPhysExts);
2928 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2929 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2930 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2931
2932 /* next */
2933 iPhysExt = pPhysExt->iNext;
2934 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2935
2936 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2937 pPool->iPhysExtFreeHead = iPhysExtStart;
2938}
2939
2940
2941/**
2942 * Insert a reference into a list of physical cross reference extents.
2943 *
2944 * @returns The new ram range flags (top 16-bits).
2945 *
2946 * @param pVM The VM handle.
2947 * @param iPhysExt The physical extent index of the list head.
2948 * @param iShwPT The shadow page table index.
2949 *
2950 */
2951static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2952{
2953 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2954 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2955
2956 /* special common case. */
2957 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2958 {
2959 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2960 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2961 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2962 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2963 }
2964
2965 /* general treatment. */
2966 const uint16_t iPhysExtStart = iPhysExt;
2967 unsigned cMax = 15;
2968 for (;;)
2969 {
2970 Assert(iPhysExt < pPool->cMaxPhysExts);
2971 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2972 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2973 {
2974 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2975 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2976 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2977 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2978 }
2979 if (!--cMax)
2980 {
2981 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2982 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2983 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2984 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
 2985         }
              /* Advance to the next extent in the list; when it is exhausted, fall
                 out of the loop and prepend a fresh extent below. (Without this
                 advance the scan would revisit the same extent and the code after
                 the loop would be unreachable.) */
              iPhysExt = paPhysExts[iPhysExt].iNext;
              if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
                  break;
 2986     }
2987
2988 /* add another extent to the list. */
2989 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2990 if (!pNew)
2991 {
2992 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2993 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2994 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2995 }
2996 pNew->iNext = iPhysExtStart;
2997 pNew->aidx[0] = iShwPT;
2998 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2999 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3000}
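
/*
 * Illustrative aside (not part of PGMAllPool.cpp): the values returned by
 * pgmPoolTrackPhysExtInsert above are packed tracking words - an index in the
 * low bits and a reference tag ("crefs") in the high bits of a 16-bit field.
 * While the tag is a small count, the index names the owning shadow PT; once
 * it is set to the special PHYSEXT value, the index names an extent-list head
 * instead. The constants below (IDX_BITS, CREFS_PHYSEXT, ...) are invented and
 * only illustrate the shift/mask arithmetic, not the real MM_RAM_FLAGS layout.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define IDX_BITS      11
#define IDX_MASK      ((UINT16_C(1) << IDX_BITS) - 1)   /* 0x07ff */
#define CREFS_SHIFT   IDX_BITS
#define CREFS_PHYSEXT UINT16_C(0x1f)                    /* tag: index is an extent index */

static uint16_t packTrack(uint16_t crefs, uint16_t idx)
{
    return (uint16_t)((crefs << CREFS_SHIFT) | (idx & IDX_MASK));
}

int main(void)
{
    /* One reference: crefs == 1, index is the owning shadow PT's pool index. */
    uint16_t u16 = packTrack(1, 42);
    assert((u16 >> CREFS_SHIFT) == 1 && (u16 & IDX_MASK) == 42);

    /* Too many references: switch to an extent list starting at extent 7. */
    u16 = packTrack(CREFS_PHYSEXT, 7);
    assert((u16 >> CREFS_SHIFT) == CREFS_PHYSEXT && (u16 & IDX_MASK) == 7);

    printf("packed tracking word sketch OK\n");
    return 0;
}
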
3001
3002
3003/**
3004 * Add a reference to guest physical page where extents are in use.
3005 *
3006 * @returns The new ram range flags (top 16-bits).
3007 *
3008 * @param pVM The VM handle.
3009 * @param u16 The ram range flags (top 16-bits).
3010 * @param iShwPT The shadow page table index.
3011 */
3012uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3013{
3014 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
3015 {
3016 /*
3017 * Convert to extent list.
3018 */
3019 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
3020 uint16_t iPhysExt;
3021 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3022 if (pPhysExt)
3023 {
3024 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
3025 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3026 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
3027 pPhysExt->aidx[1] = iShwPT;
3028 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3029 }
3030 else
3031 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3032 }
3033 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
3034 {
3035 /*
3036 * Insert into the extent list.
3037 */
3038 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
3039 }
3040 else
3041 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3042 return u16;
3043}
3044
3045
3046/**
3047 * Clear references to guest physical memory.
3048 *
3049 * @param pPool The pool.
3050 * @param pPage The page.
3051 * @param pPhysPage Pointer to the aPages entry in the ram range.
3052 */
3053void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3054{
3055 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
3056 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3057
3058 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
3059 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
3060 {
3061 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3062 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3063 do
3064 {
3065 Assert(iPhysExt < pPool->cMaxPhysExts);
3066
3067 /*
3068 * Look for the shadow page and check if it's all freed.
3069 */
3070 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3071 {
3072 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3073 {
3074 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3075
3076 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3077 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3078 {
3079 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3080 return;
3081 }
3082
3083 /* we can free the node. */
3084 PVM pVM = pPool->CTX_SUFF(pVM);
3085 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3086 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3087 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3088 {
3089 /* lonely node */
3090 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3091 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
3092 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3093 }
3094 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3095 {
3096 /* head */
3097 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
3098 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
3099 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
3100 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
3101 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3102 }
3103 else
3104 {
3105 /* in list */
3106 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3107 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3108 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3109 }
3110 iPhysExt = iPhysExtNext;
3111 return;
3112 }
3113 }
3114
3115 /* next */
3116 iPhysExtPrev = iPhysExt;
3117 iPhysExt = paPhysExts[iPhysExt].iNext;
3118 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3119
3120 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3121 }
3122 else /* nothing to do */
3123 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
3124}
3125
3126
3127/**
3128 * Clear references to guest physical memory.
3129 *
 3130 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3131 * is assumed to be correct, so the linear search can be skipped and we can assert
3132 * at an earlier point.
3133 *
3134 * @param pPool The pool.
3135 * @param pPage The page.
3136 * @param HCPhys The host physical address corresponding to the guest page.
3137 * @param GCPhys The guest physical address corresponding to HCPhys.
3138 */
3139static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3140{
3141 /*
3142 * Walk range list.
3143 */
3144 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3145 while (pRam)
3146 {
3147 RTGCPHYS off = GCPhys - pRam->GCPhys;
3148 if (off < pRam->cb)
3149 {
3150 /* does it match? */
3151 const unsigned iPage = off >> PAGE_SHIFT;
3152 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3153#ifdef LOG_ENABLED
3154            RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3155            Log(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3156#endif
3157 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3158 {
3159 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3160 return;
3161 }
3162 break;
3163 }
3164 pRam = pRam->CTX_SUFF(pNext);
3165 }
3166 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3167}
3168
3169
3170/**
3171 * Clear references to guest physical memory.
3172 *
3173 * @param pPool The pool.
3174 * @param pPage The page.
3175 * @param HCPhys The host physical address corresponding to the guest page.
 3176 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3177 */
3178static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3179{
3180 /*
3181 * Walk range list.
3182 */
3183 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3184 while (pRam)
3185 {
3186 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3187 if (off < pRam->cb)
3188 {
3189 /* does it match? */
3190 const unsigned iPage = off >> PAGE_SHIFT;
3191 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3192 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3193 {
3194 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3195 return;
3196 }
3197 break;
3198 }
3199 pRam = pRam->CTX_SUFF(pNext);
3200 }
3201
3202 /*
3203 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3204 */
3205 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3206 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3207 while (pRam)
3208 {
3209 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3210 while (iPage-- > 0)
3211 {
3212 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3213 {
3214 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3215 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3216 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3217 return;
3218 }
3219 }
3220 pRam = pRam->CTX_SUFF(pNext);
3221 }
3222
3223 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3224}
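
/*
 * Illustrative aside (not part of PGMAllPool.cpp): pgmPoolTracDerefGCPhysHint
 * above trusts the guest-physical hint first - it locates the RAM range that
 * contains the hint and compares just that one page - and only falls back to
 * a full linear search over every page of every range when the hint turns out
 * to be stale. The standalone sketch below shows that hint-plus-fallback
 * lookup; the types and names (RamRange, findPageByHint) are invented.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT_SK 12

typedef struct RamRange
{
    uint64_t         GCPhys;    /* guest physical start of the range  */
    uint64_t         cb;        /* size of the range in bytes         */
    const uint64_t  *paHCPhys;  /* host physical address of each page */
    size_t           cPages;
    struct RamRange *pNext;
} RamRange;

/* Returns the matching page's host-address slot, or NULL. */
static const uint64_t *findPageByHint(const RamRange *pHead, uint64_t HCPhys, uint64_t GCPhysHint)
{
    /* 1. Trust the hint: locate its range and compare just that one page.
          (The unsigned subtraction also rejects hints below the range start.) */
    for (const RamRange *pRam = pHead; pRam; pRam = pRam->pNext)
    {
        uint64_t off = GCPhysHint - pRam->GCPhys;
        if (off < pRam->cb)
        {
            size_t iPage = (size_t)(off >> PAGE_SHIFT_SK);
            if (pRam->paHCPhys[iPage] == HCPhys)
                return &pRam->paHCPhys[iPage];
            break;                               /* hint was stale; fall back */
        }
    }
    /* 2. Expensive fallback: scan every page of every range. */
    for (const RamRange *pRam = pHead; pRam; pRam = pRam->pNext)
        for (size_t iPage = 0; iPage < pRam->cPages; iPage++)
            if (pRam->paHCPhys[iPage] == HCPhys)
                return &pRam->paHCPhys[iPage];
    return NULL;
}

int main(void)
{
    static const uint64_t aHC[4] = { 0x111000, 0x222000, 0x333000, 0x444000 };
    RamRange ram = { 0x100000, 4 << PAGE_SHIFT_SK, aHC, 4, NULL };

    /* A correct hint hits page 2 directly; a stale hint still finds it. */
    assert(findPageByHint(&ram, 0x333000, 0x102000) == &aHC[2]);
    assert(findPageByHint(&ram, 0x333000, 0x100000) == &aHC[2]);
    printf("hint lookup sketch OK\n");
    return 0;
}
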
3225
3226
3227/**
3228 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3229 *
3230 * @param pPool The pool.
3231 * @param pPage The page.
3232 * @param pShwPT The shadow page table (mapping of the page).
3233 * @param pGstPT The guest page table.
3234 */
3235DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3236{
3237 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3238 if (pShwPT->a[i].n.u1Present)
3239 {
3240 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3241 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3242 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3243 if (!--pPage->cPresent)
3244 break;
3245 }
3246}
3247
3248
3249/**
3250 * Clear references to guest physical memory in a PAE / 32-bit page table.
3251 *
3252 * @param pPool The pool.
3253 * @param pPage The page.
3254 * @param pShwPT The shadow page table (mapping of the page).
3255 * @param pGstPT The guest page table (just a half one).
3256 */
3257DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3258{
3259 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3260 if (pShwPT->a[i].n.u1Present)
3261 {
 3262            Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3263 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3264 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3265 }
3266}
3267
3268
3269/**
3270 * Clear references to guest physical memory in a PAE / PAE page table.
3271 *
3272 * @param pPool The pool.
3273 * @param pPage The page.
3274 * @param pShwPT The shadow page table (mapping of the page).
3275 * @param pGstPT The guest page table.
3276 */
3277DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3278{
3279 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3280 if (pShwPT->a[i].n.u1Present)
3281 {
 3282            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3283 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3284 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3285 }
3286}
3287
3288
3289/**
3290 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3291 *
3292 * @param pPool The pool.
3293 * @param pPage The page.
3294 * @param pShwPT The shadow page table (mapping of the page).
3295 */
3296DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3297{
3298 RTGCPHYS GCPhys = pPage->GCPhys;
3299 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3300 if (pShwPT->a[i].n.u1Present)
3301 {
3302 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3303 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3304 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3305 }
3306}
3307
3308
3309/**
3310 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3311 *
3312 * @param pPool The pool.
3313 * @param pPage The page.
3314 * @param pShwPT The shadow page table (mapping of the page).
3315 */
3316DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3317{
3318 RTGCPHYS GCPhys = pPage->GCPhys;
3319 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3320 if (pShwPT->a[i].n.u1Present)
3321 {
3322 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3323 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3324 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3325 }
3326}
3327
3328#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3329
3330/**
3331 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
3332 *
3333 * @param pPool The pool.
3334 * @param pPage The page.
3335 * @param pShwPD The shadow page directory (mapping of the page).
3336 */
3337DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3338{
3339 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3340 {
3341 if (pShwPD->a[i].n.u1Present)
3342 {
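/* Look up the pool page backing this PDE by its host physical address and
   drop this page directory's user reference to it. */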
3343 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3344 if (pSubPage)
3345 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3346 else
3347 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3348 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3349 }
3350 }
3351}
3352
3353
3354/**
3355 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3356 *
3357 * @param pPool The pool.
3358 * @param pPage The page.
3359 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3360 */
3361DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3362{
3363 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3364 {
3365 if (pShwPDPT->a[i].n.u1Present)
3366 {
3367 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3368 if (pSubPage)
3369 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3370 else
3371 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3372 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3373 }
3374 }
3375}
3376
3377
3378/**
3379 * Clear references to shadowed pages in a 64-bit level 4 page table.
3380 *
3381 * @param pPool The pool.
3382 * @param pPage The page.
3383 * @param pShwPML4 The shadow level 4 page table (mapping of the page).
3384 */
3385DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3386{
3387 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3388 {
3389 if (pShwPML4->a[i].n.u1Present)
3390 {
3391 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PML4E_PG_MASK);
3392 if (pSubPage)
3393 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3394 else
3395 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3396 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3397 }
3398 }
3399}
3400
3401
3402/**
3403 * Clear references to guest physical memory in an EPT page table.
3404 *
3405 * @param pPool The pool.
3406 * @param pPage The page.
3407 * @param pShwPT The shadow page table (mapping of the page).
3408 */
3409DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3410{
3411 RTGCPHYS GCPhys = pPage->GCPhys;
3412 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3413 if (pShwPT->a[i].n.u1Present)
3414 {
3415 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RGp\n",
3416 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3417 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3418 }
3419}
3420
3421
3422/**
3423 * Clear references to shadowed pages in an EPT page directory.
3424 *
3425 * @param pPool The pool.
3426 * @param pPage The page.
3427 * @param pShwPD The shadow page directory (mapping of the page).
3428 */
3429DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3430{
3431 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3432 {
3433 if (pShwPD->a[i].n.u1Present)
3434 {
3435 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3436 if (pSubPage)
3437 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3438 else
3439 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3440 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3441 }
3442 }
3443}
3444
3445
3446/**
3447 * Clear references to shadowed pages in an EPT page directory pointer table.
3448 *
3449 * @param pPool The pool.
3450 * @param pPage The page.
3451 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3452 */
3453DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3454{
3455 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3456 {
3457 if (pShwPDPT->a[i].n.u1Present)
3458 {
3459 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3460 if (pSubPage)
3461 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3462 else
3463 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3464 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3465 }
3466 }
3467}
3468
3469
3470/**
3471 * Clears all references made by this page.
3472 *
3473 * This includes other shadow pages and GC physical addresses.
3474 *
3475 * @param pPool The pool.
3476 * @param pPage The page.
3477 */
3478static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3479{
3480 /*
3481 * Map the shadow page and take action according to the page kind.
3482 */
3483 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3484 switch (pPage->enmKind)
3485 {
3486#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3487 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3488 {
3489 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3490 void *pvGst;
3491 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3492 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3493 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3494 break;
3495 }
3496
3497 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3498 {
3499 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3500 void *pvGst;
3501 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3502 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3503 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3504 break;
3505 }
3506
3507 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3508 {
3509 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3510 void *pvGst;
3511 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3512 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3513 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3514 break;
3515 }
3516
3517 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3518 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3519 {
3520 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3521 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3522 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3523 break;
3524 }
3525
3526 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3527 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3528 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3529 {
3530 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3531 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3532 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3533 break;
3534 }
3535
3536#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3537 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3538 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3539 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3540 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3541 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3542 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3543 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3544 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3545 break;
3546#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3547
3548 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
3549 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3550 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3551 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3552 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3553 break;
3554
3555 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3556 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3557 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3558 break;
3559
3560 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
3561 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3562 break;
3563
3564 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3565 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
3566 break;
3567
3568 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3569 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
3570 break;
3571
3572 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3573 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
3574 break;
3575
3576 default:
3577 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3578 }
3579
3580 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3581 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3582 ASMMemZeroPage(pvShw);
3583 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3584 pPage->fZeroed = true;
3585}
3586
3587#endif /* PGMPOOL_WITH_USER_TRACKING */
3588
3589/**
3590 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3591 *
3592 * @param pPool The pool.
3593 */
3594static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3595{
3596 /*
3597 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST-1.
3598 */
3599 Assert(NIL_PGMPOOL_IDX == 0);
3600 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3601 {
3602 /*
3603 * Get the page address.
3604 */
3605 PPGMPOOLPAGE pPage = &pPool->aPages[i];
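/* The union below lets the same mapping be viewed as 32-bit or 64-bit
   entries, depending on the kind of root page handled in the switch. */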
3606 union
3607 {
3608 uint64_t *pau64;
3609 uint32_t *pau32;
3610 } u;
3611
3612 /*
3613 * Mark stuff not present.
3614 */
3615 switch (pPage->enmKind)
3616 {
3617 case PGMPOOLKIND_ROOT_32BIT_PD:
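/* Clear present entries, but leave the hypervisor mapping PDEs
   (PGM_PDFLAGS_MAPPING) alone; the PAE case below does the same. */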
3618 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3619 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3620 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3621 u.pau32[iPage] = 0;
3622 break;
3623
3624 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3625 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3626 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
3627 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3628 u.pau64[iPage] = 0;
3629 break;
3630
3631 case PGMPOOLKIND_ROOT_PDPT:
3632 /* Not currently the root of any shadowed pages; ignore it. */
3633 break;
3634
3635 case PGMPOOLKIND_ROOT_NESTED:
3636 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3637 ASMMemZero32(u.pau64, PAGE_SIZE);
3638 break;
3639 }
3640 }
3641
3642 /*
3643 * Paranoia (to be removed), flag a global CR3 sync.
3644 */
3645 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
3646}
3647
3648
3649/**
3650 * Flushes the entire cache.
3651 *
3652 * It will raise the global CR3 sync force-action flag (FF) and assumes the caller
3653 * is aware of this and will perform the CR3 flush.
3654 *
3655 * @param pPool The pool.
3656 */
3657static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3658{
3659 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3660 LogFlow(("pgmPoolFlushAllInt:\n"));
3661
3662 /*
3663 * If there are no pages in the pool, there is nothing to do.
3664 */
3665 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3666 {
3667 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3668 return;
3669 }
3670
3671 /*
3672 * Nuke the free list and reinsert all pages into it.
3673 */
3674 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3675 {
3676 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3677
3678#ifdef IN_RING3
3679 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMR3, pPage->pvPageR3));
3680#endif
3681#ifdef PGMPOOL_WITH_MONITORING
3682 if (pPage->fMonitored)
3683 pgmPoolMonitorFlush(pPool, pPage);
3684 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3685 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3686 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3687 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3688 pPage->cModifications = 0;
3689#endif
3690 pPage->GCPhys = NIL_RTGCPHYS;
3691 pPage->enmKind = PGMPOOLKIND_FREE;
3692 Assert(pPage->idx == i);
3693 pPage->iNext = i + 1;
3694 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3695 pPage->fSeenNonGlobal = false;
3696 pPage->fMonitored = false;
3697 pPage->fCached = false;
3698 pPage->fReusedFlushPending = false;
3699 pPage->fCR3Mix = false;
3700#ifdef PGMPOOL_WITH_USER_TRACKING
3701 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3702#endif
3703#ifdef PGMPOOL_WITH_CACHE
3704 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3705 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3706#endif
3707 }
3708 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3709 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3710 pPool->cUsedPages = 0;
3711
3712#ifdef PGMPOOL_WITH_USER_TRACKING
3713 /*
3714 * Zap and reinitialize the user records.
3715 */
3716 pPool->cPresent = 0;
3717 pPool->iUserFreeHead = 0;
3718 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3719 const unsigned cMaxUsers = pPool->cMaxUsers;
3720 for (unsigned i = 0; i < cMaxUsers; i++)
3721 {
3722 paUsers[i].iNext = i + 1;
3723 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3724 paUsers[i].iUserTable = 0xfffffffe;
3725 }
3726 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3727#endif
3728
3729#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3730 /*
3731 * Clear all the GCPhys links and rebuild the phys ext free list.
3732 */
3733 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3734 pRam;
3735 pRam = pRam->CTX_SUFF(pNext))
3736 {
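/* Strip the tracking references stored in the per-page flags; the
   physical extent records are rebuilt from scratch below. */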
3737 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3738 while (iPage-- > 0)
3739 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3740 }
3741
3742 pPool->iPhysExtFreeHead = 0;
3743 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3744 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3745 for (unsigned i = 0; i < cMaxPhysExts; i++)
3746 {
3747 paPhysExts[i].iNext = i + 1;
3748 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3749 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3750 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3751 }
3752 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3753#endif
3754
3755#ifdef PGMPOOL_WITH_MONITORING
3756 /*
3757 * Just zap the modified list.
3758 */
3759 pPool->cModifiedPages = 0;
3760 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3761#endif
3762
3763#ifdef PGMPOOL_WITH_CACHE
3764 /*
3765 * Clear the GCPhys hash and the age list.
3766 */
3767 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
3768 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3769 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3770 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3771#endif
3772
3773 /*
3774 * Flush all the special root pages.
3775 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3776 */
3777 pgmPoolFlushAllSpecialRoots(pPool);
3778 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3779 {
3780 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3781 pPage->iNext = NIL_PGMPOOL_IDX;
3782#ifdef PGMPOOL_WITH_MONITORING
3783 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3784 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3785 pPage->cModifications = 0;
3786 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3787 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3788 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3789 if (pPage->fMonitored)
3790 {
3791 PVM pVM = pPool->CTX_SUFF(pVM);
3792 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3793 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3794 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3795 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
3796 pPool->pszAccessHandler);
3797 AssertFatalRCSuccess(rc);
3798# ifdef PGMPOOL_WITH_CACHE
3799 pgmPoolHashInsert(pPool, pPage);
3800# endif
3801 }
3802#endif
3803#ifdef PGMPOOL_WITH_USER_TRACKING
3804 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3805#endif
3806#ifdef PGMPOOL_WITH_CACHE
3807 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3808 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3809#endif
3810 }
3811
3812 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3813}
3814
3815
3816/**
3817 * Flushes a pool page.
3818 *
3819 * This moves the page to the free list after removing all user references to it.
3820 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3821 *
3822 * @returns VBox status code.
3823 * @retval VINF_SUCCESS on success.
3824 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
3825 * @param pPool The pool.
3826 * @param pPage The pool page to flush.
3827 */
3828int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3829{
3830 int rc = VINF_SUCCESS;
3831 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3832 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%d, .GCPhys=%RGp}\n",
3833 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3834
3835 /*
3836 * Quietly reject any attempts at flushing any of the special root pages.
3837 */
3838 if (pPage->idx < PGMPOOL_IDX_FIRST)
3839 {
3840 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3841 return VINF_SUCCESS;
3842 }
3843
3844 /*
3845 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
3846 */
3847 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
3848 {
3849 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4,
3850 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
3851 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3852 return VINF_SUCCESS;
3853 }
3854
3855 /*
3856 * Mark the page as being in need of an ASMMemZeroPage().
3857 */
3858 pPage->fZeroed = false;
3859
3860#ifdef PGMPOOL_WITH_USER_TRACKING
3861 /*
3862 * Clear the page.
3863 */
3864 pgmPoolTrackClearPageUsers(pPool, pPage);
3865 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3866 pgmPoolTrackDeref(pPool, pPage);
3867 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3868#endif
3869
3870#ifdef PGMPOOL_WITH_CACHE
3871 /*
3872 * Flush it from the cache.
3873 */
3874 pgmPoolCacheFlushPage(pPool, pPage);
3875#endif /* PGMPOOL_WITH_CACHE */
3876
3877#ifdef PGMPOOL_WITH_MONITORING
3878 /*
3879 * Deregister the monitoring.
3880 */
3881 if (pPage->fMonitored)
3882 rc = pgmPoolMonitorFlush(pPool, pPage);
3883#endif
3884
3885 /*
3886 * Free the page.
3887 */
3888 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3889 pPage->iNext = pPool->iFreeHead;
3890 pPool->iFreeHead = pPage->idx;
3891 pPage->enmKind = PGMPOOLKIND_FREE;
3892 pPage->GCPhys = NIL_RTGCPHYS;
3893 pPage->fReusedFlushPending = false;
3894
3895 pPool->cUsedPages--;
3896 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3897 return rc;
3898}
3899
3900
3901/**
3902 * Frees a usage of a pool page.
3903 *
3904 * The caller is responsible for updating the user table so that it no longer
3905 * references the shadow page.
3906 *
3907 * @param pPool The pool.
3908 * @param pPage The pool page.
3909 * @param iUser The shadow page pool index of the user table.
3910 * @param iUserTable The index into the user table (shadowed).
3911 */
3912void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3913{
3914 STAM_PROFILE_START(&pPool->StatFree, a);
3915 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3916 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3917 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3918#ifdef PGMPOOL_WITH_USER_TRACKING
3919 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3920#endif
3921#ifdef PGMPOOL_WITH_CACHE
3922 if (!pPage->fCached)
3923#endif
3924 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3925 STAM_PROFILE_STOP(&pPool->StatFree, a);
3926}
3927
3928
3929/**
3930 * Makes sure one or more free pages are available, growing the pool or evicting pages as needed.
3931 *
3932 * @returns VBox status code.
3933 * @retval VINF_SUCCESS on success.
3934 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3935 *
3936 * @param pPool The pool.
3937 * @param iUser The user of the page.
3938 */
3939static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3940{
3941 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3942
3943 /*
3944 * If the pool isn't fully grown yet, expand it.
3945 */
3946 if (pPool->cCurPages < pPool->cMaxPages)
3947 {
3948 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
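/* In ring-3 the pool can be grown directly; in ring-0 and raw-mode context
   we have to call back into ring-3 to do the growing. */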
3949#ifdef IN_RING3
3950 int rc = PGMR3PoolGrow(pPool->pVMR3);
3951#else
3952 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3953#endif
3954 if (RT_FAILURE(rc))
3955 return rc;
3956 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3957 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3958 return VINF_SUCCESS;
3959 }
3960
3961#ifdef PGMPOOL_WITH_CACHE
3962 /*
3963 * Free one cached page.
3964 */
3965 return pgmPoolCacheFreeOne(pPool, iUser);
3966#else
3967 /*
3968 * Flush the pool.
3969 * If we have tracking enabled, it should be possible to come up with
3970 * a cheap replacement strategy...
3971 */
3972 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
3973 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
3974 pgmPoolFlushAllInt(pPool);
3975 return VERR_PGM_POOL_FLUSHED;
3976#endif
3977}
3978
3979
3980/**
3981 * Allocates a page from the pool.
3982 *
3983 * This page may actually be a cached page and not in need of any processing
3984 * on the callers part.
3985 *
3986 * @returns VBox status code.
3987 * @retval VINF_SUCCESS if a NEW page was allocated.
3988 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3989 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3990 * @param pVM The VM handle.
3991 * @param GCPhys The GC physical address of the page we're going to shadow.
3992 * For 4MB and 2MB PD entries, it's the first address the
3993 * shadow PT is covering.
3994 * @param enmKind The kind of mapping.
3995 * @param iUser The shadow page pool index of the user table.
3996 * @param iUserTable The index into the user table (shadowed).
3997 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3998 */
3999int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
4000{
4001 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4002 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4003 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
4004 *ppPage = NULL;
4005
4006#ifdef PGMPOOL_WITH_CACHE
4007 if (pPool->fCacheEnabled)
4008 {
4009 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
4010 if (RT_SUCCESS(rc2))
4011 {
4012 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4013 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4014 return rc2;
4015 }
4016 }
4017#endif
4018
4019 /*
4020 * Allocate a new one.
4021 */
4022 int rc = VINF_SUCCESS;
4023 uint16_t iNew = pPool->iFreeHead;
4024 if (iNew == NIL_PGMPOOL_IDX)
4025 {
4026 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
4027 if (RT_FAILURE(rc))
4028 {
4029 if (rc != VERR_PGM_POOL_CLEARED)
4030 {
4031 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4032 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4033 return rc;
4034 }
4035 Log(("pgmPoolMakeMoreFreePages failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
4036 rc = VERR_PGM_POOL_FLUSHED;
4037 }
4038 iNew = pPool->iFreeHead;
4039 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4040 }
4041
4042 /* unlink the free head */
4043 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4044 pPool->iFreeHead = pPage->iNext;
4045 pPage->iNext = NIL_PGMPOOL_IDX;
4046
4047 /*
4048 * Initialize it.
4049 */
4050 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4051 pPage->enmKind = enmKind;
4052 pPage->GCPhys = GCPhys;
4053 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4054 pPage->fMonitored = false;
4055 pPage->fCached = false;
4056 pPage->fReusedFlushPending = false;
4057 pPage->fCR3Mix = false;
4058#ifdef PGMPOOL_WITH_MONITORING
4059 pPage->cModifications = 0;
4060 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4061 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4062#endif
4063#ifdef PGMPOOL_WITH_USER_TRACKING
4064 pPage->cPresent = 0;
4065 pPage->iFirstPresent = ~0;
4066
4067 /*
4068 * Insert into the tracking and cache. If this fails, free the page.
4069 */
4070 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4071 if (RT_FAILURE(rc3))
4072 {
4073 if (rc3 != VERR_PGM_POOL_CLEARED)
4074 {
4075 pPool->cUsedPages--;
4076 pPage->enmKind = PGMPOOLKIND_FREE;
4077 pPage->GCPhys = NIL_RTGCPHYS;
4078 pPage->iNext = pPool->iFreeHead;
4079 pPool->iFreeHead = pPage->idx;
4080 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4081 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4082 return rc3;
4083 }
4084 Log(("pgmPoolTrackInsert failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4085 rc = VERR_PGM_POOL_FLUSHED;
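/* Note: on VERR_PGM_POOL_CLEARED the page itself is still valid, so keep it
   and report the flush to the caller through the status code. */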
4086 }
4087#endif /* PGMPOOL_WITH_USER_TRACKING */
4088
4089 /*
4090 * Commit the allocation, clear the page and return.
4091 */
4092#ifdef VBOX_WITH_STATISTICS
4093 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4094 pPool->cUsedPagesHigh = pPool->cUsedPages;
4095#endif
4096
4097 if (!pPage->fZeroed)
4098 {
4099 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4100 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4101 ASMMemZeroPage(pv);
4102 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4103 }
4104
4105 *ppPage = pPage;
4106 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4107 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4108 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4109 return rc;
4110}
4111
4112
4113/**
4114 * Frees a usage of a pool page.
4115 *
4116 * @param pVM The VM handle.
4117 * @param HCPhys The HC physical address of the shadow page.
4118 * @param iUser The shadow page pool index of the user table.
4119 * @param iUserTable The index into the user table (shadowed).
4120 */
4121void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4122{
4123 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4124 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4125 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4126}
4127
4128
4129/**
4130 * Gets an in-use page in the pool by its physical address.
4131 *
4132 * @returns Pointer to the page.
4133 * @param pVM The VM handle.
4134 * @param HCPhys The HC physical address of the shadow page.
4135 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4136 */
4137PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4138{
4139 /** @todo profile this! */
4140 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4141 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4142 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%RHp -> %p:{.idx=%d .GCPhys=%RGp .enmKind=%d}\n",
4143 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
4144 return pPage;
4145}
4146
4147
4148/**
4149 * Flushes the entire cache.
4150 *
4151 * It will raise the global CR3 sync force-action flag (FF) and assumes the caller
4152 * is aware of this and will perform the CR3 flush.
4153 *
4154 * @param pVM The VM handle.
4155 */
4156void pgmPoolFlushAll(PVM pVM)
4157{
4158 LogFlow(("pgmPoolFlushAll:\n"));
4159 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4160}
4161