VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp @ 14257

Last change on this file since 14257 was 14149, checked in by vboxsync, 16 years ago

PGM: HCPhysPaePDPT -> HCPhysShwPaePdpt.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 147.8 KB
1/* $Id: PGMAllPool.cpp 14149 2008-11-12 23:23:27Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42
43
44/*******************************************************************************
45* Internal Functions *
46*******************************************************************************/
47__BEGIN_DECLS
48static void pgmPoolFlushAllInt(PPGMPOOL pPool);
49#ifdef PGMPOOL_WITH_USER_TRACKING
50DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
51DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#endif
54#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
55static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
56#endif
57#ifdef PGMPOOL_WITH_CACHE
58static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
59#endif
60#ifdef PGMPOOL_WITH_MONITORING
61static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
62#endif
63#ifndef IN_RING3
64DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
65#endif
66__END_DECLS
67
68
69/**
70 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
71 *
72 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
73 * @param enmKind The page kind.
74 */
75DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
76{
77 switch (enmKind)
78 {
79 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
82 return true;
83 default:
84 return false;
85 }
86}
87
88
89#if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
90/**
91 * Maps a pool page into the current context.
92 *
93 * @returns Pointer to the mapping.
94 * @param pVM The VM handle.
95 * @param pPage The page to map.
96 */
97void *pgmPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
98{
99 /* general pages. */
100 if (pPage->idx >= PGMPOOL_IDX_FIRST)
101 {
102 Assert(pPage->idx < pVM->pgm.s.CTX_SUFF(pPool)->cCurPages);
103 void *pv;
104 int rc = PGMDynMapHCPage(pVM, pPage->Core.Key, &pv);
105 AssertReleaseRC(rc);
106 return pv;
107 }
108
109 /* special pages. */
110# ifdef IN_RC
111 switch (pPage->idx)
112 {
113 case PGMPOOL_IDX_PD:
114 return pVM->pgm.s.pShw32BitPdRC;
115 case PGMPOOL_IDX_PAE_PD:
116 case PGMPOOL_IDX_PAE_PD_0:
117 return pVM->pgm.s.apShwPaePDsRC[0];
118 case PGMPOOL_IDX_PAE_PD_1:
119 return pVM->pgm.s.apShwPaePDsRC[1];
120 case PGMPOOL_IDX_PAE_PD_2:
121 return pVM->pgm.s.apShwPaePDsRC[2];
122 case PGMPOOL_IDX_PAE_PD_3:
123 return pVM->pgm.s.apShwPaePDsRC[3];
124 case PGMPOOL_IDX_PDPT:
125 return pVM->pgm.s.pShwPaePdptRC;
126 default:
127 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
128 return NULL;
129 }
130
131# else /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
132 RTHCPHYS HCPhys;
133 switch (pPage->idx)
134 {
135 case PGMPOOL_IDX_PD:
136 HCPhys = pVM->pgm.s.HCPhysShw32BitPD;
137 break;
138 case PGMPOOL_IDX_PAE_PD:
139 case PGMPOOL_IDX_PAE_PD_0:
140 HCPhys = pVM->pgm.s.aHCPhysPaePDs[0];
141 break;
142 case PGMPOOL_IDX_PAE_PD_1:
143 HCPhys = pVM->pgm.s.aHCPhysPaePDs[1];
144 break;
145 case PGMPOOL_IDX_PAE_PD_2:
146 HCPhys = pVM->pgm.s.aHCPhysPaePDs[2];
147 break;
148 case PGMPOOL_IDX_PAE_PD_3:
149 HCPhys = pVM->pgm.s.aHCPhysPaePDs[3];
150 break;
151 case PGMPOOL_IDX_PDPT:
152 HCPhys = pVM->pgm.s.HCPhysShwPaePdpt;
153 break;
154 default:
155 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
156 return NULL;
157 }
158 void *pv;
159 int rc = PGMDynMapHCPage(pVM, pPage->Core.Key, &pv);
160 AssertReleaseRC(rc);
161 return pv;
162# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
163}
164#endif /* IN_RC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
165
166
167#ifdef PGMPOOL_WITH_MONITORING
168/**
169 * Determines the size of a write instruction.
170 * @returns number of bytes written.
171 * @param pDis The disassembler state.
172 */
173static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
174{
175 /*
176 * This is very crude and possibly wrong for some opcodes,
177 * but since it's not really supposed to be called we can
178 * probably live with that.
179 */
180 return DISGetParamSize(pDis, &pDis->param1);
181}
182
183
184/**
185 * Flushes a chain of pages sharing the same access monitor.
186 *
187 * @returns VBox status code suitable for scheduling.
188 * @param pPool The pool.
189 * @param pPage A page in the chain.
190 */
191int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
192{
193 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
194
195 /*
196 * Find the list head.
197 */
198 uint16_t idx = pPage->idx;
199 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
200 {
201 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
202 {
203 idx = pPage->iMonitoredPrev;
204 Assert(idx != pPage->idx);
205 pPage = &pPool->aPages[idx];
206 }
207 }
208
209 /*
210 * Iterate the list flushing each shadow page.
211 */
212 int rc = VINF_SUCCESS;
213 for (;;)
214 {
215 idx = pPage->iMonitoredNext;
216 Assert(idx != pPage->idx);
217 if (pPage->idx >= PGMPOOL_IDX_FIRST)
218 {
219 int rc2 = pgmPoolFlushPage(pPool, pPage);
220 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
221 rc = VINF_PGM_SYNC_CR3;
222 }
223 /* next */
224 if (idx == NIL_PGMPOOL_IDX)
225 break;
226 pPage = &pPool->aPages[idx];
227 }
228 return rc;
229}
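
For readers skimming the listing: pgmPoolMonitorChainFlush walks an intrusive doubly linked list that is threaded through the pool's page array by 16-bit indices rather than pointers. Below is a minimal standalone sketch of that traversal pattern; EXAMPLEPAGE, NIL_IDX and exampleChainFlush are illustrative names for this sketch only, not VirtualBox APIs.

#include <stdint.h>
#include <stdio.h>

#define NIL_IDX UINT16_MAX   /* plays the role of NIL_PGMPOOL_IDX */

typedef struct EXAMPLEPAGE
{
    uint16_t iMonitoredPrev;  /* index of the previous page in the chain, or NIL_IDX */
    uint16_t iMonitoredNext;  /* index of the next page in the chain, or NIL_IDX */
} EXAMPLEPAGE;

/* Rewind to the head of the chain, then visit every member front to back,
   fetching the next index before the current page is (conceptually) flushed. */
static void exampleChainFlush(EXAMPLEPAGE *paPages, uint16_t idx)
{
    while (paPages[idx].iMonitoredPrev != NIL_IDX)
        idx = paPages[idx].iMonitoredPrev;
    while (idx != NIL_IDX)
    {
        uint16_t iNext = paPages[idx].iMonitoredNext;
        printf("flushing page %u\n", idx);
        idx = iNext;
    }
}
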
230
231
232/**
233 * Wrapper for getting the current context pointer to the entry being modified.
234 *
235 * @returns Pointer to the current context mapping of the entry.
236 * @param pPool The pool.
237 * @param pvFault The fault virtual address.
238 * @param GCPhysFault The fault physical address.
239 * @param cbEntry The entry size.
240 */
241#ifdef IN_RING3
242DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
243#else
244DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
245#endif
246{
247#ifdef IN_RC
248 return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
249
250#elif defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
251 void *pvRet;
252 int rc = PGMDynMapGCPageOff(pPool->pVMR0, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
253 AssertFatalRCSuccess(rc);
254 return pvRet;
255
256#elif defined(IN_RING0)
257 void *pvRet;
258 int rc = pgmRamGCPhys2HCPtr(&pPool->pVMR0->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
259 AssertFatalRCSuccess(rc);
260 return pvRet;
261
262#elif defined(IN_RING3)
263 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
264#else
265# error "huh?"
266#endif
267}
268
269
270/**
271 * Process shadow entries before they are changed by the guest.
272 *
273 * For PT entries we will clear them. For PD entries, we'll simply check
274 * for mapping conflicts and set the SyncCR3 FF if found.
275 *
276 * @param pPool The pool.
277 * @param pPage The head page.
278 * @param GCPhysFault The guest physical fault address.
279 * @param uAddress In R0 and GC this is the guest context fault address (flat).
280 * In R3 this is the host context 'fault' address.
281 * @param pCpu The disassembler state for figuring out the write size.
282 * This need not be specified if the caller knows we won't do cross entry accesses.
283 */
284#ifdef IN_RING3
285void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
286#else
287void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
288#endif
289{
290 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
291 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
292 const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
293
294 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));
295
296 for (;;)
297 {
298 union
299 {
300 void *pv;
301 PX86PT pPT;
302 PX86PTPAE pPTPae;
303 PX86PD pPD;
304 PX86PDPAE pPDPae;
305 PX86PDPT pPDPT;
306 PX86PML4 pPML4;
307 } uShw;
308 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
309
310 switch (pPage->enmKind)
311 {
312 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
313 {
314 const unsigned iShw = off / sizeof(X86PTE);
315 if (uShw.pPT->a[iShw].n.u1Present)
316 {
317# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
318 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
319 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
320 pgmPoolTracDerefGCPhysHint(pPool, pPage,
321 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
322 pGstPte->u & X86_PTE_PG_MASK);
323# endif
324 uShw.pPT->a[iShw].u = 0;
325 }
326 break;
327 }
328
329 /* page/2 sized */
330 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
331 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
332 {
333 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
334 if (uShw.pPTPae->a[iShw].n.u1Present)
335 {
336# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
337 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
338 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
339 pgmPoolTracDerefGCPhysHint(pPool, pPage,
340 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
341 pGstPte->u & X86_PTE_PG_MASK);
342# endif
343 uShw.pPTPae->a[iShw].u = 0;
344 }
345 }
346 break;
347
348 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
349 {
350 const unsigned iShw = off / sizeof(X86PTEPAE);
351 if (uShw.pPTPae->a[iShw].n.u1Present)
352 {
353# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
354 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
355 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
356 pgmPoolTracDerefGCPhysHint(pPool, pPage,
357 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
358 pGstPte->u & X86_PTE_PAE_PG_MASK);
359# endif
360 uShw.pPTPae->a[iShw].u = 0;
361 }
362
363 /* paranoia / a bit assumptive. */
364 if ( pCpu
365 && (off & 7)
366 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
367 {
368 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
369 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
370
371 if (uShw.pPTPae->a[iShw2].n.u1Present)
372 {
373# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
374 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
375 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
376 pgmPoolTracDerefGCPhysHint(pPool, pPage,
377 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
378 pGstPte->u & X86_PTE_PAE_PG_MASK);
379# endif
380 uShw.pPTPae->a[iShw2].u = 0;
381 }
382 }
383
384 break;
385 }
386
387 case PGMPOOLKIND_ROOT_32BIT_PD:
388 {
389 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
390 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
391 {
392 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
393 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
394 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
395 }
396 /* paranoia / a bit assumptive. */
397 else if ( pCpu
398 && (off & 3)
399 && (off & 3) + cbWrite > sizeof(X86PTE))
400 {
401 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
402 if ( iShw2 != iShw
403 && iShw2 < RT_ELEMENTS(uShw.pPD->a)
404 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
405 {
406 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
407 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
408 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
409 }
410 }
411#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
412 if ( uShw.pPD->a[iShw].n.u1Present
413 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
414 {
415 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
416# ifdef IN_RC /* TLB load - we're pushing things a bit... */
417 ASMProbeReadByte(pvAddress);
418# endif
419 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
420 uShw.pPD->a[iShw].u = 0;
421 }
422#endif
423 break;
424 }
425
426 case PGMPOOLKIND_ROOT_PAE_PD:
427 {
428 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
429 for (unsigned i = 0; i < 2; i++, iShw++)
430 {
431 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
432 {
433 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
434 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
435 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
436 }
437 /* paranoia / a bit assumptive. */
438 else if ( pCpu
439 && (off & 3)
440 && (off & 3) + cbWrite > 4)
441 {
442 const unsigned iShw2 = iShw + 2;
443 if ( iShw2 < RT_ELEMENTS(uShw.pPDPae->a)
444 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
445 {
446 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
447 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
448 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
449 }
450 }
451#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
452 if ( uShw.pPDPae->a[iShw].n.u1Present
453 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
454 {
455 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
456# ifdef IN_RC /* TLB load - we're pushing things a bit... */
457 ASMProbeReadByte(pvAddress);
458# endif
459 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
460 uShw.pPDPae->a[iShw].u = 0;
461 }
462#endif
463 }
464 break;
465 }
466
467 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
468 {
469 const unsigned iShw = off / sizeof(X86PDEPAE);
470 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
471 {
472 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
473 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
474 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
475 }
476#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
477 /*
478 * Causes trouble when the guest uses a PDE to refer to the whole page table level
479 * structure. (Invalidate here; faults later on when it tries to change the page
480 * table entries -> recheck; probably only applies to the RC case.)
481 */
482 else
483 {
484 if (uShw.pPDPae->a[iShw].n.u1Present)
485 {
486 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
487 pgmPoolFree(pPool->CTX_SUFF(pVM),
488 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
489 /* Note: hardcoded PAE implementation dependency */
490 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
491 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
492 uShw.pPDPae->a[iShw].u = 0;
493 }
494 }
495#endif
496 /* paranoia / a bit assumptive. */
497 if ( pCpu
498 && (off & 7)
499 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
500 {
501 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
502 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
503
504 if ( iShw2 != iShw
505 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
506 {
507 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
508 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
509 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
510 }
511#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
512 else if (uShw.pPDPae->a[iShw2].n.u1Present)
513 {
514 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
515 pgmPoolFree(pPool->CTX_SUFF(pVM),
516 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
517 /* Note: hardcoded PAE implementation dependency */
518 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
519 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
520 uShw.pPDPae->a[iShw2].u = 0;
521 }
522#endif
523 }
524 break;
525 }
526
527 case PGMPOOLKIND_ROOT_PDPT:
528 {
529 /*
530 * Hopefully this doesn't happen very often:
531 * - touching unused parts of the page
532 * - messing with the bits of pd pointers without changing the physical address
533 */
534 const unsigned iShw = off / sizeof(X86PDPE);
535 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
536 {
537 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
538 {
539 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
540 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
541 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
542 }
543 /* paranoia / a bit assumptive. */
544 else if ( pCpu
545 && (off & 7)
546 && (off & 7) + cbWrite > sizeof(X86PDPE))
547 {
548 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
549 if ( iShw2 != iShw
550 && iShw2 < X86_PG_PAE_PDPE_ENTRIES
551 && uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
552 {
553 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
554 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
555 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
556 }
557 }
558 }
559 break;
560 }
561
562#ifndef IN_RC
563 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
564 {
565 Assert(pPage->enmKind == PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD);
566
567 const unsigned iShw = off / sizeof(X86PDEPAE);
568 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
569 {
570 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
571 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
572 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
573 }
574 else
575 {
576 if (uShw.pPDPae->a[iShw].n.u1Present)
577 {
578 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
579 pgmPoolFree(pPool->CTX_SUFF(pVM),
580 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
581 pPage->idx,
582 iShw);
583 uShw.pPDPae->a[iShw].u = 0;
584 }
585 }
586 /* paranoia / a bit assumptive. */
587 if ( pCpu
588 && (off & 7)
589 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
590 {
591 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
592 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
593
594 if ( iShw2 != iShw
595 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
596 {
597 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
598 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
599 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
600 }
601 else
602 if (uShw.pPDPae->a[iShw2].n.u1Present)
603 {
604 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
605 pgmPoolFree(pPool->CTX_SUFF(pVM),
606 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
607 pPage->idx,
608 iShw2);
609 uShw.pPDPae->a[iShw2].u = 0;
610 }
611 }
612 break;
613 }
614
615 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
616 {
617 /*
618 * Hopefully this doesn't happen very often:
619 * - messing with the bits of pd pointers without changing the physical address
620 */
621 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
622 {
623 const unsigned iShw = off / sizeof(X86PDPE);
624 if (uShw.pPDPT->a[iShw].n.u1Present)
625 {
626 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
627 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
628 uShw.pPDPT->a[iShw].u = 0;
629 }
630 /* paranoia / a bit assumptive. */
631 if ( pCpu
632 && (off & 7)
633 && (off & 7) + cbWrite > sizeof(X86PDPE))
634 {
635 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
636 if (uShw.pPDPT->a[iShw2].n.u1Present)
637 {
638 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
639 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
640 uShw.pPDPT->a[iShw2].u = 0;
641 }
642 }
643 }
644 break;
645 }
646
647 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
648 {
649 /*
650 * Hopefully this doesn't happen very often:
651 * - messing with the bits of pd pointers without changing the physical address
652 */
653 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
654 {
655 const unsigned iShw = off / sizeof(X86PDPE);
656 if (uShw.pPML4->a[iShw].n.u1Present)
657 {
658 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
659 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
660 uShw.pPML4->a[iShw].u = 0;
661 }
662 /* paranoia / a bit assumptive. */
663 if ( pCpu
664 && (off & 7)
665 && (off & 7) + cbWrite > sizeof(X86PDPE))
666 {
667 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
668 if (uShw.pPML4->a[iShw2].n.u1Present)
669 {
670 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
671 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
672 uShw.pPML4->a[iShw2].u = 0;
673 }
674 }
675 }
676 break;
677 }
678#endif /* !IN_RC */
679
680 default:
681 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
682 }
683
684 /* next */
685 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
686 return;
687 pPage = &pPool->aPages[pPage->iMonitoredNext];
688 }
689}
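
Every case above uses the same arithmetic to find the affected shadow entries: the first entry is off / sizeof(entry), and a misaligned write longer than one entry may also touch (off + cbWrite - 1) / sizeof(entry). A small standalone sketch of that calculation, assuming 8-byte PAE/long-mode entries (exampleAffectedEntries is an illustrative helper, not part of PGM):

#include <stdio.h>

/* Report which 8-byte shadow entries a write of cbWrite bytes at page offset off touches. */
static void exampleAffectedEntries(unsigned off, unsigned cbWrite)
{
    const unsigned cbEntry = 8;                         /* sizeof(X86PTEPAE) / sizeof(X86PDEPAE) */
    unsigned iShw  = off / cbEntry;                     /* first affected entry */
    unsigned iShw2 = (off + cbWrite - 1) / cbEntry;     /* last byte written may land in the next entry */
    if (iShw2 != iShw)
        printf("off=%#x cb=%u: entries %u and %u\n", off, cbWrite, iShw, iShw2);
    else
        printf("off=%#x cb=%u: entry %u\n", off, cbWrite, iShw);
}

int main(void)
{
    exampleAffectedEntries(0x14, 4);    /* aligned dword write: one entry */
    exampleAffectedEntries(0x16, 4);    /* misaligned write spilling into the next entry */
    return 0;
}
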
690
691
692# ifndef IN_RING3
693/**
694 * Checks if an access could be a fork operation in progress.
695 *
696 * Meaning that the guest is setting up the parent process for Copy-On-Write.
697 *
698 * @returns true if it's likely that we're forking, otherwise false.
699 * @param pPool The pool.
700 * @param pCpu The disassembled instruction.
701 * @param offFault The access offset.
702 */
703DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
704{
705 /*
706 * i386 linux is using btr to clear X86_PTE_RW.
707 * The functions involved are (2.6.16 source inspection):
708 * clear_bit
709 * ptep_set_wrprotect
710 * copy_one_pte
711 * copy_pte_range
712 * copy_pmd_range
713 * copy_pud_range
714 * copy_page_range
715 * dup_mmap
716 * dup_mm
717 * copy_mm
718 * copy_process
719 * do_fork
720 */
721 if ( pCpu->pCurInstr->opcode == OP_BTR
722 && !(offFault & 4)
723 /** @todo Validate that the bit index is X86_PTE_RW. */
724 )
725 {
726 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
727 return true;
728 }
729 return false;
730}
731
732
733/**
734 * Determine whether the page is likely to have been reused.
735 *
736 * @returns true if we consider the page as being reused for a different purpose.
737 * @returns false if we consider it to still be a paging page.
738 * @param pVM VM Handle.
739 * @param pPage The page in question.
740 * @param pRegFrame Trap register frame.
741 * @param pCpu The disassembly info for the faulting instruction.
742 * @param pvFault The fault address.
743 *
744 * @remark The REP prefix check is left to the caller because of STOSD/W.
745 */
746DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
747{
748#ifndef IN_RC
749 /** @todo could make this general, faulting close to rsp should be safe reuse heuristic. */
750 if ( HWACCMHasPendingIrq(pVM)
751 && (pRegFrame->rsp - pvFault) < 32)
752 {
753 /* Fault caused by stack writes while trying to inject an interrupt event. */
754 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
755 return true;
756 }
757#else
758 NOREF(pVM); NOREF(pvFault);
759#endif
760
761 switch (pCpu->pCurInstr->opcode)
762 {
763 /* call implies the actual push of the return address faulted */
764 case OP_CALL:
765 Log4(("pgmPoolMonitorIsReused: CALL\n"));
766 return true;
767 case OP_PUSH:
768 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
769 return true;
770 case OP_PUSHF:
771 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
772 return true;
773 case OP_PUSHA:
774 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
775 return true;
776 case OP_FXSAVE:
777 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
778 return true;
779 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
780 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
781 return true;
782 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
783 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
784 return true;
785 case OP_MOVSWD:
786 case OP_STOSWD:
787 if ( pCpu->prefix == (PREFIX_REP|PREFIX_REX)
788 && pRegFrame->rcx >= 0x40
789 )
790 {
791 Assert(pCpu->mode == CPUMODE_64BIT);
792
793 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
794 return true;
795 }
796 return false;
797 }
798 if ( (pCpu->param1.flags & USE_REG_GEN32)
799 && (pCpu->param1.base.reg_gen == USE_REG_ESP))
800 {
801 Log4(("pgmPoolMonitorIsReused: ESP\n"));
802 return true;
803 }
804
805 //if (pPage->fCR3Mix)
806 // return false;
807 return false;
808}
809
810
811/**
812 * Flushes the page being accessed.
813 *
814 * @returns VBox status code suitable for scheduling.
815 * @param pVM The VM handle.
816 * @param pPool The pool.
817 * @param pPage The pool page (head).
818 * @param pCpu The disassembly of the write instruction.
819 * @param pRegFrame The trap register frame.
820 * @param GCPhysFault The fault address as guest physical address.
821 * @param pvFault The fault address.
822 */
823static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
824 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
825{
826 /*
827 * First, do the flushing.
828 */
829 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
830
831 /*
832 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
833 */
834 uint32_t cbWritten;
835 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
836 if (RT_SUCCESS(rc2))
837 pRegFrame->rip += pCpu->opsize;
838 else if (rc2 == VERR_EM_INTERPRETER)
839 {
840#ifdef IN_RC
841 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
842 {
843 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
844 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
845 rc = VINF_SUCCESS;
846 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
847 }
848 else
849#endif
850 {
851 rc = VINF_EM_RAW_EMULATE_INSTR;
852 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
853 }
854 }
855 else
856 rc = rc2;
857
858 /* See use in pgmPoolAccessHandlerSimple(). */
859 PGM_INVL_GUEST_TLBS();
860
861 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
862 return rc;
863
864}
865
866
867/**
868 * Handles the STOSD write accesses.
869 *
870 * @returns VBox status code suitable for scheduling.
871 * @param pVM The VM handle.
872 * @param pPool The pool.
873 * @param pPage The pool page (head).
874 * @param pCpu The disassembly of the write instruction.
875 * @param pRegFrame The trap register frame.
876 * @param GCPhysFault The fault address as guest physical address.
877 * @param pvFault The fault address.
878 */
879DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
880 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
881{
882 Assert(pCpu->mode == CPUMODE_32BIT);
883
884 /*
885 * Increment the modification counter and insert it into the list
886 * of modified pages the first time.
887 */
888 if (!pPage->cModifications++)
889 pgmPoolMonitorModifiedInsert(pPool, pPage);
890
891 /*
892 * Execute REP STOSD.
893 *
894 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
895 * write situation, meaning that it's safe to write here.
896 */
897 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
898 while (pRegFrame->ecx)
899 {
900 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
901#ifdef IN_RC
902 *(uint32_t *)pu32 = pRegFrame->eax;
903#else
904 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
905#endif
906 pu32 += 4;
907 GCPhysFault += 4;
908 pRegFrame->edi += 4;
909 pRegFrame->ecx--;
910 }
911 pRegFrame->rip += pCpu->opsize;
912
913 /* See use in pgmPoolAccessHandlerSimple(). */
914 PGM_INVL_GUEST_TLBS();
915
916 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
917 return VINF_SUCCESS;
918}
919
920
921/**
922 * Handles the simple write accesses.
923 *
924 * @returns VBox status code suitable for scheduling.
925 * @param pVM The VM handle.
926 * @param pPool The pool.
927 * @param pPage The pool page (head).
928 * @param pCpu The disassembly of the write instruction.
929 * @param pRegFrame The trap register frame.
930 * @param GCPhysFault The fault address as guest physical address.
931 * @param pvFault The fault address.
932 */
933DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
934 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
935{
936 /*
937 * Increment the modification counter and insert it into the list
938 * of modified pages the first time.
939 */
940 if (!pPage->cModifications++)
941 pgmPoolMonitorModifiedInsert(pPool, pPage);
942
943 /*
944 * Clear all the pages. ASSUMES that pvFault is readable.
945 */
946 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
947
948 /*
949 * Interpret the instruction.
950 */
951 uint32_t cb;
952 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
953 if (RT_SUCCESS(rc))
954 pRegFrame->rip += pCpu->opsize;
955 else if (rc == VERR_EM_INTERPRETER)
956 {
957 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
958 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
959 rc = VINF_EM_RAW_EMULATE_INSTR;
960 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
961 }
962
963 /*
964 * Quick hack, with logging enabled we're getting stale
965 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
966 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
967 * have to be fixed to support this. But that'll have to wait till next week.
968 *
969 * An alternative is to keep track of the changed PTEs together with the
970 * GCPhys from the guest PT. This may prove expensive though.
971 *
972 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
973 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
974 */
975 PGM_INVL_GUEST_TLBS();
976
977 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
978 return rc;
979}
980
981
982/**
983 * \#PF Handler callback for PT write accesses.
984 *
985 * @returns VBox status code (appropriate for GC return).
986 * @param pVM VM Handle.
987 * @param uErrorCode CPU Error code.
988 * @param pRegFrame Trap register frame.
989 * NULL on DMA and other non CPU access.
990 * @param pvFault The fault address (cr2).
991 * @param GCPhysFault The GC physical address corresponding to pvFault.
992 * @param pvUser User argument.
993 */
994DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
995{
996 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
997 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
998 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
999 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1000
1001 /*
1002 * We should ALWAYS have the list head as user parameter. This
1003 * is because we use that page to record the changes.
1004 */
1005 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1006
1007 /*
1008 * Disassemble the faulting instruction.
1009 */
1010 DISCPUSTATE Cpu;
1011 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
1012 AssertRCReturn(rc, rc);
1013
1014 /*
1015 * Check if it's worth dealing with.
1016 */
1017 bool fReused = false;
1018 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1019 || pPage->fCR3Mix)
1020 && !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
1021 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
1022 {
1023 /*
1024 * Simple instructions, no REP prefix.
1025 */
1026 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
1027 {
1028 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1029 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1030 return rc;
1031 }
1032
1033 /*
1034 * Windows is frequently doing small memset() operations (netio test 4k+).
1035 * We have to deal with these or we'll kill the cache and performance.
1036 */
1037 if ( Cpu.pCurInstr->opcode == OP_STOSWD
1038 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
1039 && pRegFrame->ecx <= 0x20
1040 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1041 && !((uintptr_t)pvFault & 3)
1042 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1043 && Cpu.mode == CPUMODE_32BIT
1044 && Cpu.opmode == CPUMODE_32BIT
1045 && Cpu.addrmode == CPUMODE_32BIT
1046 && Cpu.prefix == PREFIX_REP
1047 && !pRegFrame->eflags.Bits.u1DF
1048 )
1049 {
1050 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1051 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1052 return rc;
1053 }
1054
1055 /* REP prefix, don't bother. */
1056 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1057 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1058 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
1059 }
1060
1061 /*
1062 * Not worth it, so flush it.
1063 *
1064 * If we considered it to be reused, don't go back to ring-3
1065 * to emulate failed instructions since we usually cannot
1066 * interpret them. This may be a bit risky, in which case
1067 * the reuse detection must be fixed.
1068 */
1069 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1070 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1071 rc = VINF_SUCCESS;
1072 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1073 return rc;
1074}
1075
1076# endif /* !IN_RING3 */
1077#endif /* PGMPOOL_WITH_MONITORING */
1078
1079#ifdef PGMPOOL_WITH_CACHE
1080
1081/**
1082 * Inserts a page into the GCPhys hash table.
1083 *
1084 * @param pPool The pool.
1085 * @param pPage The page.
1086 */
1087DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1088{
1089 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1090 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1091 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1092 pPage->iNext = pPool->aiHash[iHash];
1093 pPool->aiHash[iHash] = pPage->idx;
1094}
1095
1096
1097/**
1098 * Removes a page from the GCPhys hash table.
1099 *
1100 * @param pPool The pool.
1101 * @param pPage The page.
1102 */
1103DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1104{
1105 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1106 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1107 if (pPool->aiHash[iHash] == pPage->idx)
1108 pPool->aiHash[iHash] = pPage->iNext;
1109 else
1110 {
1111 uint16_t iPrev = pPool->aiHash[iHash];
1112 for (;;)
1113 {
1114 const int16_t i = pPool->aPages[iPrev].iNext;
1115 if (i == pPage->idx)
1116 {
1117 pPool->aPages[iPrev].iNext = pPage->iNext;
1118 break;
1119 }
1120 if (i == NIL_PGMPOOL_IDX)
1121 {
1122 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1123 break;
1124 }
1125 iPrev = i;
1126 }
1127 }
1128 pPage->iNext = NIL_PGMPOOL_IDX;
1129}
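
pgmPoolHashInsert and pgmPoolHashRemove maintain per-bucket singly linked chains threaded through the page array via the iNext index. The following is a simplified standalone sketch of the same insert/unlink logic; EXAMPLEPOOL, EXAMPLEPAGE and EXAMPLE_HASH are stand-ins for this sketch and deliberately much smaller than the real structures.

#include <stdint.h>

#define NIL_IDX      UINT16_MAX
#define HASH_SIZE    64
#define EXAMPLE_HASH(GCPhys)  ((uint16_t)(((GCPhys) >> 12) % HASH_SIZE))

typedef struct EXAMPLEPAGE { uint64_t GCPhys; uint16_t iNext; } EXAMPLEPAGE;
typedef struct EXAMPLEPOOL { uint16_t aiHash[HASH_SIZE]; EXAMPLEPAGE aPages[256]; } EXAMPLEPOOL;

/* Push the page onto the front of its bucket's chain. */
static void exampleHashInsert(EXAMPLEPOOL *pPool, uint16_t idx)
{
    uint16_t iHash = EXAMPLE_HASH(pPool->aPages[idx].GCPhys);
    pPool->aPages[idx].iNext = pPool->aiHash[iHash];
    pPool->aiHash[iHash] = idx;
}

/* Unlink the page: either it is the bucket head or its predecessor is found by walking the chain. */
static void exampleHashRemove(EXAMPLEPOOL *pPool, uint16_t idx)
{
    uint16_t iHash = EXAMPLE_HASH(pPool->aPages[idx].GCPhys);
    if (pPool->aiHash[iHash] == idx)
        pPool->aiHash[iHash] = pPool->aPages[idx].iNext;
    else
        for (uint16_t iPrev = pPool->aiHash[iHash]; iPrev != NIL_IDX; iPrev = pPool->aPages[iPrev].iNext)
            if (pPool->aPages[iPrev].iNext == idx)
            {
                pPool->aPages[iPrev].iNext = pPool->aPages[idx].iNext;
                break;
            }
    pPool->aPages[idx].iNext = NIL_IDX;
}
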
1130
1131
1132/**
1133 * Frees up one cache page.
1134 *
1135 * @returns VBox status code.
1136 * @retval VINF_SUCCESS on success.
1137 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
1138 * @param pPool The pool.
1139 * @param iUser The user index.
1140 */
1141static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1142{
1143#ifndef IN_RC
1144 const PVM pVM = pPool->CTX_SUFF(pVM);
1145#endif
1146 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
1147 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1148
1149 /*
1150 * Select one page from the tail of the age list.
1151 */
1152 uint16_t iToFree = pPool->iAgeTail;
1153 if (iToFree == iUser)
1154 iToFree = pPool->aPages[iToFree].iAgePrev;
1155/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1156 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1157 {
1158 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1159 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1160 {
1161 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1162 continue;
1163 iToFree = i;
1164 break;
1165 }
1166 }
1167*/
1168
1169 Assert(iToFree != iUser);
1170 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1171
1172 PPGMPOOLPAGE pPage = &pPool->aPages[iToFree];
1173
1174 /*
1175 * Reject any attempts at flushing the currently active shadow CR3 mapping
1176 */
1177 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
1178 {
1179 /* Refresh the cr3 mapping by putting it at the head of the age list. */
1180 pgmPoolCacheUsed(pPool, pPage);
1181 return pgmPoolCacheFreeOne(pPool, iUser);
1182 }
1183
1184 int rc = pgmPoolFlushPage(pPool, pPage);
1185 if (rc == VINF_SUCCESS)
1186 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1187 return rc;
1188}
1189
1190
1191/**
1192 * Checks if a kind mismatch is really a page being reused
1193 * or if it's just a normal remapping.
1194 *
1195 * @returns true if reused and the cached page (enmKind1) should be flushed
1196 * @returns false if not reused.
1197 * @param enmKind1 The kind of the cached page.
1198 * @param enmKind2 The kind of the requested page.
1199 */
1200static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1201{
1202 switch (enmKind1)
1203 {
1204 /*
1205 * Never reuse them. There is no remapping in non-paging mode.
1206 */
1207 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1208 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1209 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1210 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1211 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1212 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1213 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1214 return true;
1215
1216 /*
1217 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1218 */
1219 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1220 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1221 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1222 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1223 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1224 switch (enmKind2)
1225 {
1226 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1227 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1228 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1229 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1230 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1231 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1232 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1233 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1234 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1235 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1236 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1237 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1238 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1239 return true;
1240 default:
1241 return false;
1242 }
1243
1244 /*
1245 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1246 */
1247 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1248 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1249 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1250 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1251 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1252 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1253 switch (enmKind2)
1254 {
1255 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1256 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1257 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1258 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1259 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1260 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1261 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1262 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1263 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1264 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1265 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1266 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1267 return true;
1268 default:
1269 return false;
1270 }
1271
1272 /*
1273 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1274 */
1275 case PGMPOOLKIND_ROOT_32BIT_PD:
1276 case PGMPOOLKIND_ROOT_PAE_PD:
1277 case PGMPOOLKIND_ROOT_PDPT:
1278 case PGMPOOLKIND_ROOT_NESTED:
1279 return false;
1280
1281 default:
1282 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1283 }
1284}
1285
1286
1287/**
1288 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1289 *
1290 * @returns VBox status code.
1291 * @retval VINF_PGM_CACHED_PAGE on success.
1292 * @retval VERR_FILE_NOT_FOUND if not found.
1293 * @param pPool The pool.
1294 * @param GCPhys The GC physical address of the page we're gonna shadow.
1295 * @param enmKind The kind of mapping.
1296 * @param iUser The shadow page pool index of the user table.
1297 * @param iUserTable The index into the user table (shadowed).
1298 * @param ppPage Where to store the pointer to the page.
1299 */
1300static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1301{
1302#ifndef IN_RC
1303 const PVM pVM = pPool->CTX_SUFF(pVM);
1304#endif
1305 /*
1306 * Look up the GCPhys in the hash.
1307 */
1308 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1309 Log3(("pgmPoolCacheAlloc: %RGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
1310 if (i != NIL_PGMPOOL_IDX)
1311 {
1312 do
1313 {
1314 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1315 Log3(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1316 if (pPage->GCPhys == GCPhys)
1317 {
1318 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
1319 {
1320 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1321 if (RT_SUCCESS(rc))
1322 {
1323 *ppPage = pPage;
1324 STAM_COUNTER_INC(&pPool->StatCacheHits);
1325 return VINF_PGM_CACHED_PAGE;
1326 }
1327 return rc;
1328 }
1329
1330 /*
1331 * The kind is different. In some cases we should now flush the page
1332 * as it has been reused, but in most cases this is normal remapping
1333 * of PDs as PTs or big pages using the GCPhys field in a slightly
1334 * different way than the other kinds.
1335 */
1336 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1337 {
1338 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1339 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
1340 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1341 break;
1342 }
1343 }
1344
1345 /* next */
1346 i = pPage->iNext;
1347 } while (i != NIL_PGMPOOL_IDX);
1348 }
1349
1350 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
1351 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1352 return VERR_FILE_NOT_FOUND;
1353}
1354
1355
1356/**
1357 * Inserts a page into the cache.
1358 *
1359 * @param pPool The pool.
1360 * @param pPage The cached page.
1361 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1362 */
1363static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1364{
1365 /*
1366 * Insert into the GCPhys hash if the page is fit for that.
1367 */
1368 Assert(!pPage->fCached);
1369 if (fCanBeCached)
1370 {
1371 pPage->fCached = true;
1372 pgmPoolHashInsert(pPool, pPage);
1373 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1374 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1375 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1376 }
1377 else
1378 {
1379 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1380 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1381 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1382 }
1383
1384 /*
1385 * Insert at the head of the age list.
1386 */
1387 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1388 pPage->iAgeNext = pPool->iAgeHead;
1389 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1390 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1391 else
1392 pPool->iAgeTail = pPage->idx;
1393 pPool->iAgeHead = pPage->idx;
1394}
1395
1396
1397/**
1398 * Flushes a cached page.
1399 *
1400 * @param pPool The pool.
1401 * @param pPage The cached page.
1402 */
1403static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1404{
1405 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1406
1407 /*
1408 * Remove the page from the hash.
1409 */
1410 if (pPage->fCached)
1411 {
1412 pPage->fCached = false;
1413 pgmPoolHashRemove(pPool, pPage);
1414 }
1415 else
1416 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1417
1418 /*
1419 * Remove it from the age list.
1420 */
1421 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1422 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1423 else
1424 pPool->iAgeTail = pPage->iAgePrev;
1425 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1426 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1427 else
1428 pPool->iAgeHead = pPage->iAgeNext;
1429 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1430 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1431}
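
Together, pgmPoolCacheInsert, pgmPoolCacheFlushPage and pgmPoolCacheFreeOne implement a plain LRU: new or touched pages go to the head of the age list and eviction takes the tail. Below is a standalone sketch of the two list operations, again using indices rather than pointers (illustrative names only, not the real pool structures):

#include <stdint.h>

#define NIL_IDX UINT16_MAX

typedef struct EXAMPLEPAGE { uint16_t iAgePrev, iAgeNext; } EXAMPLEPAGE;
typedef struct EXAMPLEPOOL { uint16_t iAgeHead, iAgeTail; EXAMPLEPAGE aPages[256]; } EXAMPLEPOOL;

/* Most recently used pages are linked in at the head (cf. pgmPoolCacheInsert). */
static void exampleAgeInsertAtHead(EXAMPLEPOOL *pPool, uint16_t idx)
{
    pPool->aPages[idx].iAgePrev = NIL_IDX;
    pPool->aPages[idx].iAgeNext = pPool->iAgeHead;
    if (pPool->iAgeHead != NIL_IDX)
        pPool->aPages[pPool->iAgeHead].iAgePrev = idx;
    else
        pPool->iAgeTail = idx;                    /* list was empty, so the page is also the tail */
    pPool->iAgeHead = idx;
}

/* Unlink a page from wherever it sits in the list (cf. pgmPoolCacheFlushPage). */
static void exampleAgeUnlink(EXAMPLEPOOL *pPool, uint16_t idx)
{
    if (pPool->aPages[idx].iAgeNext != NIL_IDX)
        pPool->aPages[pPool->aPages[idx].iAgeNext].iAgePrev = pPool->aPages[idx].iAgePrev;
    else
        pPool->iAgeTail = pPool->aPages[idx].iAgePrev;
    if (pPool->aPages[idx].iAgePrev != NIL_IDX)
        pPool->aPages[pPool->aPages[idx].iAgePrev].iAgeNext = pPool->aPages[idx].iAgeNext;
    else
        pPool->iAgeHead = pPool->aPages[idx].iAgeNext;
    pPool->aPages[idx].iAgeNext = NIL_IDX;
    pPool->aPages[idx].iAgePrev = NIL_IDX;
}
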
1432
1433#endif /* PGMPOOL_WITH_CACHE */
1434#ifdef PGMPOOL_WITH_MONITORING
1435
1436/**
1437 * Looks for pages sharing the monitor.
1438 *
1439 * @returns Pointer to the head page.
1440 * @returns NULL if not found.
1441 * @param pPool The Pool
1442 * @param pNewPage The page which is going to be monitored.
1443 */
1444static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1445{
1446#ifdef PGMPOOL_WITH_CACHE
1447 /*
1448 * Look up the GCPhys in the hash.
1449 */
1450 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1451 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1452 if (i == NIL_PGMPOOL_IDX)
1453 return NULL;
1454 do
1455 {
1456 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1457 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1458 && pPage != pNewPage)
1459 {
1460 switch (pPage->enmKind)
1461 {
1462 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1463 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1464 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1465 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1466 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1467 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1468 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1469 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1470 case PGMPOOLKIND_ROOT_32BIT_PD:
1471 case PGMPOOLKIND_ROOT_PAE_PD:
1472 case PGMPOOLKIND_ROOT_PDPT:
1473 {
1474 /* find the head */
1475 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1476 {
1477 Assert(pPage->iMonitoredPrev != pPage->idx);
1478 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1479 }
1480 return pPage;
1481 }
1482
1483 /* ignore, no monitoring. */
1484 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1485 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1486 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1487 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1488 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1489 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1490 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1491 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1492 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1493 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1494 case PGMPOOLKIND_ROOT_NESTED:
1495 break;
1496 default:
1497 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1498 }
1499 }
1500
1501 /* next */
1502 i = pPage->iNext;
1503 } while (i != NIL_PGMPOOL_IDX);
1504#endif
1505 return NULL;
1506}
1507
1508
1509/**
1510 * Enables write monitoring of a guest page.
1511 *
1512 * @returns VBox status code.
1513 * @retval VINF_SUCCESS on success.
1514 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1515 * @param pPool The pool.
1516 * @param pPage The cached page.
1517 */
1518static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1519{
1520 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1521
1522 /*
1523 * Filter out the relevant kinds.
1524 */
1525 switch (pPage->enmKind)
1526 {
1527 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1528 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1529 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1530 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1531 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1532 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1533 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1534 case PGMPOOLKIND_ROOT_PDPT:
1535 break;
1536
1537 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1538 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1539 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1540 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1541 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1542 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1543 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1544 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1545 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1546 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1547 case PGMPOOLKIND_ROOT_NESTED:
1548 /* Nothing to monitor here. */
1549 return VINF_SUCCESS;
1550
1551 case PGMPOOLKIND_ROOT_32BIT_PD:
1552 case PGMPOOLKIND_ROOT_PAE_PD:
1553#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1554 break;
1555#endif
1556 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1557 default:
1558 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1559 }
1560
1561 /*
1562 * Install handler.
1563 */
1564 int rc;
1565 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1566 if (pPageHead)
1567 {
1568 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1569 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1570 pPage->iMonitoredPrev = pPageHead->idx;
1571 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1572 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1573 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1574 pPageHead->iMonitoredNext = pPage->idx;
1575 rc = VINF_SUCCESS;
1576 }
1577 else
1578 {
1579 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1580 PVM pVM = pPool->CTX_SUFF(pVM);
1581 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1582 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1583 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1584 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1585 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1586 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1587 pPool->pszAccessHandler);
1588 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1589 * the heap size should suffice. */
1590 AssertFatalRC(rc);
1591 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1592 rc = VERR_PGM_POOL_CLEARED;
1593 }
1594 pPage->fMonitored = true;
1595 return rc;
1596}
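
When a second pool page ends up monitoring the same guest page, pgmPoolMonitorInsert splices it into the existing chain right after the head instead of registering another physical access handler. A standalone sketch of that splice on a simplified index-linked list (illustrative names, not the real PGMPOOLPAGE):

#include <stdint.h>

#define NIL_IDX UINT16_MAX

typedef struct EXAMPLEPAGE
{
    uint16_t iMonitoredPrev;
    uint16_t iMonitoredNext;
} EXAMPLEPAGE;

/* Insert page idxNew immediately after the existing chain head idxHead,
   mirroring the link fix-ups performed by pgmPoolMonitorInsert above. */
static void exampleMonitorSplice(EXAMPLEPAGE *paPages, uint16_t idxHead, uint16_t idxNew)
{
    paPages[idxNew].iMonitoredPrev = idxHead;
    paPages[idxNew].iMonitoredNext = paPages[idxHead].iMonitoredNext;
    if (paPages[idxHead].iMonitoredNext != NIL_IDX)
        paPages[paPages[idxHead].iMonitoredNext].iMonitoredPrev = idxNew;
    paPages[idxHead].iMonitoredNext = idxNew;
}
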
1597
1598
1599/**
1600 * Disables write monitoring of a guest page.
1601 *
1602 * @returns VBox status code.
1603 * @retval VINF_SUCCESS on success.
1604 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1605 * @param pPool The pool.
1606 * @param pPage The cached page.
1607 */
1608static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1609{
1610 /*
1611 * Filter out the relevant kinds.
1612 */
1613 switch (pPage->enmKind)
1614 {
1615 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1616 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1617 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1618 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1619 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1620 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1621 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1622 case PGMPOOLKIND_ROOT_PDPT:
1623 break;
1624
1625 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1626 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1627 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1628 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1629 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1630 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1631 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1632 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1633 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1634 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1635 case PGMPOOLKIND_ROOT_NESTED:
1636 /* Nothing to monitor here. */
1637 return VINF_SUCCESS;
1638
1639 case PGMPOOLKIND_ROOT_32BIT_PD:
1640 case PGMPOOLKIND_ROOT_PAE_PD:
1641#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1642 break;
1643#endif
1644 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1645 default:
1646 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1647 }
1648
1649 /*
1650 * Remove the page from the monitored list or uninstall it if last.
1651 */
1652 const PVM pVM = pPool->CTX_SUFF(pVM);
1653 int rc;
1654 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1655 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1656 {
1657 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1658 {
1659 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1660 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1661 pNewHead->fCR3Mix = pPage->fCR3Mix;
1662 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1663 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1664 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1665 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
1666 pPool->pszAccessHandler);
1667 AssertFatalRCSuccess(rc);
1668 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1669 }
1670 else
1671 {
1672 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1673 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1674 {
1675 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1676 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1677 }
1678 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1679 rc = VINF_SUCCESS;
1680 }
1681 }
1682 else
1683 {
1684 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1685 AssertFatalRC(rc);
1686 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1687 rc = VERR_PGM_POOL_CLEARED;
1688 }
1689 pPage->fMonitored = false;
1690
1691 /*
1692 * Remove it from the list of modified pages (if in it).
1693 */
1694 pgmPoolMonitorModifiedRemove(pPool, pPage);
1695
1696 return rc;
1697}
1698
1699# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1700
1701/**
1702 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1703 *
1704 * @param pPool The Pool.
1705 * @param pPage A page in the chain.
1706 * @param fCR3Mix The new fCR3Mix value.
1707 */
1708static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1709{
1710 /* current */
1711 pPage->fCR3Mix = fCR3Mix;
1712
1713 /* before */
1714 int16_t idx = pPage->iMonitoredPrev;
1715 while (idx != NIL_PGMPOOL_IDX)
1716 {
1717 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1718 idx = pPool->aPages[idx].iMonitoredPrev;
1719 }
1720
1721 /* after */
1722 idx = pPage->iMonitoredNext;
1723 while (idx != NIL_PGMPOOL_IDX)
1724 {
1725 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1726 idx = pPool->aPages[idx].iMonitoredNext;
1727 }
1728}
1729
1730
1731/**
1732 * Installs or modifies monitoring of a CR3 page (special).
1733 *
1734 * We're pretending the CR3 page is shadowed by the pool so we can use the
1735 * generic mechanisms in detecting chained monitoring. (This also gives us a
1736 * taste of what code changes are required to really pool CR3 shadow pages.)
1737 *
1738 * @returns VBox status code.
1739 * @param pPool The pool.
1740 * @param idxRoot The CR3 (root) page index.
1741 * @param GCPhysCR3 The (new) CR3 value.
1742 */
1743int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1744{
1745 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1746 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1747 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d} GCPhysCR3=%RGp\n",
1748 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1749
1750 /*
1751 * The unlikely case where it already matches.
1752 */
1753 if (pPage->GCPhys == GCPhysCR3)
1754 {
1755 Assert(pPage->fMonitored);
1756 return VINF_SUCCESS;
1757 }
1758
1759 /*
1760 * Flush the current monitoring and remove it from the hash.
1761 */
1762 int rc = VINF_SUCCESS;
1763 if (pPage->fMonitored)
1764 {
1765 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1766 rc = pgmPoolMonitorFlush(pPool, pPage);
1767 if (rc == VERR_PGM_POOL_CLEARED)
1768 rc = VINF_SUCCESS;
1769 else
1770 AssertFatalRC(rc);
1771 pgmPoolHashRemove(pPool, pPage);
1772 }
1773
1774 /*
1775 * Monitor the page at the new location and insert it into the hash.
1776 */
1777 pPage->GCPhys = GCPhysCR3;
1778 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1779 if (rc2 != VERR_PGM_POOL_CLEARED)
1780 {
1781 AssertFatalRC(rc2);
1782 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1783 rc = rc2;
1784 }
1785 pgmPoolHashInsert(pPool, pPage);
1786 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1787 return rc;
1788}
1789
1790
1791/**
1792 * Removes the monitoring of a CR3 page (special).
1793 *
1794 * @returns VBox status code.
1795 * @param pPool The pool.
1796 * @param idxRoot The CR3 (root) page index.
1797 */
1798int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1799{
1800 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1801 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1802 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d}\n",
1803 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1804
1805 if (!pPage->fMonitored)
1806 return VINF_SUCCESS;
1807
1808 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1809 int rc = pgmPoolMonitorFlush(pPool, pPage);
1810 if (rc != VERR_PGM_POOL_CLEARED)
1811 AssertFatalRC(rc);
1812 else
1813 rc = VINF_SUCCESS;
1814 pgmPoolHashRemove(pPool, pPage);
1815 Assert(!pPage->fMonitored);
1816 pPage->GCPhys = NIL_RTGCPHYS;
1817 return rc;
1818}
1819
1820# endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1821
1822/**
1823 * Inserts the page into the list of modified pages.
1824 *
1825 * @param pPool The pool.
1826 * @param pPage The page.
1827 */
1828void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1829{
1830 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1831 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1832 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1833 && pPool->iModifiedHead != pPage->idx,
1834 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1835 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1836 pPool->iModifiedHead, pPool->cModifiedPages));
1837
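    /* Push the page onto the head of the modified-page list; the links are
     * pool page indices rather than pointers. */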
1838 pPage->iModifiedNext = pPool->iModifiedHead;
1839 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1840 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1841 pPool->iModifiedHead = pPage->idx;
1842 pPool->cModifiedPages++;
1843#ifdef VBOX_WITH_STATISTICS
1844 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1845 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1846#endif
1847}
1848
1849
1850/**
1851 * Removes the page from the list of modified pages and resets the
1852 * modification counter.
1853 *
1854 * @param pPool The pool.
1855 * @param pPage The page which is believed to be in the list of modified pages.
1856 */
1857static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1858{
1859 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1860 if (pPool->iModifiedHead == pPage->idx)
1861 {
1862 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1863 pPool->iModifiedHead = pPage->iModifiedNext;
1864 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1865 {
1866 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1867 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1868 }
1869 pPool->cModifiedPages--;
1870 }
1871 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1872 {
1873 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1874 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1875 {
1876 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1877 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1878 }
1879 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1880 pPool->cModifiedPages--;
1881 }
1882 else
1883 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1884 pPage->cModifications = 0;
1885}
1886
1887
1888/**
1889 * Zaps the list of modified pages, resetting their modification counters in the process.
1890 *
1891 * @param pVM The VM handle.
1892 */
1893void pgmPoolMonitorModifiedClearAll(PVM pVM)
1894{
1895 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1896 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1897
1898 unsigned cPages = 0; NOREF(cPages);
1899 uint16_t idx = pPool->iModifiedHead;
1900 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1901 while (idx != NIL_PGMPOOL_IDX)
1902 {
1903 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1904 idx = pPage->iModifiedNext;
1905 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1906 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1907 pPage->cModifications = 0;
1908 Assert(++cPages);
1909 }
1910 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1911 pPool->cModifiedPages = 0;
1912}
1913
1914
1915/**
1916 * Clear all shadow pages and clear all modification counters.
1917 *
1918 * @param pVM The VM handle.
1919 * @remark Should only be used when monitoring is available, thus placed in
1920 * the PGMPOOL_WITH_MONITORING #ifdef.
1921 */
1922void pgmPoolClearAll(PVM pVM)
1923{
1924 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1925 STAM_PROFILE_START(&pPool->StatClearAll, c);
1926 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1927
1928 /*
1929 * Iterate all the pages until we've encountered all those that are in use.
1930 * This is a simple but not quite optimal solution.
1931 */
1932 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1933 unsigned cLeft = pPool->cUsedPages;
1934 unsigned iPage = pPool->cCurPages;
1935 while (--iPage >= PGMPOOL_IDX_FIRST)
1936 {
1937 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1938 if (pPage->GCPhys != NIL_RTGCPHYS)
1939 {
1940 switch (pPage->enmKind)
1941 {
1942 /*
1943 * We only care about shadow page tables.
1944 */
1945 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1946 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1947 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1948 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1949 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1950 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1951 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1952 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1953 {
1954#ifdef PGMPOOL_WITH_USER_TRACKING
1955 if (pPage->cPresent)
1956#endif
1957 {
1958 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1959 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1960 ASMMemZeroPage(pvShw);
1961 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1962#ifdef PGMPOOL_WITH_USER_TRACKING
1963 pPage->cPresent = 0;
1964 pPage->iFirstPresent = ~0;
1965#endif
1966 }
1967 }
1968 /* fall thru */
1969
1970 default:
1971 Assert(!pPage->cModifications || ++cModifiedPages);
1972 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1973 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1974 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1975 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1976 pPage->cModifications = 0;
1977 break;
1978
1979 }
1980 if (!--cLeft)
1981 break;
1982 }
1983 }
1984
1985 /* sweep the special pages too. */
1986 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1987 {
1988 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1989 if (pPage->GCPhys != NIL_RTGCPHYS)
1990 {
1991 Assert(!pPage->cModifications || ++cModifiedPages);
1992 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1993 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1994 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1995 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1996 pPage->cModifications = 0;
1997 }
1998 }
1999
2000#ifndef DEBUG_michael
2001 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2002#endif
2003 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2004 pPool->cModifiedPages = 0;
2005
2006#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2007 /*
2008 * Clear all the GCPhys links and rebuild the phys ext free list.
2009 */
2010 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2011 pRam;
2012 pRam = pRam->CTX_SUFF(pNext))
2013 {
2014 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2015 while (iPage-- > 0)
2016 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2017 }
2018
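    /* Rebuild the phys ext free list as a single chain covering every extent,
     * with the last entry explicitly terminated. */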
2019 pPool->iPhysExtFreeHead = 0;
2020 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2021 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2022 for (unsigned i = 0; i < cMaxPhysExts; i++)
2023 {
2024 paPhysExts[i].iNext = i + 1;
2025 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2026 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2027 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2028 }
2029 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2030#endif
2031
2032
2033 pPool->cPresent = 0;
2034 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2035}
2036
2037
2038/**
2039 * Handle SyncCR3 pool tasks
2040 *
2041 * @returns VBox status code.
2042 * @retval VINF_SUCCESS on success.
2043 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2044 * @param pVM The VM handle.
2045 * @remark Should only be used when monitoring is available, thus placed in
2046 * the PGMPOOL_WITH_MONITORING #ifdef.
2047 */
2048int pgmPoolSyncCR3(PVM pVM)
2049{
2050 /*
2051 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2052 * Occasionally we will have to clear all the shadow page tables because we wanted
2053 * to monitor a page which was mapped by too many shadowed page tables. This operation
2054 * is sometimes referred to as a 'lightweight flush'.
2055 */
2056 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2057 pgmPoolMonitorModifiedClearAll(pVM);
2058 else
2059 {
2060# ifndef IN_RC
2061 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2062 pgmPoolClearAll(pVM);
2063# else
2064 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2065 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2066 return VINF_PGM_SYNC_CR3;
2067# endif
2068 }
2069 return VINF_SUCCESS;
2070}
2071
2072#endif /* PGMPOOL_WITH_MONITORING */
2073#ifdef PGMPOOL_WITH_USER_TRACKING
2074
2075/**
2076 * Frees up at least one user entry.
2077 *
2078 * @returns VBox status code.
2079 * @retval VINF_SUCCESS if a user entry was successfully freed.
2080 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2081 * @param pPool The pool.
2082 * @param iUser The user index.
2083 */
2084static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2085{
2086 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2087#ifdef PGMPOOL_WITH_CACHE
2088 /*
2089 * Just free cached pages in a braindead fashion.
2090 */
2091 /** @todo walk the age list backwards and free the first with usage. */
2092 int rc = VINF_SUCCESS;
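    /* Keep evicting cache pages until at least one user node has been returned
     * to the free list; remember the first failure status but carry on. */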
2093 do
2094 {
2095 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2096 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2097 rc = rc2;
2098 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2099 return rc;
2100#else
2101 /*
2102 * Lazy approach.
2103 */
2104 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
2105 Assert(!CPUMIsGuestInLongMode(pVM));
2106 pgmPoolFlushAllInt(pPool);
2107 return VERR_PGM_POOL_FLUSHED;
2108#endif
2109}
2110
2111
2112/**
2113 * Inserts a page into the cache.
2114 *
2115 * This will create a user node for the page, insert it into the GCPhys
2116 * hash, and insert it into the age list.
2117 *
2118 * @returns VBox status code.
2119 * @retval VINF_SUCCESS if successfully added.
2120 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2121 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
2122 * @param pPool The pool.
2123 * @param pPage The cached page.
2124 * @param GCPhys The GC physical address of the page we're going to shadow.
2125 * @param iUser The user index.
2126 * @param iUserTable The user table index.
2127 */
2128DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2129{
2130 int rc = VINF_SUCCESS;
2131 PPGMPOOLUSER pUser = pPool->CTX_SUFF(paUsers);
2132
2133 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
2134
2135 /*
2136 * Find a free user node.
2137 */
2138 uint16_t i = pPool->iUserFreeHead;
2139 if (i == NIL_PGMPOOL_USER_INDEX)
2140 {
2141 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2142 if (RT_FAILURE(rc))
2143 return rc;
2144 i = pPool->iUserFreeHead;
2145 }
2146
2147 /*
2148 * Unlink the user node from the free list,
2149 * initialize and insert it into the user list.
2150 */
2151 pPool->iUserFreeHead = pUser[i].iNext;
2152 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
2153 pUser[i].iUser = iUser;
2154 pUser[i].iUserTable = iUserTable;
2155 pPage->iUserHead = i;
2156
2157 /*
2158 * Insert into cache and enable monitoring of the guest page if enabled.
2159 *
2160 * Until we implement caching of all levels, including the CR3 one, we'll
2161 * have to make sure we don't try monitor & cache any recursive reuse of
2162 * a monitored CR3 page. Because all windows versions are doing this we'll
2163 * have to be able to do combined access monitoring, CR3 + PT and
2164 * PD + PT (guest PAE).
2165 *
2166 * Update:
2167 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2168 */
2169#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2170# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2171 const bool fCanBeMonitored = true;
2172# else
2173 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2174 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2175 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2176# endif
2177# ifdef PGMPOOL_WITH_CACHE
2178 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2179# endif
2180 if (fCanBeMonitored)
2181 {
2182# ifdef PGMPOOL_WITH_MONITORING
2183 rc = pgmPoolMonitorInsert(pPool, pPage);
2184 if (rc == VERR_PGM_POOL_CLEARED)
2185 {
2186 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2187# ifndef PGMPOOL_WITH_CACHE
2188 pgmPoolMonitorFlush(pPool, pPage);
2189 rc = VERR_PGM_POOL_FLUSHED;
2190# endif
2191 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2192 pUser[i].iNext = pPool->iUserFreeHead;
2193 pUser[i].iUser = NIL_PGMPOOL_IDX;
2194 pPool->iUserFreeHead = i;
2195 }
2196 }
2197# endif
2198#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2199 return rc;
2200}
2201
2202
2203# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2204/**
2205 * Adds a user reference to a page.
2206 *
2207 * This will create a user node for the reference and move the page to
2208 * the head of the cache age list.
2209 *
2210 * @returns VBox status code.
2211 * @retval VINF_SUCCESS if successfully added.
2212 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2213 * @param pPool The pool.
2214 * @param pPage The cached page.
2215 * @param iUser The user index.
2216 * @param iUserTable The user table.
2217 */
2218static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2219{
2220 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2221
2222 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
2223# ifdef VBOX_STRICT
2224 /*
2225 * Check that the entry doesn't already exist.
2226 */
2227 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2228 {
2229 uint16_t i = pPage->iUserHead;
2230 do
2231 {
2232 Assert(i < pPool->cMaxUsers);
2233 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2234 i = paUsers[i].iNext;
2235 } while (i != NIL_PGMPOOL_USER_INDEX);
2236 }
2237# endif
2238
2239 /*
2240 * Allocate a user node.
2241 */
2242 uint16_t i = pPool->iUserFreeHead;
2243 if (i == NIL_PGMPOOL_USER_INDEX)
2244 {
2245 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2246 if (RT_FAILURE(rc))
2247 return rc;
2248 i = pPool->iUserFreeHead;
2249 }
2250 pPool->iUserFreeHead = paUsers[i].iNext;
2251
2252 /*
2253 * Initialize the user node and insert it.
2254 */
2255 paUsers[i].iNext = pPage->iUserHead;
2256 paUsers[i].iUser = iUser;
2257 paUsers[i].iUserTable = iUserTable;
2258 pPage->iUserHead = i;
2259
2260# ifdef PGMPOOL_WITH_CACHE
2261 /*
2262 * Tell the cache to update its replacement stats for this page.
2263 */
2264 pgmPoolCacheUsed(pPool, pPage);
2265# endif
2266 return VINF_SUCCESS;
2267}
2268# endif /* PGMPOOL_WITH_CACHE */
2269
2270
2271/**
2272 * Frees a user record associated with a page.
2273 *
2274 * This does not clear the entry in the user table, it simply returns the
2275 * user record to the chain of free records.
2276 *
2277 * @param pPool The pool.
2278 * @param pPage The shadow page.
2279 * @param iUser The shadow page pool index of the user table.
2280 * @param iUserTable The index into the user table (shadowed).
2281 */
2282static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2283{
2284 /*
2285 * Unlink and free the specified user entry.
2286 */
2287 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2288
2289 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2290 uint16_t i = pPage->iUserHead;
2291 if ( i != NIL_PGMPOOL_USER_INDEX
2292 && paUsers[i].iUser == iUser
2293 && paUsers[i].iUserTable == iUserTable)
2294 {
2295 pPage->iUserHead = paUsers[i].iNext;
2296
2297 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2298 paUsers[i].iNext = pPool->iUserFreeHead;
2299 pPool->iUserFreeHead = i;
2300 return;
2301 }
2302
2303 /* General: Linear search. */
2304 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2305 while (i != NIL_PGMPOOL_USER_INDEX)
2306 {
2307 if ( paUsers[i].iUser == iUser
2308 && paUsers[i].iUserTable == iUserTable)
2309 {
2310 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2311 paUsers[iPrev].iNext = paUsers[i].iNext;
2312 else
2313 pPage->iUserHead = paUsers[i].iNext;
2314
2315 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2316 paUsers[i].iNext = pPool->iUserFreeHead;
2317 pPool->iUserFreeHead = i;
2318 return;
2319 }
2320 iPrev = i;
2321 i = paUsers[i].iNext;
2322 }
2323
2324 /* Fatal: didn't find it */
2325 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2326 iUser, iUserTable, pPage->GCPhys));
2327}
2328
2329
2330/**
2331 * Gets the entry size of a shadow table.
2332 *
2333 * @param enmKind The kind of page.
2334 *
2335 * @returns The size of the entry in bytes. That is, 4 or 8.
2336 * @returns If the kind is not for a table, an assertion is raised and 0 is
2337 * returned.
2338 */
2339DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2340{
2341 switch (enmKind)
2342 {
2343 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2344 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2345 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2346 case PGMPOOLKIND_ROOT_32BIT_PD:
2347 return 4;
2348
2349 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2350 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2351 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2352 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2353 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2354 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2355 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2356 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2357 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2358 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2359 case PGMPOOLKIND_ROOT_PAE_PD:
2360 case PGMPOOLKIND_ROOT_PDPT:
2361 case PGMPOOLKIND_ROOT_NESTED:
2362 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2363 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2364 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2365 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2366 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2367 return 8;
2368
2369 default:
2370 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2371 }
2372}
2373
2374
2375/**
2376 * Gets the entry size of a guest table.
2377 *
2378 * @param enmKind The kind of page.
2379 *
2380 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2381 * @returns If the kind is not for a table, an assertion is raised and 0 is
2382 * returned.
2383 */
2384DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2385{
2386 switch (enmKind)
2387 {
2388 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2389 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2390 case PGMPOOLKIND_ROOT_32BIT_PD:
2391 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2392 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2393 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2394 return 4;
2395
2396 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2397 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2398 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2399 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2400 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2401 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2402 case PGMPOOLKIND_ROOT_PAE_PD:
2403 case PGMPOOLKIND_ROOT_PDPT:
2404 return 8;
2405
2406 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2407 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2408 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2409 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2410 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2411 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2412 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2413 case PGMPOOLKIND_ROOT_NESTED:
2414 /** @todo can we return 0? (nobody is calling this...) */
2415 AssertFailed();
2416 return 0;
2417
2418 default:
2419 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2420 }
2421}
2422
2423#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2424
2425/**
2426 * Scans one shadow page table for mappings of a physical page.
2427 *
2428 * @param pVM The VM handle.
2429 * @param pPhysPage The guest page in question.
2430 * @param iShw The shadow page table.
2431 * @param cRefs The number of references made in that PT.
2432 */
2433static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2434{
2435 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2436 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2437
2438 /*
2439 * Assert sanity.
2440 */
2441 Assert(cRefs == 1);
2442 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2443 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2444
2445 /*
2446 * Then, clear the actual mappings to the page in the shadow PT.
2447 */
2448 switch (pPage->enmKind)
2449 {
2450 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2451 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2452 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2453 {
2454 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
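            /* Any present PTE whose address bits equal the page's host physical
             * address is a mapping of this page and will be zapped below. */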
2455 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2456 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2457 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2458 {
2459 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2460 pPT->a[i].u = 0;
2461 cRefs--;
2462 if (!cRefs)
2463 return;
2464 }
2465#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2466 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2467 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2468 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2469 {
2470 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2471 pPT->a[i].u = 0;
2472 }
2473#endif
2474 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2475 break;
2476 }
2477
2478 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2479 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2480 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2481 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2482 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2483 {
2484 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2485 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2486 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2487 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2488 {
2489 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2490 pPT->a[i].u = 0;
2491 cRefs--;
2492 if (!cRefs)
2493 return;
2494 }
2495#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2496 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2497 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2498 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2499 {
2500 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2501 pPT->a[i].u = 0;
2502 }
2503#endif
2504 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2505 break;
2506 }
2507
2508 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2509 {
2510 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2511 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2512 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2513 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2514 {
2515 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2516 pPT->a[i].u = 0;
2517 cRefs--;
2518 if (!cRefs)
2519 return;
2520 }
2521#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2522 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2523 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2524 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2525 {
2526 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2527 pPT->a[i].u = 0;
2528 }
2529#endif
2530 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2531 break;
2532 }
2533
2534 default:
2535 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2536 }
2537}
2538
2539
2540/**
2541 * Scans one shadow page table for mappings of a physical page.
2542 *
2543 * @param pVM The VM handle.
2544 * @param pPhysPage The guest page in question.
2545 * @param iShw The shadow page table.
2546 * @param cRefs The number of references made in that PT.
2547 */
2548void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2549{
2550 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2551 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2552 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2553 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2554 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2555 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2556}
2557
2558
2559/**
2560 * Flushes a list of shadow page tables mapping the same physical page.
2561 *
2562 * @param pVM The VM handle.
2563 * @param pPhysPage The guest page in question.
2564 * @param iPhysExt The physical cross reference extent list to flush.
2565 */
2566void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2567{
2568 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2569 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2570 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%d\n", pPhysPage->HCPhys, iPhysExt));
2571
2572 const uint16_t iPhysExtStart = iPhysExt;
2573 PPGMPOOLPHYSEXT pPhysExt;
2574 do
2575 {
2576 Assert(iPhysExt < pPool->cMaxPhysExts);
2577 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2578 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2579 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2580 {
2581 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2582 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2583 }
2584
2585 /* next */
2586 iPhysExt = pPhysExt->iNext;
2587 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2588
2589 /* insert the list into the free list and clear the ram range entry. */
2590 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2591 pPool->iPhysExtFreeHead = iPhysExtStart;
2592 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2593
2594 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2595}
2596
2597#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2598
2599/**
2600 * Scans all shadow page tables for mappings of a physical page.
2601 *
2602 * This may be slow, but it's most likely more efficient than cleaning
2603 * out the entire page pool / cache.
2604 *
2605 * @returns VBox status code.
2606 * @retval VINF_SUCCESS if all references have been successfully cleared.
2607 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2608 * a page pool cleaning.
2609 *
2610 * @param pVM The VM handle.
2611 * @param pPhysPage The guest page in question.
2612 */
2613int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2614{
2615 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2616 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2617 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2618 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2619
2620#if 1
2621 /*
2622 * There is a limit to what makes sense.
2623 */
2624 if (pPool->cPresent > 1024)
2625 {
2626 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2627 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2628 return VINF_PGM_GCPHYS_ALIASED;
2629 }
2630#endif
2631
2632 /*
2633 * Iterate all the pages until we've encountered all those that are in use.
2634 * This is a simple but not quite optimal solution.
2635 */
2636 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2637 const uint32_t u32 = u64;
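    /* 32-bit and PAE/EPT shadow PTEs are matched against the same
     * HCPhys | X86_PTE_P pattern; u32 is simply the truncated form used for
     * the legacy 32-bit layout. */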
2638 unsigned cLeft = pPool->cUsedPages;
2639 unsigned iPage = pPool->cCurPages;
2640 while (--iPage >= PGMPOOL_IDX_FIRST)
2641 {
2642 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2643 if (pPage->GCPhys != NIL_RTGCPHYS)
2644 {
2645 switch (pPage->enmKind)
2646 {
2647 /*
2648 * We only care about shadow page tables.
2649 */
2650 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2651 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2652 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2653 {
2654 unsigned cPresent = pPage->cPresent;
2655 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2656 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2657 if (pPT->a[i].n.u1Present)
2658 {
2659 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2660 {
2661 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2662 pPT->a[i].u = 0;
2663 }
2664 if (!--cPresent)
2665 break;
2666 }
2667 break;
2668 }
2669
2670 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2671 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2672 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2673 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2674 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2675 {
2676 unsigned cPresent = pPage->cPresent;
2677 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2678 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2679 if (pPT->a[i].n.u1Present)
2680 {
2681 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2682 {
2683 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2684 pPT->a[i].u = 0;
2685 }
2686 if (!--cPresent)
2687 break;
2688 }
2689 break;
2690 }
2691 }
2692 if (!--cLeft)
2693 break;
2694 }
2695 }
2696
2697 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2698 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2699 return VINF_SUCCESS;
2700}
2701
2702
2703/**
2704 * Clears the user entry in a user table.
2705 *
2706 * This is used to remove all references to a page when flushing it.
2707 */
2708static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2709{
2710 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2711 Assert(pUser->iUser < pPool->cCurPages);
2712
2713 /*
2714 * Map the user page.
2715 */
2716 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2717 union
2718 {
2719 uint64_t *pau64;
2720 uint32_t *pau32;
2721 } u;
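    /* The user page may hold 32-bit or 64-bit entries depending on its kind,
     * so keep both views of the mapping handy. */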
2722 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
2723
2724 /* Safety precaution in case we change the paging for other modes too in the future. */
2725 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
2726
2727#ifdef VBOX_STRICT
2728 /*
2729 * Some sanity checks.
2730 */
2731 switch (pUserPage->enmKind)
2732 {
2733 case PGMPOOLKIND_ROOT_32BIT_PD:
2734 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2735 Assert(!(u.pau32[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2736 break;
2737 case PGMPOOLKIND_ROOT_PAE_PD:
2738 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2739 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2740 break;
2741 case PGMPOOLKIND_ROOT_PDPT:
2742 Assert(pUser->iUserTable < 4);
2743 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2744 break;
2745 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2746 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2747 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2748 break;
2749 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2750 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2751 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2752 break;
2753 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2754 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2755 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2756 break;
2757 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2758 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2759 /* GCPhys >> PAGE_SHIFT is the index here */
2760 break;
2761 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2762 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2763 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2764 break;
2765
2766 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2767 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2768 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2769 break;
2770
2771 case PGMPOOLKIND_ROOT_NESTED:
2772 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2773 break;
2774
2775 default:
2776 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2777 break;
2778 }
2779#endif /* VBOX_STRICT */
2780
2781 /*
2782 * Clear the entry in the user page.
2783 */
2784 switch (pUserPage->enmKind)
2785 {
2786 /* 32-bit entries */
2787 case PGMPOOLKIND_ROOT_32BIT_PD:
2788 u.pau32[pUser->iUserTable] = 0;
2789 break;
2790
2791 /* 64-bit entries */
2792 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2793 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2794 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2795 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2796 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2797 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2798 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2799 case PGMPOOLKIND_ROOT_PAE_PD:
2800 case PGMPOOLKIND_ROOT_PDPT:
2801 case PGMPOOLKIND_ROOT_NESTED:
2802 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2803 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2804 u.pau64[pUser->iUserTable] = 0;
2805 break;
2806
2807 default:
2808 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2809 }
2810}
2811
2812
2813/**
2814 * Clears all users of a page.
2815 */
2816static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2817{
2818 /*
2819 * Free all the user records.
2820 */
2821 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2822 uint16_t i = pPage->iUserHead;
2823 while (i != NIL_PGMPOOL_USER_INDEX)
2824 {
2825 /* Clear the entry in the user table. */
2826 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2827
2828 /* Free it. */
2829 const uint16_t iNext = paUsers[i].iNext;
2830 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2831 paUsers[i].iNext = pPool->iUserFreeHead;
2832 pPool->iUserFreeHead = i;
2833
2834 /* Next. */
2835 i = iNext;
2836 }
2837 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2838}
2839
2840#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2841
2842/**
2843 * Allocates a new physical cross reference extent.
2844 *
2845 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2846 * @param pVM The VM handle.
2847 * @param piPhysExt Where to store the phys ext index.
2848 */
2849PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2850{
2851 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2852 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2853 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2854 {
2855 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2856 return NULL;
2857 }
2858 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2859 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2860 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2861 *piPhysExt = iPhysExt;
2862 return pPhysExt;
2863}
2864
2865
2866/**
2867 * Frees a physical cross reference extent.
2868 *
2869 * @param pVM The VM handle.
2870 * @param iPhysExt The extent to free.
2871 */
2872void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2873{
2874 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2875 Assert(iPhysExt < pPool->cMaxPhysExts);
2876 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2877 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2878 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2879 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2880 pPool->iPhysExtFreeHead = iPhysExt;
2881}
2882
2883
2884/**
2885 * Frees a list of physical cross reference extents.
2886 *
2887 * @param pVM The VM handle.
2888 * @param iPhysExt The index of the first extent in the list to free.
2889 */
2890void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2891{
2892 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2893
2894 const uint16_t iPhysExtStart = iPhysExt;
2895 PPGMPOOLPHYSEXT pPhysExt;
2896 do
2897 {
2898 Assert(iPhysExt < pPool->cMaxPhysExts);
2899 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2900 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2901 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2902
2903 /* next */
2904 iPhysExt = pPhysExt->iNext;
2905 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2906
2907 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2908 pPool->iPhysExtFreeHead = iPhysExtStart;
2909}
2910
2911
2912/**
2913 * Insert a reference into a list of physical cross reference extents.
2914 *
2915 * @returns The new ram range flags (top 16-bits).
2916 *
2917 * @param pVM The VM handle.
2918 * @param iPhysExt The physical extent index of the list head.
2919 * @param iShwPT The shadow page table index.
2920 *
2921 */
2922static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2923{
2924 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2925 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2926
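    /* The 16-bit value handed back to the caller packs the extent index into
     * the low bits and the MM_RAM_FLAGS_CREFS_PHYSEXT marker into the CREFS
     * field, pre-shifted so it can be stored directly in the top 16 bits of
     * the ram range page flags. */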
2927 /* special common case. */
2928 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2929 {
2930 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2931 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2932 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2933 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2934 }
2935
2936 /* general treatment. */
2937 const uint16_t iPhysExtStart = iPhysExt;
2938 unsigned cMax = 15;
2939 for (;;)
2940 {
2941 Assert(iPhysExt < pPool->cMaxPhysExts);
2942 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2943 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2944 {
2945 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2946 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2947 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2948 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2949 }
2950 if (!--cMax)
2951 {
2952 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2953 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2954 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2955 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2956 }
2957 }
2958
2959 /* add another extent to the list. */
2960 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2961 if (!pNew)
2962 {
2963 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2964 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2965 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2966 }
2967 pNew->iNext = iPhysExtStart;
2968 pNew->aidx[0] = iShwPT;
2969 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2970 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2971}
2972
2973
2974/**
2975 * Add a reference to guest physical page where extents are in use.
2976 *
2977 * @returns The new ram range flags (top 16-bits).
2978 *
2979 * @param pVM The VM handle.
2980 * @param u16 The ram range flags (top 16-bits).
2981 * @param iShwPT The shadow page table index.
2982 */
2983uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2984{
2985 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2986 {
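        /* The page currently carries a single direct reference (CREFS == 1);
         * allocate an extent and record both the old and the new shadow page
         * table indices in it. */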
2987 /*
2988 * Convert to extent list.
2989 */
2990 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2991 uint16_t iPhysExt;
2992 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2993 if (pPhysExt)
2994 {
2995 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2996 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2997 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2998 pPhysExt->aidx[1] = iShwPT;
2999 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3000 }
3001 else
3002 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3003 }
3004 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
3005 {
3006 /*
3007 * Insert into the extent list.
3008 */
3009 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
3010 }
3011 else
3012 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3013 return u16;
3014}
3015
3016
3017/**
3018 * Clear references to guest physical memory.
3019 *
3020 * @param pPool The pool.
3021 * @param pPage The page.
3022 * @param pPhysPage Pointer to the aPages entry in the ram range.
3023 */
3024void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3025{
3026 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
3027 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3028
3029 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
3030 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
3031 {
3032 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3033 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3034 do
3035 {
3036 Assert(iPhysExt < pPool->cMaxPhysExts);
3037
3038 /*
3039 * Look for the shadow page and check if it's all freed.
3040 */
3041 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3042 {
3043 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3044 {
3045 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3046
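                    /* If other shadow PTs still reference the page through this
                     * extent, keep the node; otherwise unlink it and free it,
                     * updating the ram range flags when the head or the whole
                     * list disappears. */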
3047 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3048 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3049 {
3050 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3051 return;
3052 }
3053
3054 /* we can free the node. */
3055 PVM pVM = pPool->CTX_SUFF(pVM);
3056 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3057 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3058 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3059 {
3060 /* lonely node */
3061 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3062 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
3063 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3064 }
3065 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3066 {
3067 /* head */
3068 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
3069 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
3070 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
3071 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
3072 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3073 }
3074 else
3075 {
3076 /* in list */
3077 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3078 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3079 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3080 }
3081 iPhysExt = iPhysExtNext;
3082 return;
3083 }
3084 }
3085
3086 /* next */
3087 iPhysExtPrev = iPhysExt;
3088 iPhysExt = paPhysExts[iPhysExt].iNext;
3089 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3090
3091 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3092 }
3093 else /* nothing to do */
3094 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
3095}
3096
3097
3098/**
3099 * Clear references to guest physical memory.
3100 *
3101 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3102 * is assumed to be correct, so the linear search can be skipped and we can assert
3103 * at an earlier point.
3104 *
3105 * @param pPool The pool.
3106 * @param pPage The page.
3107 * @param HCPhys The host physical address corresponding to the guest page.
3108 * @param GCPhys The guest physical address corresponding to HCPhys.
3109 */
3110static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3111{
3112 /*
3113 * Walk range list.
3114 */
3115 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3116 while (pRam)
3117 {
3118 RTGCPHYS off = GCPhys - pRam->GCPhys;
3119 if (off < pRam->cb)
3120 {
3121 /* does it match? */
3122 const unsigned iPage = off >> PAGE_SHIFT;
3123 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3124#ifdef LOG_ENABLED
3125            RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3126            Log(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3127#endif
3128 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3129 {
3130 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3131 return;
3132 }
3133 break;
3134 }
3135 pRam = pRam->CTX_SUFF(pNext);
3136 }
3137 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3138}
3139
3140
3141/**
3142 * Clear references to guest physical memory.
3143 *
3144 * @param pPool The pool.
3145 * @param pPage The page.
3146 * @param HCPhys The host physical address corresponding to the guest page.
3147 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3148 */
3149static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3150{
3151 /*
3152 * Walk range list.
3153 */
3154 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3155 while (pRam)
3156 {
3157 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3158 if (off < pRam->cb)
3159 {
3160 /* does it match? */
3161 const unsigned iPage = off >> PAGE_SHIFT;
3162 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3163 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3164 {
3165 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3166 return;
3167 }
3168 break;
3169 }
3170 pRam = pRam->CTX_SUFF(pNext);
3171 }
3172
3173 /*
3174 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3175 */
3176 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3177 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3178 while (pRam)
3179 {
3180 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3181 while (iPage-- > 0)
3182 {
3183 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3184 {
3185 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3186 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3187 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3188 return;
3189 }
3190 }
3191 pRam = pRam->CTX_SUFF(pNext);
3192 }
3193
3194 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3195}
3196
3197
3198/**
3199 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3200 *
3201 * @param pPool The pool.
3202 * @param pPage The page.
3203 * @param pShwPT The shadow page table (mapping of the page).
3204 * @param pGstPT The guest page table.
3205 */
3206DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3207{
3208 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3209 if (pShwPT->a[i].n.u1Present)
3210 {
3211 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3212 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3213 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3214 if (!--pPage->cPresent)
3215 break;
3216 }
3217}
3218
3219
3220/**
3221 * Clear references to guest physical memory in a PAE / 32-bit page table.
3222 *
3223 * @param pPool The pool.
3224 * @param pPage The page.
3225 * @param pShwPT The shadow page table (mapping of the page).
3226 * @param pGstPT The guest page table (just a half one).
3227 */
3228DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3229{
3230 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3231 if (pShwPT->a[i].n.u1Present)
3232 {
3233 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
3234 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3235 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3236 }
3237}
3238
3239
3240/**
3241 * Clear references to guest physical memory in a PAE / PAE page table.
3242 *
3243 * @param pPool The pool.
3244 * @param pPage The page.
3245 * @param pShwPT The shadow page table (mapping of the page).
3246 * @param pGstPT The guest page table.
3247 */
3248DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3249{
3250 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3251 if (pShwPT->a[i].n.u1Present)
3252 {
3253 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
3254 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3255 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3256 }
3257}
3258
3259
3260/**
3261 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3262 *
3263 * @param pPool The pool.
3264 * @param pPage The page.
3265 * @param pShwPT The shadow page table (mapping of the page).
3266 */
3267DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3268{
3269 RTGCPHYS GCPhys = pPage->GCPhys;
3270 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3271 if (pShwPT->a[i].n.u1Present)
3272 {
3273 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3274 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3275 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3276 }
3277}
3278
3279
3280/**
3281 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3282 *
3283 * @param pPool The pool.
3284 * @param pPage The page.
3285 * @param pShwPT The shadow page table (mapping of the page).
3286 */
3287DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3288{
3289 RTGCPHYS GCPhys = pPage->GCPhys;
3290 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3291 if (pShwPT->a[i].n.u1Present)
3292 {
3293 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3294 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3295 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3296 }
3297}
3298
3299#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3300
3301/**
3302 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
3303 *
3304 * @param pPool The pool.
3305 * @param pPage The page.
3306 * @param pShwPD The shadow page directory (mapping of the page).
3307 */
3308DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3309{
3310 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3311 {
3312 if (pShwPD->a[i].n.u1Present)
3313 {
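            /* Look up the child shadow page by its host physical address in the
             * pool's AVL tree and drop this page directory's user reference. */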
3314 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3315 if (pSubPage)
3316 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3317 else
3318 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3319 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3320 }
3321 }
3322}
3323
3324
3325/**
3326 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3327 *
3328 * @param pPool The pool.
3329 * @param pPage The page.
3330 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3331 */
3332DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3333{
3334 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3335 {
3336 if (pShwPDPT->a[i].n.u1Present)
3337 {
3338 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3339 if (pSubPage)
3340 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3341 else
3342 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3343 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3344 }
3345 }
3346}
3347
3348
3349/**
3350 * Clear references to shadowed pages in a 64-bit level 4 page table.
3351 *
3352 * @param pPool The pool.
3353 * @param pPage The page.
3354 * @param pShwPML4 The shadow page map level-4 table (mapping of the page).
3355 */
3356DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3357{
3358 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3359 {
3360 if (pShwPML4->a[i].n.u1Present)
3361 {
3362 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3363 if (pSubPage)
3364 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3365 else
3366 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3367 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3368 }
3369 }
3370}
3371
3372
3373/**
3374 * Clear references to guest physical memory in an EPT page table.
3375 *
3376 * @param pPool The pool.
3377 * @param pPage The page.
3378 * @param pShwPT The shadow page table (mapping of the page).
3379 */
3380DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3381{
3382 RTGCPHYS GCPhys = pPage->GCPhys;
3383 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3384 if (pShwPT->a[i].n.u1Present)
3385 {
3386 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RGp\n",
3387 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3388 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3389 }
3390}
3391
3392
3393/**
3394 * Clear references to shadowed pages in an EPT page directory.
3395 *
3396 * @param pPool The pool.
3397 * @param pPage The page.
3398 * @param pShwPD The shadow page directory (mapping of the page).
3399 */
3400DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3401{
3402 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3403 {
3404 if (pShwPD->a[i].n.u1Present)
3405 {
3406 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3407 if (pSubPage)
3408 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3409 else
3410 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3411 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3412 }
3413 }
3414}
3415
3416
3417/**
3418 * Clear references to shadowed pages in an EPT page directory pointer table.
3419 *
3420 * @param pPool The pool.
3421 * @param pPage The page.
3422 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3423 */
3424DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3425{
3426 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3427 {
3428 if (pShwPDPT->a[i].n.u1Present)
3429 {
3430 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3431 if (pSubPage)
3432 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3433 else
3434 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3435 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3436 }
3437 }
3438}
3439
3440
3441/**
3442 * Clears all references made by this page.
3443 *
3444 * This includes other shadow pages and GC physical addresses.
3445 *
3446 * @param pPool The pool.
3447 * @param pPage The page.
3448 */
3449static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3450{
3451 /*
3452 * Map the shadow page and take action according to the page kind.
3453 */
3454 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3455 switch (pPage->enmKind)
3456 {
3457#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3458 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3459 {
3460 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3461 void *pvGst;
3462 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3463 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3464 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3465 break;
3466 }
3467
3468 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3469 {
3470 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3471 void *pvGst;
3472 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3473 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3474 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3475 break;
3476 }
3477
3478 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3479 {
3480 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3481 void *pvGst;
3482 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3483 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3484 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3485 break;
3486 }
3487
3488 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3489 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3490 {
3491 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3492 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3493 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3494 break;
3495 }
3496
3497 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3498 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3499 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3500 {
3501 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3502 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3503 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3504 break;
3505 }
3506
3507#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3508 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3509 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3510 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3511 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3512 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3513 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3514 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3515 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3516 break;
3517#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3518
3519 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
3520 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3521 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3522 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3523 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3524 break;
3525
3526 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3527 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3528 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3529 break;
3530
3531 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
3532 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3533 break;
3534
3535 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3536 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
3537 break;
3538
3539 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3540 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
3541 break;
3542
3543 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3544 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
3545 break;
3546
3547 default:
3548 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3549 }
3550
3551 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3552 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3553 ASMMemZeroPage(pvShw);
3554 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3555 pPage->fZeroed = true;
3556}
3557
3558#endif /* PGMPOOL_WITH_USER_TRACKING */
3559
3560/**
3561 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3562 *
3563 * @param pPool The pool.
3564 */
3565static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3566{
3567 /*
3568 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST-1.
3569 */
3570 Assert(NIL_PGMPOOL_IDX == 0);
3571 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3572 {
3573 /*
3574 * Get the page address.
3575 */
3576 PPGMPOOLPAGE pPage = &pPool->aPages[i];
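/* The same root page is interpreted either as 32-bit or as 64-bit entries depending on
 * its kind, hence the union of the two views below. */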
3577 union
3578 {
3579 uint64_t *pau64;
3580 uint32_t *pau32;
3581 } u;
3582 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3583
3584 /*
3585 * Mark stuff not present.
3586 */
3587 switch (pPage->enmKind)
3588 {
3589 case PGMPOOLKIND_ROOT_32BIT_PD:
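/* Only zap entries that shadow guest pages; entries tagged PGM_PDFLAGS_MAPPING belong to
 * hypervisor mappings and must survive the flush (same applies to the PAE case below). */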
3590 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3591 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3592 u.pau32[iPage] = 0;
3593 break;
3594
3595 case PGMPOOLKIND_ROOT_PAE_PD:
3596 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * X86_PG_PAE_PDPE_ENTRIES; iPage++)
3597 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3598 u.pau64[iPage] = 0;
3599 break;
3600
3601 case PGMPOOLKIND_ROOT_PDPT:
3602 /* Not a root of shadowed pages currently; ignore it. */
3603 break;
3604
3605 case PGMPOOLKIND_ROOT_NESTED:
3606 ASMMemZero32(u.pau64, PAGE_SIZE);
3607 break;
3608 }
3609 }
3610
3611 /*
3612 * Paranoia (to be removed), flag a global CR3 sync.
3613 */
3614 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
3615}
3616
3617
3618/**
3619 * Flushes the entire pool and cache.
3620 *
3621 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3622 * and will execute this CR3 flush.
3623 *
3624 * @param pPool The pool.
3625 */
3626static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3627{
3628 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3629 LogFlow(("pgmPoolFlushAllInt:\n"));
3630
3631 /*
3632 * If there are no pages in the pool, there is nothing to do.
3633 */
3634 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3635 {
3636 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3637 return;
3638 }
3639
3640 /*
3641 * Nuke the free list and reinsert all pages into it.
3642 */
3643 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3644 {
3645 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3646
3647#ifdef IN_RING3
3648 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMR3, pPage->pvPageR3));
3649#endif
3650#ifdef PGMPOOL_WITH_MONITORING
3651 if (pPage->fMonitored)
3652 pgmPoolMonitorFlush(pPool, pPage);
3653 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3654 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3655 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3656 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3657 pPage->cModifications = 0;
3658#endif
3659 pPage->GCPhys = NIL_RTGCPHYS;
3660 pPage->enmKind = PGMPOOLKIND_FREE;
3661 Assert(pPage->idx == i);
3662 pPage->iNext = i + 1;
3663 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3664 pPage->fSeenNonGlobal = false;
3665 pPage->fMonitored = false;
3666 pPage->fCached = false;
3667 pPage->fReusedFlushPending = false;
3668 pPage->fCR3Mix = false;
3669#ifdef PGMPOOL_WITH_USER_TRACKING
3670 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3671#endif
3672#ifdef PGMPOOL_WITH_CACHE
3673 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3674 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3675#endif
3676 }
3677 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3678 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3679 pPool->cUsedPages = 0;
3680
3681#ifdef PGMPOOL_WITH_USER_TRACKING
3682 /*
3683 * Zap and reinitialize the user records.
3684 */
3685 pPool->cPresent = 0;
3686 pPool->iUserFreeHead = 0;
3687 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3688 const unsigned cMaxUsers = pPool->cMaxUsers;
3689 for (unsigned i = 0; i < cMaxUsers; i++)
3690 {
3691 paUsers[i].iNext = i + 1;
3692 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3693 paUsers[i].iUserTable = 0xfffffffe;
3694 }
3695 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3696#endif
3697
3698#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3699 /*
3700 * Clear all the GCPhys links and rebuild the phys ext free list.
3701 */
3702 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3703 pRam;
3704 pRam = pRam->CTX_SUFF(pNext))
3705 {
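/* The shadow reference tracking info is kept in the flag bits of each RAM page's HCPhys
 * field (see the PAGE FLAGS todo below); masking with MM_RAM_FLAGS_NO_REFS_MASK drops
 * all those links in one pass. */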
3706 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3707 while (iPage-- > 0)
3708 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3709 }
3710
3711 pPool->iPhysExtFreeHead = 0;
3712 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3713 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3714 for (unsigned i = 0; i < cMaxPhysExts; i++)
3715 {
3716 paPhysExts[i].iNext = i + 1;
3717 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3718 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3719 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3720 }
3721 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3722#endif
3723
3724#ifdef PGMPOOL_WITH_MONITORING
3725 /*
3726 * Just zap the modified list.
3727 */
3728 pPool->cModifiedPages = 0;
3729 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3730#endif
3731
3732#ifdef PGMPOOL_WITH_CACHE
3733 /*
3734 * Clear the GCPhys hash and the age list.
3735 */
3736 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
3737 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3738 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3739 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3740#endif
3741
3742 /*
3743 * Flush all the special root pages.
3744 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3745 */
3746 pgmPoolFlushAllSpecialRoots(pPool);
3747 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3748 {
3749 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3750 pPage->iNext = NIL_PGMPOOL_IDX;
3751#ifdef PGMPOOL_WITH_MONITORING
3752 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3753 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3754 pPage->cModifications = 0;
3755 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3756 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3757 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3758 if (pPage->fMonitored)
3759 {
3760 PVM pVM = pPool->CTX_SUFF(pVM);
3761 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3762 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3763 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3764 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
3765 pPool->pszAccessHandler);
3766 AssertFatalRCSuccess(rc);
3767# ifdef PGMPOOL_WITH_CACHE
3768 pgmPoolHashInsert(pPool, pPage);
3769# endif
3770 }
3771#endif
3772#ifdef PGMPOOL_WITH_USER_TRACKING
3773 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3774#endif
3775#ifdef PGMPOOL_WITH_CACHE
3776 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3777 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3778#endif
3779 }
3780
3781 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3782}
3783
3784
3785/**
3786 * Flushes a pool page.
3787 *
3788 * This moves the page to the free list after removing all user references to it.
3789 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3790 *
3791 * @returns VBox status code.
3792 * @retval VINF_SUCCESS on success.
3793 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
3794 * @param pPool The pool.
3795 * @param pPage The pool page to flush.
3796 */
3797int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3798{
3799 int rc = VINF_SUCCESS;
3800 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3801 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%d, .GCPhys=%RGp}\n",
3802 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3803
3804 /*
3805 * Quietly reject any attempts at flushing any of the special root pages.
3806 */
3807 if (pPage->idx < PGMPOOL_IDX_FIRST)
3808 {
3809 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3810 return VINF_SUCCESS;
3811 }
3812
3813 /*
3814 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
3815 */
3816 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
3817 {
3818 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4,
3819 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
3820 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3821 return VINF_SUCCESS;
3822 }
3823
3824 /*
3825 * Mark the page as being in need of an ASMMemZeroPage().
3826 */
3827 pPage->fZeroed = false;
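/* The actual zeroing is deferred; pgmPoolAlloc (or a full clear) will do the
 * ASMMemZeroPage when the page is handed out again. */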
3828
3829#ifdef PGMPOOL_WITH_USER_TRACKING
3830 /*
3831 * Clear the page.
3832 */
3833 pgmPoolTrackClearPageUsers(pPool, pPage);
3834 STAM_PROFILE_START(&pPool->StatTrackDeref, a);
3835 pgmPoolTrackDeref(pPool, pPage);
3836 STAM_PROFILE_STOP(&pPool->StatTrackDeref, a);
3837#endif
3838
3839#ifdef PGMPOOL_WITH_CACHE
3840 /*
3841 * Flush it from the cache.
3842 */
3843 pgmPoolCacheFlushPage(pPool, pPage);
3844#endif /* PGMPOOL_WITH_CACHE */
3845
3846#ifdef PGMPOOL_WITH_MONITORING
3847 /*
3848 * Deregister the monitoring.
3849 */
3850 if (pPage->fMonitored)
3851 rc = pgmPoolMonitorFlush(pPool, pPage);
3852#endif
3853
3854 /*
3855 * Free the page.
3856 */
3857 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3858 pPage->iNext = pPool->iFreeHead;
3859 pPool->iFreeHead = pPage->idx;
3860 pPage->enmKind = PGMPOOLKIND_FREE;
3861 pPage->GCPhys = NIL_RTGCPHYS;
3862 pPage->fReusedFlushPending = false;
3863
3864 pPool->cUsedPages--;
3865 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3866 return rc;
3867}
3868
3869
3870/**
3871 * Frees a usage of a pool page.
3872 *
3873 * The caller is responsible for updating the user table so that it no longer
3874 * references the shadow page.
3875 *
3876 * @param pPool The pool.
3877 * @param pPage The pool page.
3878 * @param iUser The shadow page pool index of the user table.
3879 * @param iUserTable The index into the user table (shadowed).
3880 */
3881void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3882{
3883 STAM_PROFILE_START(&pPool->StatFree, a);
3884 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3885 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3886 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3887#ifdef PGMPOOL_WITH_USER_TRACKING
3888 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3889#endif
3890#ifdef PGMPOOL_WITH_CACHE
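/* Cached pages are kept in the pool so pgmPoolCacheAlloc can hand them out again;
 * only uncached pages are flushed (and thus returned to the free list) right away. */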
3891 if (!pPage->fCached)
3892#endif
3893 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3894 STAM_PROFILE_STOP(&pPool->StatFree, a);
3895}
3896
3897
3898/**
3899 * Makes one or more pages free, by growing the pool if possible or by freeing a used page.
3900 *
3901 * @returns VBox status code.
3902 * @retval VINF_SUCCESS on success.
3903 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3904 *
3905 * @param pPool The pool.
3906 * @param iUser The user of the page.
3907 */
3908static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3909{
3910 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3911
3912 /*
3913 * If the pool isn't fully grown yet, expand it.
3914 */
3915 if (pPool->cCurPages < pPool->cMaxPages)
3916 {
3917 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3918#ifdef IN_RING3
3919 int rc = PGMR3PoolGrow(pPool->pVMR3);
3920#else
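/* Growing the pool requires ring-3, so from ring-0 and raw-mode context this goes
 * through the call-host interface (VMMCALLHOST_PGM_POOL_GROW). */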
3921 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3922#endif
3923 if (RT_FAILURE(rc))
3924 return rc;
3925 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3926 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3927 return VINF_SUCCESS;
3928 }
3929
3930#ifdef PGMPOOL_WITH_CACHE
3931 /*
3932 * Free one cached page.
3933 */
3934 return pgmPoolCacheFreeOne(pPool, iUser);
3935#else
3936 /*
3937 * Flush the pool.
3938 * If we have tracking enabled, it should be possible to come up with
3939 * a cheap replacement strategy...
3940 */
3941 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
3942 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
3943 pgmPoolFlushAllInt(pPool);
3944 return VERR_PGM_POOL_FLUSHED;
3945#endif
3946}
3947
3948
3949/**
3950 * Allocates a page from the pool.
3951 *
3952 * This page may actually be a cached page and not in need of any processing
3953 * on the caller's part.
3954 *
3955 * @returns VBox status code.
3956 * @retval VINF_SUCCESS if a NEW page was allocated.
3957 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3958 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3959 * @param pVM The VM handle.
3960 * @param GCPhys The GC physical address of the page we're going to shadow.
3961 * For 4MB and 2MB PD entries, it's the first address the
3962 * shadow PT is covering.
3963 * @param enmKind The kind of mapping.
3964 * @param iUser The shadow page pool index of the user table.
3965 * @param iUserTable The index into the user table (shadowed).
3966 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3967 */
3968int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
3969{
3970 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3971 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3972 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3973 *ppPage = NULL;
3974
3975#ifdef PGMPOOL_WITH_CACHE
3976 if (pPool->fCacheEnabled)
3977 {
3978 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3979 if (RT_SUCCESS(rc2))
3980 {
3981 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3982 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3983 return rc2;
3984 }
3985 }
3986#endif
3987
3988 /*
3989 * Allocate a new one.
3990 */
3991 int rc = VINF_SUCCESS;
3992 uint16_t iNew = pPool->iFreeHead;
3993 if (iNew == NIL_PGMPOOL_IDX)
3994 {
3995 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3996 if (RT_FAILURE(rc))
3997 {
3998 if (rc != VERR_PGM_POOL_CLEARED)
3999 {
4000 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4001 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4002 return rc;
4003 }
4004 Log(("pgmPoolMakeMoreFreePages failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
4005 rc = VERR_PGM_POOL_FLUSHED;
4006 }
4007 iNew = pPool->iFreeHead;
4008 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4009 }
4010
4011 /* unlink the free head */
4012 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4013 pPool->iFreeHead = pPage->iNext;
4014 pPage->iNext = NIL_PGMPOOL_IDX;
4015
4016 /*
4017 * Initialize it.
4018 */
4019 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4020 pPage->enmKind = enmKind;
4021 pPage->GCPhys = GCPhys;
4022 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4023 pPage->fMonitored = false;
4024 pPage->fCached = false;
4025 pPage->fReusedFlushPending = false;
4026 pPage->fCR3Mix = false;
4027#ifdef PGMPOOL_WITH_MONITORING
4028 pPage->cModifications = 0;
4029 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4030 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4031#endif
4032#ifdef PGMPOOL_WITH_USER_TRACKING
4033 pPage->cPresent = 0;
4034 pPage->iFirstPresent = ~0;
4035
4036 /*
4037 * Insert into the tracking and cache. If this fails, free the page.
4038 */
4039 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4040 if (RT_FAILURE(rc3))
4041 {
4042 if (rc3 != VERR_PGM_POOL_CLEARED)
4043 {
4044 pPool->cUsedPages--;
4045 pPage->enmKind = PGMPOOLKIND_FREE;
4046 pPage->GCPhys = NIL_RTGCPHYS;
4047 pPage->iNext = pPool->iFreeHead;
4048 pPool->iFreeHead = pPage->idx;
4049 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4050 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4051 return rc3;
4052 }
4053 Log(("pgmPoolTrackInsert failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4054 rc = VERR_PGM_POOL_FLUSHED;
4055 }
4056#endif /* PGMPOOL_WITH_USER_TRACKING */
4057
4058 /*
4059 * Commit the allocation, clear the page and return.
4060 */
4061#ifdef VBOX_WITH_STATISTICS
4062 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4063 pPool->cUsedPagesHigh = pPool->cUsedPages;
4064#endif
4065
4066 if (!pPage->fZeroed)
4067 {
4068 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4069 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4070 ASMMemZeroPage(pv);
4071 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4072 }
4073
4074 *ppPage = pPage;
4075 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4076 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4077 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4078 return rc;
4079}
4080
4081
4082/**
4083 * Frees a usage of a pool page.
4084 *
4085 * @param pVM The VM handle.
4086 * @param HCPhys The HC physical address of the shadow page.
4087 * @param iUser The shadow page pool index of the user table.
4088 * @param iUserTable The index into the user table (shadowed).
4089 */
4090void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4091{
4092 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4093 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4094 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4095}
4096
4097
4098/**
4099 * Gets an in-use page in the pool by its physical address.
4100 *
4101 * @returns Pointer to the page.
4102 * @param pVM The VM handle.
4103 * @param HCPhys The HC physical address of the shadow page.
4104 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4105 */
4106PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4107{
4108 /** @todo profile this! */
4109 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4110 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4111 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%RHp -> %p:{.idx=%d .GCPhys=%RGp .enmKind=%d}\n",
4112 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
4113 return pPage;
4114}
4115
4116
4117/**
4118 * Flushes the entire pool and cache.
4119 *
4120 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4121 * and will execute this CR3 flush.
4122 *
4123 * @param pVM The VM handle.
4124 */
4125void pgmPoolFlushAll(PVM pVM)
4126{
4127 LogFlow(("pgmPoolFlushAll:\n"));
4128 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4129}
4130