source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 10171

Last change on this file since 10171 was 10063, checked in by vboxsync, 17 years ago

AMD64 updates for nested paging.

/* $Id: PGMAllPool.cpp 10063 2008-07-01 09:57:49Z vboxsync $ */
/** @file
 * PGM Shadow Page Pool.
 */

/*
 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
 * Clara, CA 95054 USA or visit http://www.sun.com if you need
 * additional information or have any questions.
 */


/*******************************************************************************
*   Header Files                                                               *
*******************************************************************************/
#define LOG_GROUP LOG_GROUP_PGM_POOL
#include <VBox/pgm.h>
#include <VBox/mm.h>
#include <VBox/em.h>
#include <VBox/cpum.h>
#ifdef IN_GC
# include <VBox/patm.h>
#endif
#include "PGMInternal.h"
#include <VBox/vm.h>
#include <VBox/disopcode.h>

#include <VBox/log.h>
#include <VBox/err.h>
#include <iprt/asm.h>


/*******************************************************************************
*   Internal Functions                                                         *
*******************************************************************************/
__BEGIN_DECLS
static void pgmPoolFlushAllInt(PPGMPOOL pPool);
#ifdef PGMPOOL_WITH_USER_TRACKING
DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#endif
#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
#endif
#ifdef PGMPOOL_WITH_CACHE
static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
#endif
#ifdef PGMPOOL_WITH_MONITORING
static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#endif
#ifndef IN_RING3
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
#endif
__END_DECLS


/**
 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
 *
 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
 * @param   enmKind     The page kind.
 */
DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            return true;
        default:
            return false;
    }
}


#ifdef IN_GC
/**
 * Maps a pool page into the current context.
 *
 * @returns Pointer to the mapping.
 * @param   pVM     The VM handle.
 * @param   pPage   The page to map.
 */
void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
{
    /* general pages. */
    if (pPage->idx >= PGMPOOL_IDX_FIRST)
    {
        Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
        void *pv;
        int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pv);
        AssertReleaseRC(rc);
        return pv;
    }

    /* special pages. */
    switch (pPage->idx)
    {
        case PGMPOOL_IDX_PD:
            return pVM->pgm.s.pGC32BitPD;
        case PGMPOOL_IDX_PAE_PD:
        case PGMPOOL_IDX_PAE_PD_0:
            return pVM->pgm.s.apGCPaePDs[0];
        case PGMPOOL_IDX_PAE_PD_1:
            return pVM->pgm.s.apGCPaePDs[1];
        case PGMPOOL_IDX_PAE_PD_2:
            return pVM->pgm.s.apGCPaePDs[2];
        case PGMPOOL_IDX_PAE_PD_3:
            return pVM->pgm.s.apGCPaePDs[3];
        case PGMPOOL_IDX_PDPT:
            return pVM->pgm.s.pGCPaePDPT;
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }
}
#endif /* IN_GC */


#ifdef PGMPOOL_WITH_MONITORING
/**
 * Determine the size of a write instruction.
 * @returns number of bytes written.
 * @param   pDis    The disassembler state.
 */
static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
{
    /*
     * This is very crude and possibly wrong for some opcodes,
     * but since it's not really supposed to be called we can
     * probably live with that.
     */
    return DISGetParamSize(pDis, &pDis->param1);
}


/**
 * Flushes a chain of pages sharing the same access monitor.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pPool   The pool.
 * @param   pPage   A page in the chain.
 */
int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    LogFlow(("pgmPoolMonitorChainFlush: Flush page %VGp type=%d\n", pPage->GCPhys, pPage->enmKind));

    /*
     * Find the list head.
     */
    uint16_t idx = pPage->idx;
    if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
    {
        while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
        {
            idx = pPage->iMonitoredPrev;
            Assert(idx != pPage->idx);
            pPage = &pPool->aPages[idx];
        }
    }

    /*
     * Iterate the list flushing each shadow page.
     */
    int rc = VINF_SUCCESS;
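    /* Note: pgmPoolFlushPage unlinks the page from the monitored chain, so each
       iteration below fetches iMonitoredNext before flushing. */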
    for (;;)
    {
        idx = pPage->iMonitoredNext;
        Assert(idx != pPage->idx);
        if (pPage->idx >= PGMPOOL_IDX_FIRST)
        {
            int rc2 = pgmPoolFlushPage(pPool, pPage);
            if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
                rc = VINF_PGM_SYNC_CR3;
        }
        /* next */
        if (idx == NIL_PGMPOOL_IDX)
            break;
        pPage = &pPool->aPages[idx];
    }
    return rc;
}


/**
 * Wrapper for getting the current context pointer to the entry being modified.
 *
 * @returns Pointer to the current context mapping of the entry.
 * @param   pPool       The pool.
 * @param   pvFault     The fault virtual address.
 * @param   GCPhysFault The fault physical address.
 * @param   cbEntry     The entry size.
 */
#ifdef IN_RING3
DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
#else
DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
#endif
{
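    /* In GC the guest address can be used directly; in R0 the guest physical
       address must be translated to a host pointer; in R3 pvFault already is a
       host pointer. All three round the address down to an entry boundary. */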
#ifdef IN_GC
    return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));

#elif defined(IN_RING0)
    void *pvRet;
    int rc = pgmRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
    AssertFatalRCSuccess(rc);
    return pvRet;

#elif defined(IN_RING3)
    return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
#else
# error "huh?"
#endif
}


/**
 * Process shadow entries before they are changed by the guest.
 *
 * For PT entries we will clear them. For PD entries, we'll simply check
 * for mapping conflicts and set the SyncCR3 FF if found.
 *
 * @param   pPool       The pool.
 * @param   pPage       The head page.
 * @param   GCPhysFault The guest physical fault address.
 * @param   pvAddress   In R0 and GC this is the guest context fault address (flat).
 *                      In R3 this is the host context 'fault' address.
 * @param   pCpu        The disassembler state for figuring out the write size.
 *                      This need not be specified if the caller knows we won't do cross entry accesses.
 */
#ifdef IN_RING3
void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
#else
void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
#endif
{
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
    const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;

    LogFlow(("pgmPoolMonitorChainChanging: %VGv phys=%VGp kind=%d\n", pvAddress, GCPhysFault, pPage->enmKind));

    for (;;)
    {
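        /* View the shadow page through whichever table type matches pPage->enmKind. */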
        union
        {
            void *pv;
            PX86PT pPT;
            PX86PTPAE pPTPae;
            PX86PD pPD;
            PX86PDPAE pPDPae;
            PX86PDPT pPDPT;
            PX86PML4 pPML4;
        } uShw;
        uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);

        switch (pPage->enmKind)
        {
            case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
            {
                const unsigned iShw = off / sizeof(X86PTE);
                if (uShw.pPT->a[iShw].n.u1Present)
                {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                    PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                    Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                               pGstPte->u & X86_PTE_PG_MASK);
# endif
                    uShw.pPT->a[iShw].u = 0;
                }
                break;
            }

            /* page/2 sized */
            case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
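                /* A 512-entry PAE PT shadows only half of the 1024-entry 32-bit
                   guest PT, so first check that the write hits the half covered
                   by this shadow page. */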
                if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
                {
                    const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
                    if (uShw.pPTPae->a[iShw].n.u1Present)
                    {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                        PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                        Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
                        pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                                   uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                                   pGstPte->u & X86_PTE_PG_MASK);
# endif
                        uShw.pPTPae->a[iShw].u = 0;
                    }
                }
                break;

            case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
            {
                const unsigned iShw = off / sizeof(X86PTEPAE);
                if (uShw.pPTPae->a[iShw].n.u1Present)
                {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                    PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                    Log4(("pgmPoolMonitorChainChanging pae_pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                               pGstPte->u & X86_PTE_PAE_PG_MASK);
# endif
                    uShw.pPTPae->a[iShw].u = 0;
                }

                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + pgmPoolDisasWriteSize(pCpu) > sizeof(X86PTEPAE))
                {
                    AssertFailed();
                }

                break;
            }

            case PGMPOOLKIND_ROOT_32BIT_PD:
            {
                const unsigned iShw = off / sizeof(X86PTE);    // ASSUMING 32-bit guest paging!
                if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                }
                /* paranoia / a bit assumptive. */
                else if (   pCpu
                         && (off & 3)
                         && (off & 3) + pgmPoolDisasWriteSize(pCpu) > 4)
                {
                    const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PTE);
                    if (    iShw2 != iShw
                        &&  iShw2 < ELEMENTS(uShw.pPD->a)
                        &&  uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
                }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                if (    uShw.pPD->a[iShw].n.u1Present
                    &&  !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
# ifdef IN_GC /* TLB load - we're pushing things a bit... */
                    ASMProbeReadByte(pvAddress);
# endif
                    pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
                    uShw.pPD->a[iShw].u = 0;
                }
#endif
                break;
            }

            case PGMPOOLKIND_ROOT_PAE_PD:
            {
                unsigned iShw = (off / sizeof(X86PTE)) * 2;    // ASSUMING 32-bit guest paging!
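                /* Each 32-bit guest PDE is shadowed by two PAE PDEs, hence the
                   factor of two above and the two-entry loop below. */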
                for (unsigned i = 0; i < 2; i++, iShw++)
                {
                    if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                    }
                    /* paranoia / a bit assumptive. */
                    else if (   pCpu
                             && (off & 3)
                             && (off & 3) + pgmPoolDisasWriteSize(pCpu) > 4)
                    {
                        const unsigned iShw2 = iShw + 2;
                        if (    iShw2 < ELEMENTS(uShw.pPDPae->a)
                            &&  (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                        {
                            Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
                            VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                        }
                    }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                    if (    uShw.pPDPae->a[iShw].n.u1Present
                        &&  !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
# ifdef IN_GC /* TLB load - we're pushing things a bit... */
                        ASMProbeReadByte(pvAddress);
# endif
                        pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
#endif
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
            case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
            {
                const unsigned iShw = off / sizeof(X86PDEPAE);
                if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                }
#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
                /* Causes trouble when the guest uses a PDE to refer to the whole
                 * page table level structure. (Invalidate here; faults later on
                 * when it tries to change the page table entries.)
                 */
                else
                {
                    if (uShw.pPDPae->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
                        pgmPoolFree(pPool->CTXSUFF(pVM),
                                    uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
                                    /* Note: hardcoded PAE implementation dependency */
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
                }
#endif
                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + pgmPoolDisasWriteSize(pCpu) > sizeof(X86PDEPAE))
                {
                    const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PDEPAE);
                    AssertReturnVoid(iShw2 < ELEMENTS(uShw.pPDPae->a));

                    if (    iShw2 != iShw
                        &&  uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
                    else if (uShw.pPDPae->a[iShw2].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
                        pgmPoolFree(pPool->CTXSUFF(pVM),
                                    uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
                                    /* Note: hardcoded PAE implementation dependency */
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
                        uShw.pPDPae->a[iShw2].u = 0;
                    }
#endif
                }
                break;
            }

            case PGMPOOLKIND_ROOT_PDPT:
            {
                /* Hopefully this doesn't happen very often:
                 * - touching unused parts of the page
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                const unsigned iShw = off / sizeof(X86PDPE);
                if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
                {
                    if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                    }
                    /* paranoia / a bit assumptive. */
                    else if (   pCpu
                             && (off & 7)
                             && (off & 7) + pgmPoolDisasWriteSize(pCpu) > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PDPE);
                        if (    iShw2 != iShw
                            &&  iShw2 < X86_PG_PAE_PDPE_ENTRIES
                            &&  uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
                        {
                            Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
                            VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                        }
                    }
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
            {
                /* Hopefully this doesn't happen very often:
                 * - messing with the bits of pd pointers without changing the physical address
                 */
#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
                if (!VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    const unsigned iShw = off / sizeof(X86PDPE);
                    if (uShw.pPDPT->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
                        pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
                        uShw.pPDPT->a[iShw].u = 0;
                    }
                    /* paranoia / a bit assumptive. */
                    if (    pCpu
                        &&  (off & 7)
                        &&  (off & 7) + pgmPoolDisasWriteSize(pCpu) > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PDPE);
                        if (uShw.pPDPT->a[iShw2].n.u1Present)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
                            pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
                            uShw.pPDPT->a[iShw2].u = 0;
                        }
                    }
                }
#endif
                break;
            }

            case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
            {
                /* Hopefully this doesn't happen very often:
                 * - messing with the bits of pd pointers without changing the physical address
                 */
#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
                if (!VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    const unsigned iShw = off / sizeof(X86PDPE);
                    if (uShw.pPML4->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
                        pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
                        uShw.pPML4->a[iShw].u = 0;
                    }
                    /* paranoia / a bit assumptive. */
                    if (    pCpu
                        &&  (off & 7)
                        &&  (off & 7) + pgmPoolDisasWriteSize(pCpu) > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PML4E);
                        if (uShw.pPML4->a[iShw2].n.u1Present)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
                            pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
                            uShw.pPML4->a[iShw2].u = 0;
                        }
                    }
                }
#endif
                break;
            }

            default:
                AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
        }

        /* next */
        if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
            return;
        pPage = &pPool->aPages[pPage->iMonitoredNext];
    }
}


# ifndef IN_RING3
/**
 * Checks if an access could be a fork operation in progress.
 *
 * Meaning that the guest is setting up the parent process for copy-on-write.
 *
 * @returns true if it's likely that we're forking, otherwise false.
 * @param   pPool       The pool.
 * @param   pCpu        The disassembled instruction.
 * @param   offFault    The access offset.
 */
DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
{
    /*
     * i386 Linux uses btr to clear X86_PTE_RW.
     * The functions involved are (2.6.16 source inspection):
     *     clear_bit
     *     ptep_set_wrprotect
     *     copy_one_pte
     *     copy_pte_range
     *     copy_pmd_range
     *     copy_pud_range
     *     copy_page_range
     *     dup_mmap
     *     dup_mm
     *     copy_mm
     *     copy_process
     *     do_fork
     */
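    /* Heuristic: for a PAE PTE the RW bit lives in the low dword, so a btr
       hitting an odd dword (offFault & 4) is presumably not the write-protect
       pattern described above. */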
    if (    pCpu->pCurInstr->opcode == OP_BTR
        &&  !(offFault & 4)
        /** @todo Validate that the bit index is X86_PTE_RW. */
       )
    {
        STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
        return true;
    }
    return false;
}


/**
 * Determine whether the page is likely to have been reused.
 *
 * @returns true if we consider the page as being reused for a different purpose.
 * @returns false if we consider it to still be a paging page.
 * @param   pPage       The page in question.
 * @param   pCpu        The disassembly info for the faulting instruction.
 * @param   pvFault     The fault address.
 *
 * @remark  The REP prefix check is left to the caller because of STOSD/W.
 */
DECLINLINE(bool) pgmPoolMonitorIsReused(PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu, RTGCPTR pvFault)
{
    switch (pCpu->pCurInstr->opcode)
    {
        case OP_PUSH:
            Log4(("pgmPoolMonitorIsReused: PUSH\n"));
            return true;
        case OP_PUSHF:
            Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
            return true;
        case OP_PUSHA:
            Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
            return true;
        case OP_FXSAVE:
            Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
            return true;
        case OP_MOVNTI:     /* solaris - block_zero_no_xmm */
            Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
            return true;
        case OP_MOVNTDQ:    /* solaris - hwblkclr & hwblkpagecopy */
            Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
            return true;
    }
    if (    (pCpu->param1.flags & USE_REG_GEN32)
        &&  (pCpu->param1.base.reg_gen == USE_REG_ESP))
    {
        Log4(("pgmPoolMonitorIsReused: ESP\n"));
        return true;
    }

    //if (pPage->fCR3Mix)
    //    return false;
    return false;
}


/**
 * Flushes the page being accessed.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                     PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * First, do the flushing.
     */
    int rc = pgmPoolMonitorChainFlush(pPool, pPage);

    /*
     * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
     */
    uint32_t cbWritten;
    int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
    if (VBOX_SUCCESS(rc2))
        pRegFrame->rip += pCpu->opsize;
    else if (rc2 == VERR_EM_INTERPRETER)
    {
#ifdef IN_GC
        if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
        {
            LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
                     pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
            rc = VINF_SUCCESS;
            STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
        }
        else
#endif
        {
            rc = VINF_EM_RAW_EMULATE_INSTR;
            STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
        }
    }
    else
        rc = rc2;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
    return rc;

}


/**
 * Handles the STOSD write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                          PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Execute REP STOSD.
     *
     * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
     * write situation, meaning that it's safe to write here.
     */
    RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
    while (pRegFrame->ecx)
    {
        pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
#ifdef IN_GC
        *(uint32_t *)pu32 = pRegFrame->eax;
#else
        PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
#endif
        pu32 += 4;
        GCPhysFault += 4;
        pRegFrame->edi += 4;
        pRegFrame->ecx--;
    }
    pRegFrame->rip += pCpu->opsize;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
    return VINF_SUCCESS;
}


/**
 * Handles the simple write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                           PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Clear all the pages. ASSUMES that pvFault is readable.
     */
    pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);

    /*
     * Interpret the instruction.
     */
    uint32_t cb;
    int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
    if (VBOX_SUCCESS(rc))
        pRegFrame->rip += pCpu->opsize;
    else if (rc == VERR_EM_INTERPRETER)
    {
        LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv - opcode=%d\n",
                 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
        rc = VINF_EM_RAW_EMULATE_INSTR;
        STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
    }

    /*
     * Quick hack, with logging enabled we're getting stale
     * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
     * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
     * have to be fixed to support this. But that'll have to wait till next week.
     *
     * An alternative is to keep track of the changed PTEs together with the
     * GCPhys from the guest PT. This may prove expensive though.
     *
     * At the moment, it's VITAL that it's done AFTER the instruction interpreting
     * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
     */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
    return rc;
}


/**
 * \#PF Handler callback for PT write accesses.
 *
 * @returns VBox status code (appropriate for GC return).
 * @param   pVM         VM Handle.
 * @param   uErrorCode  CPU Error code.
 * @param   pRegFrame   Trap register frame.
 *                      NULL on DMA and other non CPU access.
 * @param   pvFault     The fault address (cr2).
 * @param   GCPhysFault The GC physical address corresponding to pvFault.
 * @param   pvUser      User argument.
 */
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
{
    STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
    PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
    PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
    LogFlow(("pgmPoolAccessHandler: pvFault=%VGv pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));

    /*
     * We should ALWAYS have the list head as user parameter. This
     * is because we use that page to record the changes.
     */
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);

    /*
     * Disassemble the faulting instruction.
     */
    DISCPUSTATE Cpu;
    int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
    AssertRCReturn(rc, rc);

    /*
     * Check if it's worth dealing with.
     */
    bool fReused = false;
    if (    (   pPage->cModifications < 48  /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
             || pPage->fCR3Mix)
        &&  !(fReused = pgmPoolMonitorIsReused(pPage, &Cpu, pvFault))
        &&  !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
    {
        /*
         * Simple instructions, no REP prefix.
         */
        if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
        {
            rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
            return rc;
        }

        /*
         * Windows is frequently doing small memset() operations (netio test 4k+).
         * We have to deal with these or we'll kill the cache and performance.
         */
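        /* Fast path only for a small (ecx <= 0x20), dword-aligned, non page
           crossing ring-0 REP STOSD in flat 32-bit mode with DF clear and one
           of the two observed fill values; anything else takes the flush path. */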
        if (    Cpu.pCurInstr->opcode == OP_STOSWD
            &&  CPUMGetGuestCPL(pVM, pRegFrame) == 0
            &&  pRegFrame->ecx <= 0x20
            &&  pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
            &&  !((uintptr_t)pvFault & 3)
            &&  (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
            &&  Cpu.mode == CPUMODE_32BIT
            &&  Cpu.opmode == CPUMODE_32BIT
            &&  Cpu.addrmode == CPUMODE_32BIT
            &&  Cpu.prefix == PREFIX_REP
            &&  !pRegFrame->eflags.Bits.u1DF
            )
        {
            rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
            return rc;
        }

        /* REP prefix, don't bother. */
        STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
        Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%VGv opcode=%d prefix=%#x\n",
              pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
    }

    /*
     * Not worth it, so flush it.
     *
     * If we considered it to be reused, don't go back to ring-3
     * to emulate failed instructions since we usually cannot
     * interpret them. This may be a bit risky, in which case
     * the reuse detection must be fixed.
     */
    rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
    if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
        rc = VINF_SUCCESS;
    STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
    return rc;
}

# endif /* !IN_RING3 */
#endif /* PGMPOOL_WITH_MONITORING */



#ifdef PGMPOOL_WITH_CACHE
/**
 * Inserts a page into the GCPhys hash table.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolHashInsert: %VGp\n", pPage->GCPhys));
    Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
    uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
    pPage->iNext = pPool->aiHash[iHash];
    pPool->aiHash[iHash] = pPage->idx;
}


/**
 * Removes a page from the GCPhys hash table.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolHashRemove: %VGp\n", pPage->GCPhys));
    uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
    if (pPool->aiHash[iHash] == pPage->idx)
        pPool->aiHash[iHash] = pPage->iNext;
    else
    {
        uint16_t iPrev = pPool->aiHash[iHash];
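        /* Walk the collision chain and unlink the page; it must be in the
           chain, hence the release assertion below. */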
        for (;;)
        {
            const int16_t i = pPool->aPages[iPrev].iNext;
            if (i == pPage->idx)
            {
                pPool->aPages[iPrev].iNext = pPage->iNext;
                break;
            }
            if (i == NIL_PGMPOOL_IDX)
            {
                AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
                break;
            }
            iPrev = i;
        }
    }
    pPage->iNext = NIL_PGMPOOL_IDX;
}


/**
 * Frees up one cache page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
 * @param   pPool       The pool.
 * @param   iUser       The user index.
 */
static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
{
#ifndef IN_GC
    const PVM pVM = pPool->CTXSUFF(pVM);
#endif
    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
    STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);

    /*
     * Select one page from the tail of the age list.
     */
    uint16_t iToFree = pPool->iAgeTail;
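    /* Never pick the page the caller is allocating into (iUser); fall back to
       the previous entry in the LRU age list. */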
    if (iToFree == iUser)
        iToFree = pPool->aPages[iToFree].iAgePrev;
/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
    if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
    {
        uint16_t i = pPool->aPages[iToFree].iAgePrev;
        for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
        {
            if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
                continue;
            iToFree = i;
            break;
        }
    }
*/
    Assert(iToFree != iUser);
    AssertRelease(iToFree != NIL_PGMPOOL_IDX);

    int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
    if (rc == VINF_SUCCESS)
        PGM_INVL_GUEST_TLBS(); /* see PT handler. */
    return rc;
}


/**
 * Checks if a kind mismatch is really a page being reused
 * or if it's just normal remappings.
 *
 * @returns true if reused and the cached page (enmKind1) should be flushed
 * @returns false if not reused.
 * @param   enmKind1    The kind of the cached page.
 * @param   enmKind2    The kind of the requested page.
 */
static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
{
    switch (enmKind1)
    {
        /*
         * Never reuse them. There is no remapping in non-paging mode.
         */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
            return true;

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         */
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
                case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
                case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         */
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * These cannot be flushed, and it's common to reuse the PDs as PTs.
         */
        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
        case PGMPOOLKIND_ROOT_PDPT:
        case PGMPOOLKIND_ROOT_NESTED:
            return false;

        default:
            AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
    }
}


/**
 * Attempts to satisfy a pgmPoolAlloc request from the cache.
 *
 * @returns VBox status code.
 * @retval  VINF_PGM_CACHED_PAGE on success.
 * @retval  VERR_FILE_NOT_FOUND if not found.
 * @param   pPool       The pool.
 * @param   GCPhys      The GC physical address of the page we're going to shadow.
 * @param   enmKind     The kind of mapping.
 * @param   iUser       The shadow page pool index of the user table.
 * @param   iUserTable  The index into the user table (shadowed).
 * @param   ppPage      Where to store the pointer to the page.
 */
static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
{
#ifndef IN_GC
    const PVM pVM = pPool->CTXSUFF(pVM);
#endif
    /*
     * Look up the GCPhys in the hash.
     */
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    Log3(("pgmPoolCacheAlloc: %VGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
    if (i != NIL_PGMPOOL_IDX)
    {
        do
        {
            PPGMPOOLPAGE pPage = &pPool->aPages[i];
            Log3(("pgmPoolCacheAlloc: slot %d found page %VGp\n", i, pPage->GCPhys));
            if (pPage->GCPhys == GCPhys)
            {
                if ((PGMPOOLKIND)pPage->enmKind == enmKind)
                {
                    int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
                    if (VBOX_SUCCESS(rc))
                    {
                        *ppPage = pPage;
                        STAM_COUNTER_INC(&pPool->StatCacheHits);
                        return VINF_PGM_CACHED_PAGE;
                    }
                    return rc;
                }

                /*
                 * The kind is different. In some cases we should now flush the page
                 * as it has been reused, but in most cases this is normal remapping
                 * of PDs as PT or big pages using the GCPhys field in a slightly
                 * different way than the other kinds.
                 */
                if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
                {
                    STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
                    pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
                    PGM_INVL_GUEST_TLBS(); /* see PT handler. */
                    break;
                }
            }

            /* next */
            i = pPage->iNext;
        } while (i != NIL_PGMPOOL_IDX);
    }

    Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
    STAM_COUNTER_INC(&pPool->StatCacheMisses);
    return VERR_FILE_NOT_FOUND;
}


/**
 * Inserts a page into the cache.
 *
 * @param   pPool           The pool.
 * @param   pPage           The cached page.
 * @param   fCanBeCached    Set if the page is fit for caching from the caller's point of view.
 */
static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
{
    /*
     * Insert into the GCPhys hash if the page is fit for that.
     */
    Assert(!pPage->fCached);
    if (fCanBeCached)
    {
        pPage->fCached = true;
        pgmPoolHashInsert(pPool, pPage);
        Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
              pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
        STAM_COUNTER_INC(&pPool->StatCacheCacheable);
    }
    else
    {
        Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
              pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
        STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
    }

    /*
     * Insert at the head of the age list.
     */
    pPage->iAgePrev = NIL_PGMPOOL_IDX;
    pPage->iAgeNext = pPool->iAgeHead;
    if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
        pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
    else
        pPool->iAgeTail = pPage->idx;
    pPool->iAgeHead = pPage->idx;
}


/**
 * Flushes a cached page.
 *
 * @param   pPool       The pool.
 * @param   pPage       The cached page.
 */
static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolCacheFlushPage: %VGp\n", pPage->GCPhys));

    /*
     * Remove the page from the hash.
     */
    if (pPage->fCached)
    {
        pPage->fCached = false;
        pgmPoolHashRemove(pPool, pPage);
    }
    else
        Assert(pPage->iNext == NIL_PGMPOOL_IDX);

    /*
     * Remove it from the age list.
     */
    if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
    else
        pPool->iAgeTail = pPage->iAgePrev;
    if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
    else
        pPool->iAgeHead = pPage->iAgeNext;
    pPage->iAgeNext = NIL_PGMPOOL_IDX;
    pPage->iAgePrev = NIL_PGMPOOL_IDX;
}
#endif /* PGMPOOL_WITH_CACHE */


#ifdef PGMPOOL_WITH_MONITORING
/**
 * Looks for pages sharing the monitor.
 *
 * @returns Pointer to the head page.
 * @returns NULL if not found.
 * @param   pPool       The Pool
 * @param   pNewPage    The page which is going to be monitored.
 */
static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
{
#ifdef PGMPOOL_WITH_CACHE
    /*
     * Look up the GCPhys in the hash.
     */
    RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    if (i == NIL_PGMPOOL_IDX)
        return NULL;
    do
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[i];
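        /* The unsigned subtraction matches any pool page whose GCPhys lies in
           the same 4KB frame, including entries that carry a page offset (the
           CR3-mix case). */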
        if (    pPage->GCPhys - GCPhys < PAGE_SIZE
            &&  pPage != pNewPage)
        {
            switch (pPage->enmKind)
            {
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
                case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
                case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
                case PGMPOOLKIND_ROOT_32BIT_PD:
                case PGMPOOLKIND_ROOT_PAE_PD:
                case PGMPOOLKIND_ROOT_PDPT:
                {
                    /* find the head */
                    while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
                    {
                        Assert(pPage->iMonitoredPrev != pPage->idx);
                        pPage = &pPool->aPages[pPage->iMonitoredPrev];
                    }
                    return pPage;
                }

                /* ignore, no monitoring. */
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_ROOT_NESTED:
                    break;
                default:
                    AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
            }
        }

        /* next */
        i = pPage->iNext;
    } while (i != NIL_PGMPOOL_IDX);
#endif
    return NULL;
}

/**
 * Enables write monitoring of a guest page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
 * @param   pPool       The pool.
 * @param   pPage       The cached page.
 */
static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    LogFlow(("pgmPoolMonitorInsert %VGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));

    /*
     * Filter out the relevant kinds.
     */
    switch (pPage->enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_ROOT_PDPT:
            break;

        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            /* Nothing to monitor here. */
            return VINF_SUCCESS;

        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
#ifdef PGMPOOL_WITH_MIXED_PT_CR3
            break;
#endif
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        default:
            AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
    }

    /*
     * Install handler.
     */
    int rc;
    PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
    if (pPageHead)
    {
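        /* Another pool page already monitors this guest page; share its
           physical handler by linking in right after the chain head. */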
        Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
        Assert(pPageHead->iMonitoredPrev != pPage->idx);
        pPage->iMonitoredPrev = pPageHead->idx;
        pPage->iMonitoredNext = pPageHead->iMonitoredNext;
        if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
            pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
        pPageHead->iMonitoredNext = pPage->idx;
        rc = VINF_SUCCESS;
    }
    else
    {
        Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
        PVM pVM = pPool->CTXSUFF(pVM);
        const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
        rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
                                          GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
                                          pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
                                          pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
                                          pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
                                          pPool->pszAccessHandler);
        /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
         * the heap size should suffice. */
        AssertFatalRC(rc);
        if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
            rc = VERR_PGM_POOL_CLEARED;
    }
    pPage->fMonitored = true;
    return rc;
}


/**
 * Disables write monitoring of a guest page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
 * @param   pPool       The pool.
 * @param   pPage       The cached page.
 */
static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    /*
     * Filter out the relevant kinds.
     */
    switch (pPage->enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_ROOT_PDPT:
            break;

        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            /* Nothing to monitor here. */
            return VINF_SUCCESS;

        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
#ifdef PGMPOOL_WITH_MIXED_PT_CR3
            break;
#endif
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        default:
            AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
    }

    /*
     * Remove the page from the monitored list or uninstall it if last.
     */
    const PVM pVM = pPool->CTXSUFF(pVM);
    int rc;
    if (    pPage->iMonitoredNext != NIL_PGMPOOL_IDX
        ||  pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
    {
        if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
        {
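            /* We're the chain head: promote the next page to head and repoint
               the physical handler's user arguments at it. */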
            PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
            pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
            pNewHead->fCR3Mix = pPage->fCR3Mix;
            rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
                                                   pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
                                                   pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
                                                   pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pNewHead),
                                                   pPool->pszAccessHandler);
            AssertFatalRCSuccess(rc);
            pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
        }
        else
        {
            pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
            if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
            {
                pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
                pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
            }
            pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
            rc = VINF_SUCCESS;
        }
    }
    else
    {
        rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
        AssertFatalRC(rc);
        if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
            rc = VERR_PGM_POOL_CLEARED;
    }
    pPage->fMonitored = false;

    /*
     * Remove it from the list of modified pages (if in it).
     */
    pgmPoolMonitorModifiedRemove(pPool, pPage);

    return rc;
}


#ifdef PGMPOOL_WITH_MIXED_PT_CR3
/**
 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
 *
 * @param   pPool       The Pool.
 * @param   pPage       A page in the chain.
 * @param   fCR3Mix     The new fCR3Mix value.
 */
static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
{
    /* current */
    pPage->fCR3Mix = fCR3Mix;

    /* before */
    int16_t idx = pPage->iMonitoredPrev;
    while (idx != NIL_PGMPOOL_IDX)
    {
        pPool->aPages[idx].fCR3Mix = fCR3Mix;
        idx = pPool->aPages[idx].iMonitoredPrev;
    }

    /* after */
    idx = pPage->iMonitoredNext;
    while (idx != NIL_PGMPOOL_IDX)
    {
        pPool->aPages[idx].fCR3Mix = fCR3Mix;
        idx = pPool->aPages[idx].iMonitoredNext;
    }
}


/**
 * Installs or modifies monitoring of a CR3 page (special).
 *
 * We're pretending the CR3 page is shadowed by the pool so we can use the
 * generic mechanisms in detecting chained monitoring. (This also gives us a
 * taste of what code changes are required to really pool CR3 shadow pages.)
 *
 * @returns VBox status code.
 * @param   pPool       The pool.
 * @param   idxRoot     The CR3 (root) page index.
 * @param   GCPhysCR3   The (new) CR3 value.
 */
int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
{
    Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
    PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
    LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
             idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));

    /*
     * The unlikely case where it already matches.
     */
    if (pPage->GCPhys == GCPhysCR3)
    {
        Assert(pPage->fMonitored);
        return VINF_SUCCESS;
    }

    /*
     * Flush the current monitoring and remove it from the hash.
     */
    int rc = VINF_SUCCESS;
    if (pPage->fMonitored)
    {
        pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
        rc = pgmPoolMonitorFlush(pPool, pPage);
        if (rc == VERR_PGM_POOL_CLEARED)
            rc = VINF_SUCCESS;
        else
            AssertFatalRC(rc);
        pgmPoolHashRemove(pPool, pPage);
    }

    /*
     * Monitor the page at the new location and insert it into the hash.
     */
    pPage->GCPhys = GCPhysCR3;
    int rc2 = pgmPoolMonitorInsert(pPool, pPage);
    if (rc2 != VERR_PGM_POOL_CLEARED)
    {
        AssertFatalRC(rc2);
        if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
            rc = rc2;
    }
    pgmPoolHashInsert(pPool, pPage);
    pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
    return rc;
}


/**
 * Removes the monitoring of a CR3 page (special).
 *
 * @returns VBox status code.
 * @param   pPool       The pool.
 * @param   idxRoot     The CR3 (root) page index.
 */
int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
{
    Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
    PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
    LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
             idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));

    if (!pPage->fMonitored)
        return VINF_SUCCESS;

    pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
    int rc = pgmPoolMonitorFlush(pPool, pPage);
    if (rc != VERR_PGM_POOL_CLEARED)
        AssertFatalRC(rc);
    else
        rc = VINF_SUCCESS;
    pgmPoolHashRemove(pPool, pPage);
    Assert(!pPage->fMonitored);
    pPage->GCPhys = NIL_RTGCPHYS;
    return rc;
}
#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */


/**
 * Inserts the page into the list of modified pages.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
    AssertMsg(   pPage->iModifiedNext == NIL_PGMPOOL_IDX
              && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
              && pPool->iModifiedHead != pPage->idx,
              ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
               pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
               pPool->iModifiedHead, pPool->cModifiedPages));

    pPage->iModifiedNext = pPool->iModifiedHead;
    if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
        pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
    pPool->iModifiedHead = pPage->idx;
    pPool->cModifiedPages++;
#ifdef VBOX_WITH_STATISTICS
    if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
        pPool->cModifiedPagesHigh = pPool->cModifiedPages;
#endif
}


/**
 * Removes the page from the list of modified pages and resets the
 * modification counter.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page which is believed to be in the list of modified pages.
 */
static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
    if (pPool->iModifiedHead == pPage->idx)
    {
        Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
        pPool->iModifiedHead = pPage->iModifiedNext;
        if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
        {
            pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
            pPage->iModifiedNext = NIL_PGMPOOL_IDX;
        }
        pPool->cModifiedPages--;
    }
    else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
    {
        pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
        if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
        {
            pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
            pPage->iModifiedNext = NIL_PGMPOOL_IDX;
        }
        pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
        pPool->cModifiedPages--;
    }
    else
        Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
    pPage->cModifications = 0;
}


/**
 * Zaps the list of modified pages, resetting their modification counters in the process.
 *
 * @param   pVM     The VM handle.
 */
void pgmPoolMonitorModifiedClearAll(PVM pVM)
{
    PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
    LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));

    unsigned cPages = 0; NOREF(cPages);
1727 uint16_t idx = pPool->iModifiedHead;
1728 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1729 while (idx != NIL_PGMPOOL_IDX)
1730 {
1731 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1732 idx = pPage->iModifiedNext;
1733 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1734 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1735 pPage->cModifications = 0;
1736 Assert(++cPages);
1737 }
1738 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1739 pPool->cModifiedPages = 0;
1740}
1741
1742
1743/**
1744 * Clear all shadow pages and clear all modification counters.
1745 *
1746 * @param pVM The VM handle.
1747 * @remark Should only be used when monitoring is available, thus placed in
1748 * the PGMPOOL_WITH_MONITORING #ifdef.
1749 */
1750void pgmPoolClearAll(PVM pVM)
1751{
1752 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1753 STAM_PROFILE_START(&pPool->StatClearAll, c);
1754 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1755
1756 /*
     * Iterate all the pages until we've encountered all that are in use.
     * This is a simple but not quite optimal solution.
1759 */
1760 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1761 unsigned cLeft = pPool->cUsedPages;
1762 unsigned iPage = pPool->cCurPages;
1763 while (--iPage >= PGMPOOL_IDX_FIRST)
1764 {
1765 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1766 if (pPage->GCPhys != NIL_RTGCPHYS)
1767 {
1768 switch (pPage->enmKind)
1769 {
1770 /*
1771 * We only care about shadow page tables.
1772 */
1773 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1774 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1775 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1776 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1777 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1778 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1779 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1780 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1781 {
1782#ifdef PGMPOOL_WITH_USER_TRACKING
1783 if (pPage->cPresent)
1784#endif
1785 {
1786 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
1787 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1788 ASMMemZeroPage(pvShw);
1789 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1790#ifdef PGMPOOL_WITH_USER_TRACKING
1791 pPage->cPresent = 0;
1792 pPage->iFirstPresent = ~0;
1793#endif
1794 }
1795 }
1796 /* fall thru */
1797
1798 default:
1799 Assert(!pPage->cModifications || ++cModifiedPages);
1800 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1801 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1802 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1803 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1804 pPage->cModifications = 0;
1805 break;
1806
1807 }
1808 if (!--cLeft)
1809 break;
1810 }
1811 }
1812
    /* sweep the special pages too. */
1814 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1815 {
1816 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1817 if (pPage->GCPhys != NIL_RTGCPHYS)
1818 {
1819 Assert(!pPage->cModifications || ++cModifiedPages);
1820 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1821 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1822 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1823 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1824 pPage->cModifications = 0;
1825 }
1826 }
1827
1828#ifndef DEBUG_michael
1829 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1830#endif
1831 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1832 pPool->cModifiedPages = 0;
1833
1834#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1835 /*
1836 * Clear all the GCPhys links and rebuild the phys ext free list.
1837 */
1838 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
1839 pRam;
1840 pRam = CTXALLSUFF(pRam->pNext))
1841 {
1842 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1843 while (iPage-- > 0)
1844 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
1845 }
1846
1847 pPool->iPhysExtFreeHead = 0;
1848 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
1849 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
1850 for (unsigned i = 0; i < cMaxPhysExts; i++)
1851 {
1852 paPhysExts[i].iNext = i + 1;
1853 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
1854 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
1855 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
1856 }
1857 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
1858#endif
1859
1860
1861 pPool->cPresent = 0;
1862 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
1863}
1864#endif /* PGMPOOL_WITH_MONITORING */
1865
1866
1867#ifdef PGMPOOL_WITH_USER_TRACKING
1868/**
1869 * Frees up at least one user entry.
1870 *
1871 * @returns VBox status code.
1872 * @retval VINF_SUCCESS if successfully added.
1873 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1874 * @param pPool The pool.
1875 * @param iUser The user index.
1876 */
1877static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
1878{
1879 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
1880#ifdef PGMPOOL_WITH_CACHE
1881 /*
1882 * Just free cached pages in a braindead fashion.
1883 */
1884 /** @todo walk the age list backwards and free the first with usage. */
1885 int rc = VINF_SUCCESS;
1886 do
1887 {
1888 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
1889 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
1890 rc = rc2;
1891 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
1892 return rc;
1893#else
1894 /*
1895 * Lazy approach.
1896 */
1897 pgmPoolFlushAllInt(pPool);
1898 return VERR_PGM_POOL_FLUSHED;
1899#endif
1900}
1901
1902
1903/**
1904 * Inserts a page into the cache.
1905 *
 * This will create a user node for the page, insert it into the GCPhys
1907 * hash, and insert it into the age list.
1908 *
1909 * @returns VBox status code.
1910 * @retval VINF_SUCCESS if successfully added.
1911 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
1913 * @param pPool The pool.
1914 * @param pPage The cached page.
 * @param GCPhys The GC physical address of the page we're going to shadow.
1916 * @param iUser The user index.
1917 * @param iUserTable The user table index.
1918 */
1919DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
1920{
1921 int rc = VINF_SUCCESS;
1922 PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);
1923
1924 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
1925
1926 /*
     * Find a free user node.
1928 */
1929 uint16_t i = pPool->iUserFreeHead;
1930 if (i == NIL_PGMPOOL_USER_INDEX)
1931 {
1932 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1933 if (VBOX_FAILURE(rc))
1934 return rc;
1935 i = pPool->iUserFreeHead;
1936 }
1937
1938 /*
1939 * Unlink the user node from the free list,
1940 * initialize and insert it into the user list.
1941 */
1942 pPool->iUserFreeHead = pUser[i].iNext;
1943 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
1944 pUser[i].iUser = iUser;
1945 pUser[i].iUserTable = iUserTable;
1946 pPage->iUserHead = i;
1947
1948 /*
1949 * Insert into cache and enable monitoring of the guest page if enabled.
1950 *
1951 * Until we implement caching of all levels, including the CR3 one, we'll
     * have to make sure we don't try to monitor & cache any recursive reuse of
     * a monitored CR3 page. Because all Windows versions do this, we'll
1954 * have to be able to do combined access monitoring, CR3 + PT and
1955 * PD + PT (guest PAE).
1956 *
1957 * Update:
1958 * We're now cooperating with the CR3 monitor if an uncachable page is found.
1959 */
1960#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
1961# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1962 const bool fCanBeMonitored = true;
1963# else
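    /* A page that shares its physical page with the currently monitored guest CR3 cannot be monitored as well; big pages are exempt. */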
1964 bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
1965 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
1966 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
1967# endif
1968# ifdef PGMPOOL_WITH_CACHE
1969 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
1970# endif
1971 if (fCanBeMonitored)
1972 {
1973# ifdef PGMPOOL_WITH_MONITORING
1974 rc = pgmPoolMonitorInsert(pPool, pPage);
1975 if (rc == VERR_PGM_POOL_CLEARED)
1976 {
1977 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
1978# ifndef PGMPOOL_WITH_CACHE
1979 pgmPoolMonitorFlush(pPool, pPage);
1980 rc = VERR_PGM_POOL_FLUSHED;
1981# endif
1982 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
1983 pUser[i].iNext = pPool->iUserFreeHead;
1984 pUser[i].iUser = NIL_PGMPOOL_IDX;
1985 pPool->iUserFreeHead = i;
1986 }
1987 }
1988# endif
#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
1990 return rc;
1991}
1992
1993
1994# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
1995/**
1996 * Adds a user reference to a page.
1997 *
 * This will add a user record to the page and move the page to the head
 * of the cache's age list.
 *
2001 * @returns VBox status code.
2002 * @retval VINF_SUCCESS if successfully added.
2003 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2004 * @param pPool The pool.
2005 * @param pPage The cached page.
2006 * @param iUser The user index.
 * @param iUserTable The user table index.
2008 */
2009static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2010{
2011 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2012
2013 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
2014# ifdef VBOX_STRICT
2015 /*
     * Check that the entry doesn't already exist.
2017 */
2018 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2019 {
2020 uint16_t i = pPage->iUserHead;
2021 do
2022 {
2023 Assert(i < pPool->cMaxUsers);
2024 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2025 i = paUsers[i].iNext;
2026 } while (i != NIL_PGMPOOL_USER_INDEX);
2027 }
2028# endif
2029
2030 /*
2031 * Allocate a user node.
2032 */
2033 uint16_t i = pPool->iUserFreeHead;
2034 if (i == NIL_PGMPOOL_USER_INDEX)
2035 {
2036 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2037 if (VBOX_FAILURE(rc))
2038 return rc;
2039 i = pPool->iUserFreeHead;
2040 }
2041 pPool->iUserFreeHead = paUsers[i].iNext;
2042
2043 /*
2044 * Initialize the user node and insert it.
2045 */
2046 paUsers[i].iNext = pPage->iUserHead;
2047 paUsers[i].iUser = iUser;
2048 paUsers[i].iUserTable = iUserTable;
2049 pPage->iUserHead = i;
2050
2051# ifdef PGMPOOL_WITH_CACHE
2052 /*
2053 * Tell the cache to update its replacement stats for this page.
2054 */
2055 pgmPoolCacheUsed(pPool, pPage);
2056# endif
2057 return VINF_SUCCESS;
2058}
2059# endif /* PGMPOOL_WITH_CACHE */
2060
2061
2062/**
2063 * Frees a user record associated with a page.
2064 *
 * This does not clear the entry in the user table, it simply returns the
 * user record to the chain of free records.
2067 *
2068 * @param pPool The pool.
 * @param pPage The shadow page.
2070 * @param iUser The shadow page pool index of the user table.
2071 * @param iUserTable The index into the user table (shadowed).
2072 */
2073static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2074{
2075 /*
2076 * Unlink and free the specified user entry.
2077 */
2078 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2079
2080 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2081 uint16_t i = pPage->iUserHead;
2082 if ( i != NIL_PGMPOOL_USER_INDEX
2083 && paUsers[i].iUser == iUser
2084 && paUsers[i].iUserTable == iUserTable)
2085 {
2086 pPage->iUserHead = paUsers[i].iNext;
2087
2088 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2089 paUsers[i].iNext = pPool->iUserFreeHead;
2090 pPool->iUserFreeHead = i;
2091 return;
2092 }
2093
2094 /* General: Linear search. */
2095 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2096 while (i != NIL_PGMPOOL_USER_INDEX)
2097 {
2098 if ( paUsers[i].iUser == iUser
2099 && paUsers[i].iUserTable == iUserTable)
2100 {
2101 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2102 paUsers[iPrev].iNext = paUsers[i].iNext;
2103 else
2104 pPage->iUserHead = paUsers[i].iNext;
2105
2106 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2107 paUsers[i].iNext = pPool->iUserFreeHead;
2108 pPool->iUserFreeHead = i;
2109 return;
2110 }
2111 iPrev = i;
2112 i = paUsers[i].iNext;
2113 }
2114
2115 /* Fatal: didn't find it */
2116 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
2117 iUser, iUserTable, pPage->GCPhys));
2118}
2119
2120
2121/**
2122 * Gets the entry size of a shadow table.
2123 *
2124 * @param enmKind The kind of page.
2125 *
2126 * @returns The size of the entry in bytes. That is, 4 or 8.
2127 * @returns If the kind is not for a table, an assertion is raised and 0 is
2128 * returned.
2129 */
2130DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2131{
2132 switch (enmKind)
2133 {
2134 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2135 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2136 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2137 case PGMPOOLKIND_ROOT_32BIT_PD:
2138 return 4;
2139
2140 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2141 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2142 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2143 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2144 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2145 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2146 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2147 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2148 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2149 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2150 case PGMPOOLKIND_ROOT_PAE_PD:
2151 case PGMPOOLKIND_ROOT_PDPT:
2152 case PGMPOOLKIND_ROOT_NESTED:
2153 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2154 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2155 return 8;
2156
2157 default:
2158 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2159 }
2160}
2161
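/*
 * A minimal usage sketch, assuming an enmKind and iUserTable as used
 * elsewhere in this file: the entry size is what turns a user table index
 * into a byte offset when a slot in the shadow table has to be located.
 */
#if 0 /* illustration only */
    unsigned const cbEntry  = pgmPoolTrackGetShadowEntrySize(enmKind);
    unsigned const offEntry = iUserTable * cbEntry; /* byte offset of the slot in the shadow table */
#endif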
2162
2163/**
2164 * Gets the entry size of a guest table.
2165 *
2166 * @param enmKind The kind of page.
2167 *
2168 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2169 * @returns If the kind is not for a table, an assertion is raised and 0 is
2170 * returned.
2171 */
2172DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2173{
2174 switch (enmKind)
2175 {
2176 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2177 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2178 case PGMPOOLKIND_ROOT_32BIT_PD:
2179 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2180 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2181 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2182 return 4;
2183
2184 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2185 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2186 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2187 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2188 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2189 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2190 case PGMPOOLKIND_ROOT_PAE_PD:
2191 case PGMPOOLKIND_ROOT_PDPT:
2192 return 8;
2193
2194 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2195 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2196 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2197 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2198 case PGMPOOLKIND_ROOT_NESTED:
2199 /** @todo can we return 0? (nobody is calling this...) */
2200 AssertFailed();
2201 return 0;
2202
2203 default:
2204 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2205 }
2206}
2207
2208
2209#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2210/**
2211 * Scans one shadow page table for mappings of a physical page.
2212 *
2213 * @param pVM The VM handle.
2214 * @param pPhysPage The guest page in question.
2215 * @param iShw The shadow page table.
2216 * @param cRefs The number of references made in that PT.
2217 */
2218static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2219{
    LogFlow(("pgmPoolTrackFlushGCPhysPTInt: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2221 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2222
2223 /*
2224 * Assert sanity.
2225 */
2226 Assert(cRefs == 1);
2227 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2228 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2229
2230 /*
2231 * Then, clear the actual mappings to the page in the shadow PT.
2232 */
2233 switch (pPage->enmKind)
2234 {
2235 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2236 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2237 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2238 {
2239 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2240 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2241 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2242 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2243 {
                Log4(("pgmPoolTrackFlushGCPhysPTInt: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2245 pPT->a[i].u = 0;
2246 cRefs--;
2247 if (!cRefs)
2248 return;
2249 }
2250#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2251 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2252 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2253 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2254 {
2255 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2256 pPT->a[i].u = 0;
2257 }
2258#endif
2259 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2260 break;
2261 }
2262
2263 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2264 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2265 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2266 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2267 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2268 {
2269 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2270 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2271 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2272 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2273 {
                Log4(("pgmPoolTrackFlushGCPhysPTInt: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2275 pPT->a[i].u = 0;
2276 cRefs--;
2277 if (!cRefs)
2278 return;
2279 }
2280#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2281 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2282 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2283 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2284 {
2285 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2286 pPT->a[i].u = 0;
2287 }
2288#endif
2289 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2290 break;
2291 }
2292
2293 default:
2294 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2295 }
2296}
2297
2298
2299/**
2300 * Scans one shadow page table for mappings of a physical page.
2301 *
2302 * @param pVM The VM handle.
2303 * @param pPhysPage The guest page in question.
2304 * @param iShw The shadow page table.
2305 * @param cRefs The number of references made in that PT.
2306 */
2307void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2308{
2309 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool);
2310 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2311 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2312 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2313 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2314 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2315}
2316
2317
2318/**
2319 * Flushes a list of shadow page tables mapping the same physical page.
2320 *
2321 * @param pVM The VM handle.
2322 * @param pPhysPage The guest page in question.
2323 * @param iPhysExt The physical cross reference extent list to flush.
2324 */
2325void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2326{
2327 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2328 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
    LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%d\n", pPhysPage->HCPhys, iPhysExt));
2330
2331 const uint16_t iPhysExtStart = iPhysExt;
2332 PPGMPOOLPHYSEXT pPhysExt;
2333 do
2334 {
2335 Assert(iPhysExt < pPool->cMaxPhysExts);
2336 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2337 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2338 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2339 {
2340 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2341 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2342 }
2343
2344 /* next */
2345 iPhysExt = pPhysExt->iNext;
2346 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2347
2348 /* insert the list into the free list and clear the ram range entry. */
2349 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2350 pPool->iPhysExtFreeHead = iPhysExtStart;
2351 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2352
2353 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2354}
2355#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2356
2357
2358/**
2359 * Scans all shadow page tables for mappings of a physical page.
2360 *
2361 * This may be slow, but it's most likely more efficient than cleaning
2362 * out the entire page pool / cache.
2363 *
2364 * @returns VBox status code.
 * @retval VINF_SUCCESS if all references have been successfully cleared.
2366 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2367 * a page pool cleaning.
2368 *
2369 * @param pVM The VM handle.
2370 * @param pPhysPage The guest page in question.
2371 */
2372int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2373{
2374 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2375 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2376 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2377 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2378
2379#if 1
2380 /*
2381 * There is a limit to what makes sense.
2382 */
2383 if (pPool->cPresent > 1024)
2384 {
2385 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2386 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2387 return VINF_PGM_GCPHYS_ALIASED;
2388 }
2389#endif
2390
2391 /*
     * Iterate all the pages until we've encountered all that are in use.
     * This is a simple but not quite optimal solution.
2394 */
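    /* Note: assigning u64 to u32 below deliberately truncates; a 32-bit PTE
       can only address the low 4GB, so the low 32 bits are all we can match. */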
2395 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2396 const uint32_t u32 = u64;
2397 unsigned cLeft = pPool->cUsedPages;
2398 unsigned iPage = pPool->cCurPages;
2399 while (--iPage >= PGMPOOL_IDX_FIRST)
2400 {
2401 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2402 if (pPage->GCPhys != NIL_RTGCPHYS)
2403 {
2404 switch (pPage->enmKind)
2405 {
2406 /*
2407 * We only care about shadow page tables.
2408 */
2409 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2410 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2411 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2412 {
2413 unsigned cPresent = pPage->cPresent;
2414 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2415 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2416 if (pPT->a[i].n.u1Present)
2417 {
2418 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2419 {
2420 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2421 pPT->a[i].u = 0;
2422 }
2423 if (!--cPresent)
2424 break;
2425 }
2426 break;
2427 }
2428
2429 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2430 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2431 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2432 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2433 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2434 {
2435 unsigned cPresent = pPage->cPresent;
2436 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2437 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2438 if (pPT->a[i].n.u1Present)
2439 {
2440 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2441 {
2442 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2443 pPT->a[i].u = 0;
2444 }
2445 if (!--cPresent)
2446 break;
2447 }
2448 break;
2449 }
2450 }
2451 if (!--cLeft)
2452 break;
2453 }
2454 }
2455
2456 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2457 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2458 return VINF_SUCCESS;
2459}
2460
2461
2462/**
2463 * Clears the user entry in a user table.
2464 *
2465 * This is used to remove all references to a page when flushing it.
2466 */
2467static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2468{
2469 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2470 Assert(pUser->iUser < pPool->cCurPages);
2471
2472 /*
2473 * Map the user page.
2474 */
2475 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2476 union
2477 {
2478 uint64_t *pau64;
2479 uint32_t *pau32;
2480 } u;
2481 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pUserPage);
2482
2483 /* Safety precaution in case we change the paging for other modes too in the future. */
2484 Assert(PGMGetHyperCR3(CTXSUFF(pPool->pVM)) != pPage->Core.Key);
2485
2486#ifdef VBOX_STRICT
2487 /*
2488 * Some sanity checks.
2489 */
2490 switch (pUserPage->enmKind)
2491 {
2492 case PGMPOOLKIND_ROOT_32BIT_PD:
            Assert(!(u.pau32[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2494 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2495 break;
2496 case PGMPOOLKIND_ROOT_PAE_PD:
            Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2498 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2499 break;
2500 case PGMPOOLKIND_ROOT_PDPT:
2501 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2502 Assert(pUser->iUserTable < 4);
2503 break;
2504 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2505 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2506 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2507 break;
2508 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
            Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2510 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2511 break;
2512 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2513 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2514 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2515 break;
2516 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2517 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2518 /* GCPhys >> PAGE_SHIFT is the index here */
2519 break;
2520 case PGMPOOLKIND_ROOT_NESTED:
2521 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2522 break;
2523
2524 default:
2525 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2526 break;
2527 }
2528#endif /* VBOX_STRICT */
2529
2530 /*
2531 * Clear the entry in the user page.
2532 */
2533 switch (pUserPage->enmKind)
2534 {
2535 /* 32-bit entries */
2536 case PGMPOOLKIND_ROOT_32BIT_PD:
2537 u.pau32[pUser->iUserTable] = 0;
2538 break;
2539
2540 /* 64-bit entries */
2541 case PGMPOOLKIND_ROOT_PAE_PD:
2542 case PGMPOOLKIND_ROOT_PDPT:
2543 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2544 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2545 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2546 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2547 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2548 case PGMPOOLKIND_ROOT_NESTED:
2549 u.pau64[pUser->iUserTable] = 0;
2550 break;
2551
2552 default:
2553 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2554 }
2555}
2556
2557
2558/**
2559 * Clears all users of a page.
2560 */
2561static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2562{
2563 /*
2564 * Free all the user records.
2565 */
2566 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2567 uint16_t i = pPage->iUserHead;
2568 while (i != NIL_PGMPOOL_USER_INDEX)
2569 {
        /* Clear the entry in the user table. */
2571 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2572
2573 /* Free it. */
2574 const uint16_t iNext = paUsers[i].iNext;
2575 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2576 paUsers[i].iNext = pPool->iUserFreeHead;
2577 pPool->iUserFreeHead = i;
2578
2579 /* Next. */
2580 i = iNext;
2581 }
2582 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2583}
2584
2585
2586#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2587/**
2588 * Allocates a new physical cross reference extent.
2589 *
2590 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2591 * @param pVM The VM handle.
2592 * @param piPhysExt Where to store the phys ext index.
2593 */
2594PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2595{
2596 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2597 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2598 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2599 {
2600 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2601 return NULL;
2602 }
2603 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2604 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2605 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2606 *piPhysExt = iPhysExt;
2607 return pPhysExt;
2608}
2609
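/*
 * A minimal usage sketch, assuming an iShwPT shadow page table index as used
 * elsewhere in this file: allocate an extent and record a first reference.
 * Free entries come back from pgmPoolTrackPhysExtFree with all aidx slots
 * set to NIL_PGMPOOL_IDX, so only the slot actually used needs writing.
 */
#if 0 /* illustration only */
    uint16_t iPhysExt;
    PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
    if (pPhysExt)
        pPhysExt->aidx[0] = iShwPT;
#endif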
2610
2611/**
2612 * Frees a physical cross reference extent.
2613 *
2614 * @param pVM The VM handle.
2615 * @param iPhysExt The extent to free.
2616 */
2617void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2618{
2619 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2620 Assert(iPhysExt < pPool->cMaxPhysExts);
2621 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2622 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2623 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2624 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2625 pPool->iPhysExtFreeHead = iPhysExt;
2626}
2627
2628
2629/**
 * Frees a list of physical cross reference extents.
 *
 * @param pVM The VM handle.
 * @param iPhysExt The index of the head of the extent list to free.
2634 */
2635void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2636{
2637 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2638
2639 const uint16_t iPhysExtStart = iPhysExt;
2640 PPGMPOOLPHYSEXT pPhysExt;
2641 do
2642 {
2643 Assert(iPhysExt < pPool->cMaxPhysExts);
2644 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2645 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2646 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2647
2648 /* next */
2649 iPhysExt = pPhysExt->iNext;
2650 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2651
2652 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2653 pPool->iPhysExtFreeHead = iPhysExtStart;
2654}
2655
2656/**
2657 * Insert a reference into a list of physical cross reference extents.
2658 *
2659 * @returns The new ram range flags (top 16-bits).
2660 *
2661 * @param pVM The VM handle.
2662 * @param iPhysExt The physical extent index of the list head.
2663 * @param iShwPT The shadow page table index.
2664 *
2665 */
2666static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2667{
2668 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2669 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2670
2671 /* special common case. */
2672 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2673 {
2674 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2675 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2676 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2677 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2678 }
2679
2680 /* general treatment. */
2681 const uint16_t iPhysExtStart = iPhysExt;
2682 unsigned cMax = 15;
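    /* cMax caps the chain walk; anything longer is written off as overflowed below. */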
2683 for (;;)
2684 {
2685 Assert(iPhysExt < pPool->cMaxPhysExts);
2686 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2687 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2688 {
2689 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2690 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2691 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2692 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2693 }
2694 if (!--cMax)
2695 {
2696 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2697 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2698 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2699 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2700 }
        /* Advance to the next extent in the chain; when the chain ends, fall
           out of the loop and append a new extent below. */
        iPhysExt = paPhysExts[iPhysExt].iNext;
        if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
            break;
    }
2702
2703 /* add another extent to the list. */
2704 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2705 if (!pNew)
2706 {
2707 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2708 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2709 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2710 }
2711 pNew->iNext = iPhysExtStart;
2712 pNew->aidx[0] = iShwPT;
2713 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2714 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2715}
2716
2717
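/*
 * Note on the packed 16-bit tracking value used below: the low
 * MM_RAM_FLAGS_IDX_MASK bits hold either a single pool page index or the
 * head of a phys ext list, while the bits from position
 * (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT) upwards hold the
 * reference count, with MM_RAM_FLAGS_CREFS_PHYSEXT marking the index as an
 * extent list and MM_RAM_FLAGS_IDX_OVERFLOWED as the give-up state.
 */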
2718/**
 * Add a reference to a guest physical page where extents are in use.
2720 *
2721 * @returns The new ram range flags (top 16-bits).
2722 *
2723 * @param pVM The VM handle.
2724 * @param u16 The ram range flags (top 16-bits).
2725 * @param iShwPT The shadow page table index.
2726 */
2727uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2728{
2729 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2730 {
2731 /*
2732 * Convert to extent list.
2733 */
2734 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2735 uint16_t iPhysExt;
2736 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2737 if (pPhysExt)
2738 {
2739 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2740 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2741 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2742 pPhysExt->aidx[1] = iShwPT;
2743 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2744 }
2745 else
2746 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2747 }
2748 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2749 {
2750 /*
2751 * Insert into the extent list.
2752 */
2753 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2754 }
2755 else
2756 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2757 return u16;
2758}
2759
2760
2761/**
2762 * Clear references to guest physical memory.
2763 *
2764 * @param pPool The pool.
2765 * @param pPage The page.
2766 * @param pPhysPage Pointer to the aPages entry in the ram range.
2767 */
2768void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
2769{
2770 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
2771 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2772
2773 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2774 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2775 {
2776 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2777 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2778 do
2779 {
2780 Assert(iPhysExt < pPool->cMaxPhysExts);
2781
2782 /*
2783 * Look for the shadow page and check if it's all freed.
2784 */
2785 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2786 {
2787 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2788 {
2789 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2790
2791 for (i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2792 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2793 {
2794 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2795 return;
2796 }
2797
2798 /* we can free the node. */
2799 PVM pVM = pPool->CTXSUFF(pVM);
2800 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2801 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2802 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2803 {
2804 /* lonely node */
2805 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2806 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
2807 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2808 }
2809 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2810 {
2811 /* head */
2812 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
2813 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
2814 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2815 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2816 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2817 }
2818 else
2819 {
2820 /* in list */
2821 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2822 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2823 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2824 }
2825 iPhysExt = iPhysExtNext;
2826 return;
2827 }
2828 }
2829
2830 /* next */
2831 iPhysExtPrev = iPhysExt;
2832 iPhysExt = paPhysExts[iPhysExt].iNext;
2833 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2834
2835 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2836 }
2837 else /* nothing to do */
2838 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
2839}
2840
2841
2842
2843/**
2844 * Clear references to guest physical memory.
2845 *
 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
2847 * is assumed to be correct, so the linear search can be skipped and we can assert
2848 * at an earlier point.
2849 *
2850 * @param pPool The pool.
2851 * @param pPage The page.
2852 * @param HCPhys The host physical address corresponding to the guest page.
2853 * @param GCPhys The guest physical address corresponding to HCPhys.
2854 */
2855static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
2856{
2857 /*
2858 * Walk range list.
2859 */
2860 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
2861 while (pRam)
2862 {
2863 RTGCPHYS off = GCPhys - pRam->GCPhys;
2864 if (off < pRam->cb)
2865 {
2866 /* does it match? */
2867 const unsigned iPage = off >> PAGE_SHIFT;
2868 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
2869 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2870 {
2871 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2872 return;
2873 }
2874 break;
2875 }
2876 pRam = CTXALLSUFF(pRam->pNext);
2877 }
2878 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
2879}
2880
2881
2882/**
2883 * Clear references to guest physical memory.
2884 *
2885 * @param pPool The pool.
2886 * @param pPage The page.
2887 * @param HCPhys The host physical address corresponding to the guest page.
 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
2889 */
2890static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
2891{
2892 /*
2893 * Walk range list.
2894 */
2895 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
2896 while (pRam)
2897 {
2898 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
2899 if (off < pRam->cb)
2900 {
2901 /* does it match? */
2902 const unsigned iPage = off >> PAGE_SHIFT;
2903 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
2904 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2905 {
2906 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2907 return;
2908 }
2909 break;
2910 }
2911 pRam = CTXALLSUFF(pRam->pNext);
2912 }
2913
2914 /*
2915 * Damn, the hint didn't work. We'll have to do an expensive linear search.
2916 */
2917 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
2918 pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
2919 while (pRam)
2920 {
2921 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2922 while (iPage-- > 0)
2923 {
2924 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2925 {
2926 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
2927 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
2928 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2929 return;
2930 }
2931 }
2932 pRam = CTXALLSUFF(pRam->pNext);
2933 }
2934
2935 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
2936}
2937
2938
2939/**
2940 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
2941 *
2942 * @param pPool The pool.
2943 * @param pPage The page.
2944 * @param pShwPT The shadow page table (mapping of the page).
2945 * @param pGstPT The guest page table.
2946 */
2947DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
2948{
2949 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pShwPT->a); i++)
2950 if (pShwPT->a[i].n.u1Present)
2951 {
2952 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
2953 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2954 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2955 if (!--pPage->cPresent)
2956 break;
2957 }
2958}
2959
2960
2961/**
2962 * Clear references to guest physical memory in a PAE / 32-bit page table.
2963 *
2964 * @param pPool The pool.
2965 * @param pPage The page.
2966 * @param pShwPT The shadow page table (mapping of the page).
2967 * @param pGstPT The guest page table (just a half one).
2968 */
2969DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
2970{
2971 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2972 if (pShwPT->a[i].n.u1Present)
2973 {
            Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
2975 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2976 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2977 }
2978}
2979
2980
2981/**
2982 * Clear references to guest physical memory in a PAE / PAE page table.
2983 *
2984 * @param pPool The pool.
2985 * @param pPage The page.
2986 * @param pShwPT The shadow page table (mapping of the page).
2987 * @param pGstPT The guest page table.
2988 */
2989DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
2990{
2991 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2992 if (pShwPT->a[i].n.u1Present)
2993 {
            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
2995 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
2996 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
2997 }
2998}
2999
3000
3001/**
3002 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3003 *
3004 * @param pPool The pool.
3005 * @param pPage The page.
3006 * @param pShwPT The shadow page table (mapping of the page).
3007 */
3008DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3009{
3010 RTGCPHYS GCPhys = pPage->GCPhys;
3011 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3012 if (pShwPT->a[i].n.u1Present)
3013 {
3014 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3015 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3016 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3017 }
3018}
3019
3020
3021/**
3022 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3023 *
3024 * @param pPool The pool.
3025 * @param pPage The page.
3026 * @param pShwPT The shadow page table (mapping of the page).
3027 */
3028DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3029{
3030 RTGCPHYS GCPhys = pPage->GCPhys;
3031 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3032 if (pShwPT->a[i].n.u1Present)
3033 {
            Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 GCPhys=%RGp\n",
                  i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3036 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3037 }
3038}
3039#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3040
3041
3042/**
 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
3044 *
3045 * @param pPool The pool.
3046 * @param pPage The page.
3047 * @param pShwPD The shadow page directory (mapping of the page).
3048 */
3049DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3050{
3051 for (unsigned i = 0; i < ELEMENTS(pShwPD->a); i++)
3052 {
3053 if (pShwPD->a[i].n.u1Present)
3054 {
3055 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3056 if (pSubPage)
3057 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3058 else
3059 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3060 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3061 }
3062 }
3063}
3064
3065
3066/**
3067 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3068 *
3069 * @param pPool The pool.
3070 * @param pPage The page.
3071 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3072 */
3073DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3074{
3075 for (unsigned i = 0; i < ELEMENTS(pShwPDPT->a); i++)
3076 {
3077 if (pShwPDPT->a[i].n.u1Present)
3078 {
3079 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3080 if (pSubPage)
3081 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3082 else
3083 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3084 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3085 }
3086 }
3087}
3088
3089/**
3090 * Clear references to shadowed pages in a 64-bit level 4 page table.
3091 *
3092 * @param pPool The pool.
3093 * @param pPage The page.
3094 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
3095 */
3096DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3097{
3098 for (unsigned i = 0; i < ELEMENTS(pShwPML4->a); i++)
3099 {
3100 if (pShwPML4->a[i].n.u1Present)
3101 {
3102 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3103 if (pSubPage)
3104 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3105 else
3106 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3107 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3108 }
3109 }
3110}
3111
3112
3113/**
3114 * Clears all references made by this page.
3115 *
3116 * This includes other shadow pages and GC physical addresses.
3117 *
3118 * @param pPool The pool.
3119 * @param pPage The page.
3120 */
3121static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3122{
3123 /*
3124 * Map the shadow page and take action according to the page kind.
3125 */
3126 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
3127 switch (pPage->enmKind)
3128 {
3129#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3130 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3131 {
3132 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3133 void *pvGst;
3134 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3135 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3136 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3137 break;
3138 }
3139
3140 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3141 {
3142 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3143 void *pvGst;
3144 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3145 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3146 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3147 break;
3148 }
3149
3150 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3151 {
3152 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3153 void *pvGst;
3154 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3155 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3156 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3157 break;
3158 }
3159
3160 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3161 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3162 {
3163 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3164 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3165 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3166 break;
3167 }
3168
3169 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 4 MB page */
3170 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3171 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3172 {
3173 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3174 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3175 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3176 break;
3177 }
3178
3179#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3180 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3181 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3182 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3183 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3184 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3185 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3186 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3187 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3188 break;
3189#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3190
3191 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
3192 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3193 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3194 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3195 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3196 break;
3197
3198 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3199 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3200 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3201 break;
3202
3203 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
3204 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3205 break;
3206
3207 default:
3208 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3209 }
3210
    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3212 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3213 ASMMemZeroPage(pvShw);
3214 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3215 pPage->fZeroed = true;
3216}
3217#endif /* PGMPOOL_WITH_USER_TRACKING */
3218
3219
3220/**
3221 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3222 *
3223 * @param pPool The pool.
3224 */
3225static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3226{
3227 /*
     * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST-1.
3229 */
3230 Assert(NIL_PGMPOOL_IDX == 0);
3231 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3232 {
3233 /*
3234 * Get the page address.
3235 */
3236 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3237 union
3238 {
3239 uint64_t *pau64;
3240 uint32_t *pau32;
3241 } u;
3242 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
3243
3244 /*
3245 * Mark stuff not present.
3246 */
3247 switch (pPage->enmKind)
3248 {
3249 case PGMPOOLKIND_ROOT_32BIT_PD:
3250 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3251 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3252 u.pau32[iPage] = 0;
3253 break;
3254
3255 case PGMPOOLKIND_ROOT_PAE_PD:
3256 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * X86_PG_PAE_PDPE_ENTRIES; iPage++)
3257 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3258 u.pau64[iPage] = 0;
3259 break;
3260
3261 case PGMPOOLKIND_ROOT_PDPT:
3262 /* Not root of shadowed pages currently, ignore it. */
3263 break;
3264
3265 case PGMPOOLKIND_ROOT_NESTED:
3266 ASMMemZero32(u.pau64, PAGE_SIZE);
3267 break;
3268 }
3269 }
3270
3271 /*
3272 * Paranoia (to be removed), flag a global CR3 sync.
3273 */
3274 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
3275}
3276
3277
3278/**
3279 * Flushes the entire cache.
3280 *
 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
 * this and will execute the CR3 flush.
3283 *
3284 * @param pPool The pool.
3285 */
3286static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3287{
3288 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3289 LogFlow(("pgmPoolFlushAllInt:\n"));
3290
3291 /*
3292 * If there are no pages in the pool, there is nothing to do.
3293 */
3294 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3295 {
3296 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3297 return;
3298 }
3299
3300 /*
3301 * Nuke the free list and reinsert all pages into it.
3302 */
3303 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3304 {
3305 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3306
3307#ifdef IN_RING3
3308 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
3309#endif
3310#ifdef PGMPOOL_WITH_MONITORING
3311 if (pPage->fMonitored)
3312 pgmPoolMonitorFlush(pPool, pPage);
3313 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3314 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3315 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3316 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3317 pPage->cModifications = 0;
3318#endif
3319 pPage->GCPhys = NIL_RTGCPHYS;
3320 pPage->enmKind = PGMPOOLKIND_FREE;
3321 Assert(pPage->idx == i);
3322 pPage->iNext = i + 1;
3323 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3324 pPage->fSeenNonGlobal = false;
        pPage->fMonitored = false;
3326 pPage->fCached = false;
3327 pPage->fReusedFlushPending = false;
3328 pPage->fCR3Mix = false;
3329#ifdef PGMPOOL_WITH_USER_TRACKING
3330 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3331#endif
3332#ifdef PGMPOOL_WITH_CACHE
3333 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3334 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3335#endif
3336 }
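    /* The loop above linked each page to the next; terminate the chain and reset the head. */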
3337 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3338 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3339 pPool->cUsedPages = 0;
3340
3341#ifdef PGMPOOL_WITH_USER_TRACKING
3342 /*
3343 * Zap and reinitialize the user records.
3344 */
3345 pPool->cPresent = 0;
3346 pPool->iUserFreeHead = 0;
3347 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
3348 const unsigned cMaxUsers = pPool->cMaxUsers;
3349 for (unsigned i = 0; i < cMaxUsers; i++)
3350 {
3351 paUsers[i].iNext = i + 1;
3352 paUsers[i].iUser = NIL_PGMPOOL_IDX;
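        /* The iUserTable value below is presumably a poison value, making stray use of a free record stand out. */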
3353 paUsers[i].iUserTable = 0xfffffffe;
3354 }
3355 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3356#endif
3357
3358#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3359 /*
3360 * Clear all the GCPhys links and rebuild the phys ext free list.
3361 */
3362 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3363 pRam;
3364 pRam = CTXALLSUFF(pRam->pNext))
3365 {
3366 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3367 while (iPage-- > 0)
3368 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3369 }
3370
3371 pPool->iPhysExtFreeHead = 0;
3372 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
3373 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3374 for (unsigned i = 0; i < cMaxPhysExts; i++)
3375 {
3376 paPhysExts[i].iNext = i + 1;
3377 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3378 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3379 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3380 }
3381 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3382#endif
3383
3384#ifdef PGMPOOL_WITH_MONITORING
3385 /*
3386 * Just zap the modified list.
3387 */
3388 pPool->cModifiedPages = 0;
3389 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3390#endif
3391
3392#ifdef PGMPOOL_WITH_CACHE
3393 /*
3394 * Clear the GCPhys hash and the age list.
3395 */
3396 for (unsigned i = 0; i < ELEMENTS(pPool->aiHash); i++)
3397 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3398 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3399 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3400#endif
3401
3402 /*
3403 * Flush all the special root pages.
3404 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3405 */
3406 pgmPoolFlushAllSpecialRoots(pPool);
3407 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3408 {
3409 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3410 pPage->iNext = NIL_PGMPOOL_IDX;
3411#ifdef PGMPOOL_WITH_MONITORING
3412 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3413 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3414 pPage->cModifications = 0;
3415 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3416 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3417 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3418 if (pPage->fMonitored)
3419 {
3420 PVM pVM = pPool->CTXSUFF(pVM);
3421 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3422 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3423 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3424 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
3425 pPool->pszAccessHandler);
3426 AssertFatalRCSuccess(rc);
3427# ifdef PGMPOOL_WITH_CACHE
3428 pgmPoolHashInsert(pPool, pPage);
3429# endif
3430 }
3431#endif
3432#ifdef PGMPOOL_WITH_USER_TRACKING
3433 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3434#endif
3435#ifdef PGMPOOL_WITH_CACHE
3436 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3437 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3438#endif
3439 }
3440
3441 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3442}
3443
3444
3445/**
3446 * Flushes a pool page.
3447 *
3448 * This moves the page to the free list after removing all user references to it.
3449 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3450 *
3451 * @returns VBox status code.
3452 * @retval VINF_SUCCESS on success.
3453 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
3454 * @param pPool The pool.
3455 * @param pPage The shadow page to flush.
3456 */
3457int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3458{
3459 int rc = VINF_SUCCESS;
3460 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3461 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3462 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3463
3464 /*
3465 * Quietly reject any attempts at flushing any of the special root pages.
3466 */
3467 if (pPage->idx < PGMPOOL_IDX_FIRST)
3468 {
3469 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3470 return VINF_SUCCESS;
3471 }
3472
3473 /*
3474 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping.
3475 */
3476 if ( pPage->enmKind == PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4
3477 && PGMGetHyperCR3(CTXSUFF(pPool->pVM)) == pPage->Core.Key)
3478 {
3479 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3480 return VINF_SUCCESS;
3481 }
3482 /* Safety precaution in case we change the paging for other modes too in the future. */
3483 AssertFatal(PGMGetHyperCR3(CTXSUFF(pPool->pVM)) != pPage->Core.Key);
3484
3485 /*
3486 * Mark the page as being in need of an ASMMemZeroPage().
3487 */
3488 pPage->fZeroed = false;
3489
3490#ifdef PGMPOOL_WITH_USER_TRACKING
3491 /*
3492 * Clear the user tracking data and dereference whatever the page references.
3493 */
3494 pgmPoolTrackClearPageUsers(pPool, pPage);
3495 STAM_PROFILE_START(&pPool->StatTrackDeref, a);
3496 pgmPoolTrackDeref(pPool, pPage);
3497 STAM_PROFILE_STOP(&pPool->StatTrackDeref, a);
3498#endif
3499
3500#ifdef PGMPOOL_WITH_CACHE
3501 /*
3502 * Flush it from the cache.
3503 */
3504 pgmPoolCacheFlushPage(pPool, pPage);
3505#endif /* PGMPOOL_WITH_CACHE */
3506
3507#ifdef PGMPOOL_WITH_MONITORING
3508 /*
3509 * Deregister the monitoring.
3510 */
3511 if (pPage->fMonitored)
3512 rc = pgmPoolMonitorFlush(pPool, pPage);
3513#endif
3514
3515 /*
3516 * Free the page.
3517 */
3518 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3519 pPage->iNext = pPool->iFreeHead;
3520 pPool->iFreeHead = pPage->idx;
3521 pPage->enmKind = PGMPOOLKIND_FREE;
3522 pPage->GCPhys = NIL_RTGCPHYS;
3523 pPage->fReusedFlushPending = false;
3524
3525 pPool->cUsedPages--;
3526 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3527 return rc;
3528}
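/*
 * A minimal usage sketch, assuming pPool and pPage are in scope (illustrative
 * only, not part of the pool code itself): callers must treat
 * VERR_PGM_POOL_CLEARED as "the pool was cleared behind your back" and stop
 * touching any pool pages they may have cached.
 */
#if 0 /* illustrative only */
    int rc = pgmPoolFlushPage(pPool, pPage);
    if (rc == VERR_PGM_POOL_CLEARED)
        return rc;  /* handler deregistration caused a lightweight pool flush; bail out. */
    AssertRC(rc);   /* anything else should be VINF_SUCCESS. */
#endif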
3529
3530
3531/**
3532 * Frees a usage of a pool page.
3533 *
3534 * The caller is responsible for updating the user table so that it no longer
3535 * references the shadow page.
3536 *
3537 * @param pPool The pool.
3538 * @param pPage The shadow page.
3539 * @param iUser The shadow page pool index of the user table.
3540 * @param iUserTable The index into the user table (shadowed).
3541 */
3542void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3543{
3544 STAM_PROFILE_START(&pPool->StatFree, a);
3545 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3546 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3547 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3548#ifdef PGMPOOL_WITH_USER_TRACKING
3549 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3550#endif
3551#ifdef PGMPOOL_WITH_CACHE
3552 if (!pPage->fCached)
3553#endif
3554 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3555 STAM_PROFILE_STOP(&pPool->StatFree, a);
3556}
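/*
 * A minimal sketch of the contract above (pShwPD, iPdIndex and the user
 * pairing are illustrative assumptions, not taken from this file): the caller
 * clears its reference first, then releases the usage.
 */
#if 0 /* illustrative only */
    pShwPD->a[iPdIndex].u = 0;                          /* drop the reference in the user table... */
    pgmPoolFreeByPage(pPool, pPage, iUser, iPdIndex);   /* ...then free this usage of the page. */
#endif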
3557
3558
3559/**
3560 * Makes one or more pages free, growing the pool or evicting cached pages as needed.
3561 *
3562 * @returns VBox status code.
3563 * @retval VINF_SUCCESS on success.
3564 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3565 *
3566 * @param pPool The pool.
3567 * @param iUser The user of the page.
3568 */
3569static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3570{
3571 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3572
3573 /*
3574 * If the pool isn't fully grown yet, expand it.
3575 */
3576 if (pPool->cCurPages < pPool->cMaxPages)
3577 {
3578 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3579#ifdef IN_RING3
3580 int rc = PGMR3PoolGrow(pPool->pVMHC);
3581#else
3582 int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3583#endif
3584 if (VBOX_FAILURE(rc))
3585 return rc;
3586 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3587 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3588 return VINF_SUCCESS;
3589 }
3590
3591#ifdef PGMPOOL_WITH_CACHE
3592 /*
3593 * Free one cached page.
3594 */
3595 return pgmPoolCacheFreeOne(pPool, iUser);
3596#else
3597 /*
3598 * Flush the pool.
3599 * If we have tracking enabled, it should be possible to come up with
3600 * a cheap replacement strategy...
3601 */
3602 pgmPoolFlushAllInt(pPool);
3603 return VERR_PGM_POOL_FLUSHED;
3604#endif
3605}
3606
3607
3608/**
3609 * Allocates a page from the pool.
3610 *
3611 * This page may actually be a cached page and not in need of any processing
3612 * on the caller's part.
3613 *
3614 * @returns VBox status code.
3615 * @retval VINF_SUCCESS if a NEW page was allocated.
3616 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3617 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3618 * @param pVM The VM handle.
3619 * @param GCPhys The GC physical address of the page we're going to shadow.
3620 * For 4MB and 2MB PD entries, it's the first address the
3621 * shadow PT is covering.
3622 * @param enmKind The kind of mapping.
3623 * @param iUser The shadow page pool index of the user table.
3624 * @param iUserTable The index into the user table (shadowed).
3625 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3626 */
3627int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
3628{
3629 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3630 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3631 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3632 *ppPage = NULL;
3633
3634#ifdef PGMPOOL_WITH_CACHE
3635 if (pPool->fCacheEnabled)
3636 {
3637 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3638 if (VBOX_SUCCESS(rc2))
3639 {
3640 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3641 LogFlow(("pgmPoolAlloc: cached returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3642 return rc2;
3643 }
3644 }
3645#endif
3646
3647 /*
3648 * Allocate a new one.
3649 */
3650 int rc = VINF_SUCCESS;
3651 uint16_t iNew = pPool->iFreeHead;
3652 if (iNew == NIL_PGMPOOL_IDX)
3653 {
3654 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3655 if (VBOX_FAILURE(rc))
3656 {
3657 if (rc != VERR_PGM_POOL_CLEARED)
3658 {
3659 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3660 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3661 return rc;
3662 }
3663 rc = VERR_PGM_POOL_FLUSHED;
3664 }
3665 iNew = pPool->iFreeHead;
3666 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3667 }
3668
3669 /* unlink the free head */
3670 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3671 pPool->iFreeHead = pPage->iNext;
3672 pPage->iNext = NIL_PGMPOOL_IDX;
3673
3674 /*
3675 * Initialize it.
3676 */
3677 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3678 pPage->enmKind = enmKind;
3679 pPage->GCPhys = GCPhys;
3680 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3681 pPage->fMonitored = false;
3682 pPage->fCached = false;
3683 pPage->fReusedFlushPending = false;
3684 pPage->fCR3Mix = false;
3685#ifdef PGMPOOL_WITH_MONITORING
3686 pPage->cModifications = 0;
3687 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3688 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3689#endif
3690#ifdef PGMPOOL_WITH_USER_TRACKING
3691 pPage->cPresent = 0;
3692 pPage->iFirstPresent = ~0;
3693
3694 /*
3695 * Insert into the tracking and cache. If this fails, free the page.
3696 */
3697 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3698 if (VBOX_FAILURE(rc3))
3699 {
3700 if (rc3 != VERR_PGM_POOL_CLEARED)
3701 {
3702 pPool->cUsedPages--;
3703 pPage->enmKind = PGMPOOLKIND_FREE;
3704 pPage->GCPhys = NIL_RTGCPHYS;
3705 pPage->iNext = pPool->iFreeHead;
3706 pPool->iFreeHead = pPage->idx;
3707 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3708 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3709 return rc3;
3710 }
3711 rc = VERR_PGM_POOL_FLUSHED;
3712 }
3713#endif /* PGMPOOL_WITH_USER_TRACKING */
3714
3715 /*
3716 * Commit the allocation, clear the page and return.
3717 */
3718#ifdef VBOX_WITH_STATISTICS
3719 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
3720 pPool->cUsedPagesHigh = pPool->cUsedPages;
3721#endif
3722
3723 if (!pPage->fZeroed)
3724 {
3725 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3726 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
3727 ASMMemZeroPage(pv);
3728 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3729 }
3730
3731 *ppPage = pPage;
3732 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
3733 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
3734 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3735 return rc;
3736}
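/*
 * A minimal sketch of the allocation contract (illustrative only: GCPhys,
 * iUser and iUserTable are assumed to be supplied by the caller, and the 4MB
 * page-table kind is just one possible choice):
 */
#if 0 /* illustrative only */
    PPGMPOOLPAGE pShwPage;
    int rc = pgmPoolAlloc(pVM, GCPhys, PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB, iUser, iUserTable, &pShwPage);
    if (VBOX_FAILURE(rc))
        return rc;                  /* includes VERR_PGM_POOL_FLUSHED: a CR3 sync is pending. */
    if (rc != VINF_PGM_CACHED_PAGE)
    {
        /* Only a NEW page needs its entries built; a cached page is already valid. */
        PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
        /* ... fill the freshly zeroed shadow page table here ... */
    }
    /* Either way the parent entry is pointed at pShwPage->Core.Key (the page's HCPhys). */
#endif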
3737
3738
3739/**
3740 * Frees a usage of a pool page.
3741 *
3742 * @param pVM The VM handle.
3743 * @param HCPhys The HC physical address of the shadow page.
3744 * @param iUser The shadow page pool index of the user table.
3745 * @param iUserTable The index into the user table (shadowed).
3746 */
3747void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
3748{
3749 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
3750 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3751 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
3752}
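/*
 * A minimal sketch (uPde and the mask usage are illustrative assumptions):
 * this is the variant for callers that only hold the HC physical address,
 * e.g. taken from a shadow PDE that has just been cleared.
 */
#if 0 /* illustrative only */
    RTHCPHYS HCPhysPT = uPde & X86_PDE_PG_MASK;     /* HC physical address of the shadow PT. */
    pgmPoolFree(pVM, HCPhysPT, iUser, iUserTable);
#endif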
3753
3754
3755/**
3756 * Gets an in-use page in the pool by its HC physical address.
3757 *
3758 * @returns Pointer to the page.
3759 * @param pVM The VM handle.
3760 * @param HCPhys The HC physical address of the shadow page.
3761 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
3762 */
3763PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
3764{
3765 /** @todo profile this! */
3766 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3767 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
3768 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
3769 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
3770 return pPage;
3771}
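/*
 * A minimal sketch, assuming HCPhys was read from a present shadow paging
 * entry; since the function asserts on invalid input instead of returning
 * NULL, the caller needs no NULL check.
 */
#if 0 /* illustrative only */
    PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, HCPhys);
    Assert(pShwPage->enmKind != PGMPOOLKIND_FREE);  /* it's in use by definition. */
#endif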
3772
3773
3774/**
3775 * Flushes the entire cache.
3776 *
3777 * It will assert a global CR3 flush (FF) and assumes that the caller is aware
3778 * of this and will execute the flush.
3779 *
3780 * @param pVM The VM handle.
3781 */
3782void pgmPoolFlushAll(PVM pVM)
3783{
3784 LogFlow(("pgmPoolFlushAll:\n"));
3785 pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
3786}
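/*
 * A minimal sketch of the caller contract (the FF check is an assumption
 * about the caller, not code from this file): the flush raises
 * VM_FF_PGM_SYNC_CR3, and the caller must see a full CR3 resync through
 * before guest execution resumes.
 */
#if 0 /* illustrative only */
    pgmPoolFlushAll(pVM);
    Assert(VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
#endif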
3787