VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@9663

Last change on this file since 9663 was 9620, checked in by vboxsync, 17 years ago

Long mode paging updates

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 126.2 KB
1/* $Id: PGMAllPool.cpp 9620 2008-06-11 16:10:52Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_GC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37
38#include <VBox/log.h>
39#include <VBox/err.h>
40#include <iprt/asm.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46__BEGIN_DECLS
47static void pgmPoolFlushAllInt(PPGMPOOL pPool);
48#ifdef PGMPOOL_WITH_USER_TRACKING
49DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
50DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
51static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#endif
53#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
54static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
55#endif
56#ifdef PGMPOOL_WITH_CACHE
57static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable);
58#endif
59#ifdef PGMPOOL_WITH_MONITORING
60static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
61#endif
62#ifndef IN_RING3
63DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
64#endif
65__END_DECLS
66
67
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86
87
88#ifdef IN_GC
89/**
90 * Maps a pool page into the current context.
91 *
92 * @returns Pointer to the mapping.
93 * @param pVM The VM handle.
94 * @param pPage The page to map.
95 */
96void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
97{
98 /* general pages. */
99 if (pPage->idx >= PGMPOOL_IDX_FIRST)
100 {
101 Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
102 void *pv;
103 int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pv);
104 AssertReleaseRC(rc);
105 return pv;
106 }
107
108 /* special pages. */
109 switch (pPage->idx)
110 {
111 case PGMPOOL_IDX_PD:
112 return pVM->pgm.s.pGC32BitPD;
113 case PGMPOOL_IDX_PAE_PD:
114 case PGMPOOL_IDX_PAE_PD_0:
115 return pVM->pgm.s.apGCPaePDs[0];
116 case PGMPOOL_IDX_PAE_PD_1:
117 return pVM->pgm.s.apGCPaePDs[1];
118 case PGMPOOL_IDX_PAE_PD_2:
119 return pVM->pgm.s.apGCPaePDs[2];
120 case PGMPOOL_IDX_PAE_PD_3:
121 return pVM->pgm.s.apGCPaePDs[3];
122 case PGMPOOL_IDX_PDPT:
123 return pVM->pgm.s.pGCPaePDPT;
124 default:
125 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
126 return NULL;
127 }
128}
129#endif /* IN_GC */
130
131
132#ifdef PGMPOOL_WITH_MONITORING
133/**
134 * Determine the size of a write instruction.
135 * @returns number of bytes written.
136 * @param pDis The disassembler state.
137 */
138static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
139{
140 /*
141 * This is very crude and possibly wrong for some opcodes,
142 * but since it's not really supposed to be called we can
143 * probably live with that.
144 */
145 return DISGetParamSize(pDis, &pDis->param1);
146}
147
148
149/**
150 * Flushes a chain of pages sharing the same access monitor.
151 *
152 * @returns VBox status code suitable for scheduling.
153 * @param pPool The pool.
154 * @param pPage A page in the chain.
155 */
156int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
157{
158 LogFlow(("pgmPoolMonitorChainFlush: Flush page %VGp type=%d\n", pPage->GCPhys, pPage->enmKind));
159
160 /*
161 * Find the list head.
162 */
163 uint16_t idx = pPage->idx;
164 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
165 {
166 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
167 {
168 idx = pPage->iMonitoredPrev;
169 Assert(idx != pPage->idx);
170 pPage = &pPool->aPages[idx];
171 }
172 }
173
174 /*
175 * Iterate the list, flushing each shadow page.
176 */
177 int rc = VINF_SUCCESS;
178 for (;;)
179 {
180 idx = pPage->iMonitoredNext;
181 Assert(idx != pPage->idx);
182 if (pPage->idx >= PGMPOOL_IDX_FIRST)
183 {
184 int rc2 = pgmPoolFlushPage(pPool, pPage);
185 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
186 rc = VINF_PGM_SYNC_CR3;
187 }
188 /* next */
189 if (idx == NIL_PGMPOOL_IDX)
190 break;
191 pPage = &pPool->aPages[idx];
192 }
193 return rc;
194}
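
/*
 * Illustrative sketch (not part of the pool code): monitored pages form a
 * doubly linked list threaded through 16-bit pool indices
 * (iMonitoredPrev/iMonitoredNext) rather than pointers, which is what the
 * head-finding loop above walks. A hypothetical stand-alone helper doing
 * the same thing:
 */
#if 0 /* example only */
static PPGMPOOLPAGE pgmPoolExampleMonitorGetHead(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
        pPage = &pPool->aPages[pPage->iMonitoredPrev]; /* hop one link back */
    return pPage; /* the head is the page owning the physical access handler */
}
#endif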
195
196
197/**
198 * Wrapper for getting the current context pointer to the entry being modified.
199 *
200 * @returns Pointer to the current context mapping of the entry.
201 * @param pPool The pool.
202 * @param pvFault The fault virtual address.
203 * @param GCPhysFault The fault physical address.
204 * @param cbEntry The entry size.
205 */
206#ifdef IN_RING3
207DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
208#else
209DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
210#endif
211{
212#ifdef IN_GC
213 return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
214
215#elif defined(IN_RING0)
216 void *pvRet;
217 int rc = pgmRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
218 AssertFatalRCSuccess(rc);
219 return pvRet;
220
221#elif defined(IN_RING3)
222 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
223#else
224# error "huh?"
225#endif
226}
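
/*
 * A small worked example of the masking used above (assuming, as the code
 * does, that cbEntry is a power of two): the fault address is rounded down
 * to the start of the entry being written.
 */
#if 0 /* example only */
RTGCUINTPTR uFault = 0x1003c;                        /* write hits byte 4 of an entry */
RTGCUINTPTR uEntry = uFault & ~(RTGCUINTPTR)(8 - 1); /* = 0x10038 for 8-byte PAE entries */
#endif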
227
228
229/**
230 * Process shadow entries before they are changed by the guest.
231 *
232 * For PT entries we will clear them. For PD entries, we'll simply check
233 * for mapping conflicts and set the SyncCR3 FF if found.
234 *
235 * @param pPool The pool.
236 * @param pPage The head page.
237 * @param GCPhysFault The guest physical fault address.
238 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
239 * In R3 this is the host context 'fault' address.
240 * @param pCpu The disassembler state for figuring out the write size.
241 * This need not be specified if the caller knows we won't do cross-entry accesses.
242 */
243#ifdef IN_RING3
244void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
245#else
246void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
247#endif
248{
249 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
250 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
251
252 LogFlow(("pgmPoolMonitorChainChanging: %VGv phys=%VGp kind=%d\n", pvAddress, GCPhysFault, pPage->enmKind));
253
254 for (;;)
255 {
256 union
257 {
258 void *pv;
259 PX86PT pPT;
260 PX86PTPAE pPTPae;
261 PX86PD pPD;
262 PX86PDPAE pPDPae;
263 PX86PDPT pPDPT;
264 } uShw;
265 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
266
267 switch (pPage->enmKind)
268 {
269 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
270 {
271 const unsigned iShw = off / sizeof(X86PTE);
272 if (uShw.pPT->a[iShw].n.u1Present)
273 {
274# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
275 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
276 Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
277 pgmPoolTracDerefGCPhysHint(pPool, pPage,
278 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
279 pGstPte->u & X86_PTE_PG_MASK);
280# endif
281 uShw.pPT->a[iShw].u = 0;
282 }
283 break;
284 }
285
286 /* page/2 sized */
287 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
288 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
289 {
290 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
291 if (uShw.pPTPae->a[iShw].n.u1Present)
292 {
293# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
294 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
295 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
296 pgmPoolTracDerefGCPhysHint(pPool, pPage,
297 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
298 pGstPte->u & X86_PTE_PG_MASK);
299# endif
300 uShw.pPTPae->a[iShw].u = 0;
301 }
302 }
303 break;
304
305 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
306 {
307 const unsigned iShw = off / sizeof(X86PTEPAE);
308 if (uShw.pPTPae->a[iShw].n.u1Present)
309 {
310# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
311 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
312 Log4(("pgmPoolMonitorChainChanging pae_pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
313 pgmPoolTracDerefGCPhysHint(pPool, pPage,
314 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
315 pGstPte->u & X86_PTE_PAE_PG_MASK);
316# endif
317 uShw.pPTPae->a[iShw].u = 0;
318 }
319 break;
320 }
321
322 case PGMPOOLKIND_ROOT_32BIT_PD:
323 {
324 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
325 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
326 {
327 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
328 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
329 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
330 }
331 /* paranoia / a bit assumptive. */
332 else if ( pCpu
333 && (off & 3)
334 && (off & 3) + pgmPoolDisasWriteSize(pCpu) > 4)
335 {
336 const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PTE);
337 if ( iShw2 != iShw
338 && iShw2 < ELEMENTS(uShw.pPD->a)
339 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
340 {
341 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
342 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
343 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
344 }
345 }
346#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
347 if ( uShw.pPD->a[iShw].n.u1Present
348 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
349 {
350 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
351# ifdef IN_GC /* TLB load - we're pushing things a bit... */
352 ASMProbeReadByte(pvAddress);
353# endif
354 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
355 uShw.pPD->a[iShw].u = 0;
356 }
357#endif
358 break;
359 }
360
361 case PGMPOOLKIND_ROOT_PAE_PD:
362 {
363 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
364 for (unsigned i = 0; i < 2; i++, iShw++)
365 {
366 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
367 {
368 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
369 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
370 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
371 }
372 /* paranoia / a bit assumptive. */
373 else if ( pCpu
374 && (off & 3)
375 && (off & 3) + pgmPoolDisasWriteSize(pCpu) > 4)
376 {
377 const unsigned iShw2 = iShw + 2;
378 if ( iShw2 < ELEMENTS(uShw.pPDPae->a)
379 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
380 {
381 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
382 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
383 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
384 }
385 }
386#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
387 if ( uShw.pPDPae->a[iShw].n.u1Present
388 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
389 {
390 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
391# ifdef IN_GC /* TLB load - we're pushing things a bit... */
392 ASMProbeReadByte(pvAddress);
393# endif
394 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
395 uShw.pPDPae->a[iShw].u = 0;
396 }
397#endif
398 }
399 break;
400 }
401
402 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
403 {
404 const unsigned iShw = off / sizeof(X86PTEPAE);
405 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
406 {
407 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
408 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
409 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
410 }
411 /* paranoia / a bit assumptive. */
412 else if ( pCpu
413 && (off & 7)
414 && (off & 7) + pgmPoolDisasWriteSize(pCpu) > sizeof(X86PTEPAE))
415 {
416 const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PTEPAE);
417 if ( iShw2 != iShw
418 && iShw2 < ELEMENTS(uShw.pPDPae->a)
419 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
420 {
421 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
422 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
423 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
424 }
425 }
426#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
427 if ( uShw.pPDPae->a[iShw].n.u1Present
428 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
429 {
430 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
431# ifdef IN_GC /* TLB load - we're pushing things a bit... */
432 ASMProbeReadByte(pvAddress);
433# endif
434 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
435 uShw.pPDPae->a[iShw].u = 0;
436 }
437#endif
438 break;
439 }
440
441 case PGMPOOLKIND_ROOT_PDPT:
442 {
443 /* Hopefully this doesn't happen very often:
444 * - touching unused parts of the page
445 * - messing with the bits of pd pointers without changing the physical address
446 */
447 const unsigned iShw = off / sizeof(X86PDPE);
448 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
449 {
450 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
451 {
452 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
453 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
454 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
455 }
456 /* paranoia / a bit assumptive. */
457 else if ( pCpu
458 && (off & 7)
459 && (off & 7) + pgmPoolDisasWriteSize(pCpu) > sizeof(X86PDPE))
460 {
461 const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PDPE);
462 if ( iShw2 != iShw
463 && iShw2 < X86_PG_PAE_PDPE_ENTRIES
464 && uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
465 {
466 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
467 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
468 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
469 }
470 }
471 }
472 break;
473 }
474
475 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
476 {
477 /* Hopefully this doesn't happen very often:
478 * - messing with the bits of pd pointers without changing the physical address
479 */
480#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
481 const unsigned iShw = off / sizeof(X86PDPE);
482 if ( uShw.pPDPT->a[iShw].n.u1Present
483 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
484 {
485 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
486 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
487 uShw.pPDPT->a[iShw].u = 0;
488 }
489#endif
490 break;
491 }
492
493 default:
494 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
495 }
496
497 /* next */
498 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
499 return;
500 pPage = &pPool->aPages[pPage->iMonitoredNext];
501 }
502}
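
/*
 * A worked example of the cross-entry checks in the function above: a
 * 4-byte write at page offset 0x1fe straddles two 32-bit PTEs, since
 * (off & 3) = 2 and 2 + 4 > 4. Both iShw = 0x1fe / 4 = 0x7f and
 * iShw2 = (0x1fe + 4 - 1) / 4 = 0x80 must therefore be inspected,
 * which is exactly what the iShw2 branches do.
 */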
503
504
505# ifndef IN_RING3
506/**
507 * Checks if an access could be a fork operation in progress.
508 *
509 * Meaning that the guest is setting up the parent process for Copy-On-Write.
510 *
511 * @returns true if it's likely that we're forking, otherwise false.
512 * @param pPool The pool.
513 * @param pCpu The disassembled instruction.
514 * @param offFault The access offset.
515 */
516DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
517{
518 /*
519 * i386 Linux uses btr to clear X86_PTE_RW.
520 * The functions involved are (2.6.16 source inspection):
521 * clear_bit
522 * ptep_set_wrprotect
523 * copy_one_pte
524 * copy_pte_range
525 * copy_pmd_range
526 * copy_pud_range
527 * copy_page_range
528 * dup_mmap
529 * dup_mm
530 * copy_mm
531 * copy_process
532 * do_fork
533 */
534 if ( pCpu->pCurInstr->opcode == OP_BTR
535 && !(offFault & 4)
536 /** @todo Validate that the bit index is X86_PTE_RW. */
537 )
538 {
539 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
540 return true;
541 }
542 return false;
543}
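
/*
 * What the guest's btr amounts to on a 32-bit PTE (a sketch with made-up
 * values, matching the Linux call chain listed above): ptep_set_wrprotect()
 * clears bit 1, i.e. X86_PTE_RW, in place.
 */
#if 0 /* example only */
uint32_t u32Pte = 0x00001067;    /* frame 0x1 | D | A | U/S | R/W | P */
u32Pte &= ~(uint32_t)X86_PTE_RW; /* btr [pte], 1  ->  0x00001065, read-only */
#endif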
544
545
546/**
547 * Determine whether the page is likely to have been reused.
548 *
549 * @returns true if we consider the page as being reused for a different purpose.
550 * @returns false if we consider it to still be a paging page.
551 * @param pPage The page in question.
552 * @param pCpu The disassembly info for the faulting instruction.
553 * @param pvFault The fault address.
554 *
555 * @remark The REP prefix check is left to the caller because of STOSD/W.
556 */
557DECLINLINE(bool) pgmPoolMonitorIsReused(PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu, RTGCPTR pvFault)
558{
559 switch (pCpu->pCurInstr->opcode)
560 {
561 case OP_PUSH:
562 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
563 return true;
564 case OP_PUSHF:
565 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
566 return true;
567 case OP_PUSHA:
568 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
569 return true;
570 case OP_FXSAVE:
571 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
572 return true;
573 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
574 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
575 return true;
576 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
577 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
578 return true;
579 }
580 if ( (pCpu->param1.flags & USE_REG_GEN32)
581 && (pCpu->param1.base.reg_gen == USE_REG_ESP))
582 {
583 Log4(("pgmPoolMonitorIsReused: ESP\n"));
584 return true;
585 }
586
587 //if (pPage->fCR3Mix)
588 // return false;
589 return false;
590}
591
592
593/**
594 * Flushes the page being accessed.
595 *
596 * @returns VBox status code suitable for scheduling.
597 * @param pVM The VM handle.
598 * @param pPool The pool.
599 * @param pPage The pool page (head).
600 * @param pCpu The disassembly of the write instruction.
601 * @param pRegFrame The trap register frame.
602 * @param GCPhysFault The fault address as guest physical address.
603 * @param pvFault The fault address.
604 */
605static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
606 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
607{
608 /*
609 * First, do the flushing.
610 */
611 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
612
613 /*
614 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
615 */
616 uint32_t cbWritten;
617 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
618 if (VBOX_SUCCESS(rc2))
619 pRegFrame->eip += pCpu->opsize;
620 else if (rc2 == VERR_EM_INTERPRETER)
621 {
622#ifdef IN_GC
623 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
624 {
625 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
626 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
627 rc = VINF_SUCCESS;
628 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
629 }
630 else
631#endif
632 {
633 rc = VINF_EM_RAW_EMULATE_INSTR;
634 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
635 }
636 }
637 else
638 rc = rc2;
639
640 /* See use in pgmPoolAccessHandlerSimple(). */
641 PGM_INVL_GUEST_TLBS();
642
643 LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
644 return rc;
645
646}
647
648
649/**
650 * Handles the STOSD write accesses.
651 *
652 * @returns VBox status code suitable for scheduling.
653 * @param pVM The VM handle.
654 * @param pPool The pool.
655 * @param pPage The pool page (head).
656 * @param pCpu The disassembly of the write instruction.
657 * @param pRegFrame The trap register frame.
658 * @param GCPhysFault The fault address as guest physical address.
659 * @param pvFault The fault address.
660 */
661DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
662 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
663{
664 /*
665 * Increment the modification counter and insert it into the list
666 * of modified pages the first time.
667 */
668 if (!pPage->cModifications++)
669 pgmPoolMonitorModifiedInsert(pPool, pPage);
670
671 /*
672 * Execute REP STOSD.
673 *
674 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
675 * write situation, meaning that it's safe to write here.
676 */
677 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
678 while (pRegFrame->ecx)
679 {
680 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
681#ifdef IN_GC
682 *(uint32_t *)pu32 = pRegFrame->eax;
683#else
684 PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
685#endif
686 pu32 += 4;
687 GCPhysFault += 4;
688 pRegFrame->edi += 4;
689 pRegFrame->ecx--;
690 }
691 pRegFrame->eip += pCpu->opsize;
692
693 /* See use in pgmPoolAccessHandlerSimple(). */
694 PGM_INVL_GUEST_TLBS();
695
696 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
697 return VINF_SUCCESS;
698}
699
700
701/**
702 * Handles the simple write accesses.
703 *
704 * @returns VBox status code suitable for scheduling.
705 * @param pVM The VM handle.
706 * @param pPool The pool.
707 * @param pPage The pool page (head).
708 * @param pCpu The disassembly of the write instruction.
709 * @param pRegFrame The trap register frame.
710 * @param GCPhysFault The fault address as guest physical address.
711 * @param pvFault The fault address.
712 */
713DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
714 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
715{
716 /*
717 * Increment the modification counter and insert it into the list
718 * of modified pages the first time.
719 */
720 if (!pPage->cModifications++)
721 pgmPoolMonitorModifiedInsert(pPool, pPage);
722
723 /*
724 * Clear all the pages. ASSUMES that pvFault is readable.
725 */
726 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
727
728 /*
729 * Interpret the instruction.
730 */
731 uint32_t cb;
732 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
733 if (VBOX_SUCCESS(rc))
734 pRegFrame->eip += pCpu->opsize;
735 else if (rc == VERR_EM_INTERPRETER)
736 {
737 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv - opcode=%d\n",
738 pRegFrame->cs, (RTGCPTR)pRegFrame->eip, pCpu->pCurInstr->opcode));
739 rc = VINF_EM_RAW_EMULATE_INSTR;
740 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
741 }
742
743 /*
744 * Quick hack, with logging enabled we're getting stale
745 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
746 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
747 * have to be fixed to support this. But that'll have to wait till next week.
748 *
749 * An alternative is to keep track of the changed PTEs together with the
750 * GCPhys from the guest PT. This may prove expensive though.
751 *
752 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
753 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
754 */
755 PGM_INVL_GUEST_TLBS();
756
757 LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
758 return rc;
759}
760
761
762/**
763 * \#PF Handler callback for PT write accesses.
764 *
765 * @returns VBox status code (appropriate for GC return).
766 * @param pVM VM Handle.
767 * @param uErrorCode CPU Error code.
768 * @param pRegFrame Trap register frame.
769 * NULL on DMA and other non-CPU access.
770 * @param pvFault The fault address (cr2).
771 * @param GCPhysFault The GC physical address corresponding to pvFault.
772 * @param pvUser User argument.
773 */
774DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
775{
776 STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
777 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
778 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
779 LogFlow(("pgmPoolAccessHandler: pvFault=%VGv pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
780
781 /*
782 * We should ALWAYS have the list head as user parameter. This
783 * is because we use that page to record the changes.
784 */
785 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
786
787 /*
788 * Disassemble the faulting instruction.
789 */
790 DISCPUSTATE Cpu;
791 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
792 AssertRCReturn(rc, rc);
793
794 /*
795 * Check if it's worth dealing with.
796 */
797 bool fReused = false;
798 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
799 || pPage->fCR3Mix)
800 && !(fReused = pgmPoolMonitorIsReused(pPage, &Cpu, pvFault))
801 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
802 {
803 /*
804 * Simple instructions, no REP prefix.
805 */
806 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
807 {
808 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
809 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
810 return rc;
811 }
812
813 /*
814 * Windows frequently does small memset() operations (netio test 4k+).
815 * We have to deal with these or we'll kill the cache and performance.
816 */
817 if ( Cpu.pCurInstr->opcode == OP_STOSWD
818 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
819 && pRegFrame->ecx <= 0x20
820 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
821 && !((uintptr_t)pvFault & 3)
822 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
823 && Cpu.mode == CPUMODE_32BIT
824 && Cpu.opmode == CPUMODE_32BIT
825 && Cpu.addrmode == CPUMODE_32BIT
826 && Cpu.prefix == PREFIX_REP
827 && !pRegFrame->eflags.Bits.u1DF
828 )
829 {
830 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
831 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
832 return rc;
833 }
834
835 /* REP prefix, don't bother. */
836 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
837 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%#x opcode=%d prefix=%#x\n",
838 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->eip, Cpu.pCurInstr->opcode, Cpu.prefix));
839 }
840
841 /*
842 * Not worth it, so flush it.
843 *
844 * If we considered it to be reused, don't go back to ring-3
845 * to emulate failed instructions since we usually cannot
846 * interpret them. This may be a bit risky, in which case
847 * the reuse detection must be fixed.
848 */
849 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
850 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
851 rc = VINF_SUCCESS;
852 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
853 return rc;
854}
855
856# endif /* !IN_RING3 */
857#endif /* PGMPOOL_WITH_MONITORING */
858
859
860
861#ifdef PGMPOOL_WITH_CACHE
862/**
863 * Inserts a page into the GCPhys hash table.
864 *
865 * @param pPool The pool.
866 * @param pPage The page.
867 */
868DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
869{
870 Log3(("pgmPoolHashInsert: %VGp\n", pPage->GCPhys));
871 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
872 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
873 pPage->iNext = pPool->aiHash[iHash];
874 pPool->aiHash[iHash] = pPage->idx;
875}
876
877
878/**
879 * Removes a page from the GCPhys hash table.
880 *
881 * @param pPool The pool.
882 * @param pPage The page.
883 */
884DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
885{
886 Log3(("pgmPoolHashRemove: %VGp\n", pPage->GCPhys));
887 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
888 if (pPool->aiHash[iHash] == pPage->idx)
889 pPool->aiHash[iHash] = pPage->iNext;
890 else
891 {
892 uint16_t iPrev = pPool->aiHash[iHash];
893 for (;;)
894 {
895 const int16_t i = pPool->aPages[iPrev].iNext;
896 if (i == pPage->idx)
897 {
898 pPool->aPages[iPrev].iNext = pPage->iNext;
899 break;
900 }
901 if (i == NIL_PGMPOOL_IDX)
902 {
903 AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
904 break;
905 }
906 iPrev = i;
907 }
908 }
909 pPage->iNext = NIL_PGMPOOL_IDX;
910}
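
/*
 * For reference, a lookup in this hash follows the same index-threaded
 * chain (this mirrors the loop in pgmPoolCacheAlloc below; GCPhys is
 * assumed given):
 */
#if 0 /* example only */
unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
while (i != NIL_PGMPOOL_IDX)
{
    if (pPool->aPages[i].GCPhys == GCPhys)
        break;                  /* found a candidate page */
    i = pPool->aPages[i].iNext; /* follow the collision chain */
}
#endif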
911
912
913/**
914 * Frees up one cache page.
915 *
916 * @returns VBox status code.
917 * @retval VINF_SUCCESS on success.
918 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a lightweight pool flush.
919 * @param pPool The pool.
920 * @param iUser The user index.
921 */
922static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
923{
924#ifndef IN_GC
925 const PVM pVM = pPool->CTXSUFF(pVM);
926#endif
927 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
928 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
929
930 /*
931 * Select one page from the tail of the age list.
932 */
933 uint16_t iToFree = pPool->iAgeTail;
934 if (iToFree == iUser)
935 iToFree = pPool->aPages[iToFree].iAgePrev;
936/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
937 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
938 {
939 uint16_t i = pPool->aPages[iToFree].iAgePrev;
940 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
941 {
942 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
943 continue;
944 iToFree = i;
945 break;
946 }
947 }
948*/
949 Assert(iToFree != iUser);
950 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
951
952 int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
953 if (rc == VINF_SUCCESS)
954 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
955 return rc;
956}
957
958
959/**
960 * Checks if a kind mismatch is really a page being reused
961 * or just a normal remapping.
962 *
963 * @returns true if reused and the cached page (enmKind1) should be flushed
964 * @returns false if not reused.
965 * @param enmKind1 The kind of the cached page.
966 * @param enmKind2 The kind of the requested page.
967 */
968static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
969{
970 switch (enmKind1)
971 {
972 /*
973 * Never reuse them. There is no remapping in non-paging mode.
974 */
975 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
976 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
977 return true;
978
979 /*
980 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
981 */
982 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
983 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
984 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
985 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
986 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
987 switch (enmKind2)
988 {
989 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
990 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
991 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
992 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
993 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
994 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
995 return true;
996 default:
997 return false;
998 }
999
1000 /*
1001 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1002 */
1003 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1004 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1005 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1006 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1007 switch (enmKind2)
1008 {
1009 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1010 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1011 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1012 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1013 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1014 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1015 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1016 return true;
1017 default:
1018 return false;
1019 }
1020
1021 /*
1022 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1023 */
1024 case PGMPOOLKIND_ROOT_32BIT_PD:
1025 case PGMPOOLKIND_ROOT_PAE_PD:
1026 case PGMPOOLKIND_ROOT_PDPT:
1027 case PGMPOOLKIND_ROOT_PML4:
1028 return false;
1029
1030 default:
1031 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1032 }
1033}
1034
1035
1036/**
1037 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1038 *
1039 * @returns VBox status code.
1040 * @retval VINF_PGM_CACHED_PAGE on success.
1041 * @retval VERR_FILE_NOT_FOUND if not found.
1042 * @param pPool The pool.
1043 * @param GCPhys The GC physical address of the page we're gonna shadow.
1044 * @param enmKind The kind of mapping.
1045 * @param iUser The shadow page pool index of the user table.
1046 * @param iUserTable The index into the user table (shadowed).
1047 * @param ppPage Where to store the pointer to the page.
1048 */
1049static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
1050{
1051#ifndef IN_GC
1052 const PVM pVM = pPool->CTXSUFF(pVM);
1053#endif
1054 /*
1055 * Look up the GCPhys in the hash.
1056 */
1057 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1058 Log3(("pgmPoolCacheAlloc: %VGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
1059 if (i != NIL_PGMPOOL_IDX)
1060 {
1061 do
1062 {
1063 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1064 Log3(("pgmPoolCacheAlloc: slot %d found page %VGp\n", i, pPage->GCPhys));
1065 if (pPage->GCPhys == GCPhys)
1066 {
1067 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
1068 {
1069 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1070 if (VBOX_SUCCESS(rc))
1071 {
1072 *ppPage = pPage;
1073 STAM_COUNTER_INC(&pPool->StatCacheHits);
1074 return VINF_PGM_CACHED_PAGE;
1075 }
1076 return rc;
1077 }
1078
1079 /*
1080 * The kind is different. In some cases we should now flush the page
1081 * as it has been reused, but in most cases this is normal remapping
1082 * of PDs as PTs or big pages using the GCPhys field in a slightly
1083 * different way than the other kinds.
1084 */
1085 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1086 {
1087 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1088 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTrackInsert. */
1089 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1090 break;
1091 }
1092 }
1093
1094 /* next */
1095 i = pPage->iNext;
1096 } while (i != NIL_PGMPOOL_IDX);
1097 }
1098
1099 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
1100 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1101 return VERR_FILE_NOT_FOUND;
1102}
1103
1104
1105/**
1106 * Inserts a page into the cache.
1107 *
1108 * @param pPool The pool.
1109 * @param pPage The cached page.
1110 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1111 */
1112static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1113{
1114 /*
1115 * Insert into the GCPhys hash if the page is fit for that.
1116 */
1117 Assert(!pPage->fCached);
1118 if (fCanBeCached)
1119 {
1120 pPage->fCached = true;
1121 pgmPoolHashInsert(pPool, pPage);
1122 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1123 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1124 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1125 }
1126 else
1127 {
1128 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1129 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1130 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1131 }
1132
1133 /*
1134 * Insert at the head of the age list.
1135 */
1136 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1137 pPage->iAgeNext = pPool->iAgeHead;
1138 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1139 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1140 else
1141 pPool->iAgeTail = pPage->idx;
1142 pPool->iAgeHead = pPage->idx;
1143}
1144
1145
1146/**
1147 * Flushes a cached page.
1148 *
1149 * @param pPool The pool.
1150 * @param pPage The cached page.
1151 */
1152static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1153{
1154 Log3(("pgmPoolCacheFlushPage: %VGp\n", pPage->GCPhys));
1155
1156 /*
1157 * Remove the page from the hash.
1158 */
1159 if (pPage->fCached)
1160 {
1161 pPage->fCached = false;
1162 pgmPoolHashRemove(pPool, pPage);
1163 }
1164 else
1165 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1166
1167 /*
1168 * Remove it from the age list.
1169 */
1170 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1171 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1172 else
1173 pPool->iAgeTail = pPage->iAgePrev;
1174 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1175 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1176 else
1177 pPool->iAgeHead = pPage->iAgeNext;
1178 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1179 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1180}
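
/*
 * The age list above is a plain index-threaded LRU: pgmPoolCacheInsert()
 * links new pages in at the head and pgmPoolCacheFreeOne() evicts from the
 * tail. A "touch" (cf. the pgmPoolCacheUsed calls mentioned earlier) is
 * just the unlink sequence above followed by a reinsert at the head:
 */
#if 0 /* example only */
/* unlink (same as in pgmPoolCacheFlushPage above) */
if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
    pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
else
    pPool->iAgeTail = pPage->iAgePrev;
if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
    pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
else
    pPool->iAgeHead = pPage->iAgeNext;
/* ...then insert at the head exactly as pgmPoolCacheInsert() does. */
#endif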
1181#endif /* PGMPOOL_WITH_CACHE */
1182
1183
1184#ifdef PGMPOOL_WITH_MONITORING
1185/**
1186 * Looks for pages sharing the monitor.
1187 *
1188 * @returns Pointer to the head page.
1189 * @returns NULL if not found.
1190 * @param pPool The Pool
1191 * @param pNewPage The page which is going to be monitored.
1192 */
1193static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1194{
1195#ifdef PGMPOOL_WITH_CACHE
1196 /*
1197 * Look up the GCPhys in the hash.
1198 */
1199 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1200 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1201 if (i == NIL_PGMPOOL_IDX)
1202 return NULL;
1203 do
1204 {
1205 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1206 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1207 && pPage != pNewPage)
1208 {
1209 switch (pPage->enmKind)
1210 {
1211 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1212 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1213 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1214 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1215 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1216 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1217 case PGMPOOLKIND_ROOT_32BIT_PD:
1218 case PGMPOOLKIND_ROOT_PAE_PD:
1219 case PGMPOOLKIND_ROOT_PDPT:
1220 case PGMPOOLKIND_ROOT_PML4:
1221 {
1222 /* find the head */
1223 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1224 {
1225 Assert(pPage->iMonitoredPrev != pPage->idx);
1226 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1227 }
1228 return pPage;
1229 }
1230
1231 /* ignore, no monitoring. */
1232 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1233 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1234 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1235 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1236 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1237 break;
1238 default:
1239 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1240 }
1241 }
1242
1243 /* next */
1244 i = pPage->iNext;
1245 } while (i != NIL_PGMPOOL_IDX);
1246#endif
1247 return NULL;
1248}
1249
1250/**
1251 * Enables write monitoring of a guest page.
1252 *
1253 * @returns VBox status code.
1254 * @retval VINF_SUCCESS on success.
1255 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a lightweight pool flush.
1256 * @param pPool The pool.
1257 * @param pPage The cached page.
1258 */
1259static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1260{
1261 LogFlow(("pgmPoolMonitorInsert %VGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1262
1263 /*
1264 * Filter out the relevant kinds.
1265 */
1266 switch (pPage->enmKind)
1267 {
1268 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1269 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1270 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1271 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1272 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1273 case PGMPOOLKIND_ROOT_PDPT:
1274 break;
1275
1276 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1277 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1278 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1279 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1280 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1281 /* Nothing to monitor here. */
1282 return VINF_SUCCESS;
1283
1284 case PGMPOOLKIND_ROOT_32BIT_PD:
1285 case PGMPOOLKIND_ROOT_PAE_PD:
1286 case PGMPOOLKIND_ROOT_PML4:
1287#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1288 break;
1289#endif
1290 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1291 default:
1292 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1293 }
1294
1295 /*
1296 * Install handler.
1297 */
1298 int rc;
1299 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1300 if (pPageHead)
1301 {
1302 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1303 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1304 pPage->iMonitoredPrev = pPageHead->idx;
1305 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1306 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1307 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1308 pPageHead->iMonitoredNext = pPage->idx;
1309 rc = VINF_SUCCESS;
1310 }
1311 else
1312 {
1313 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1314 PVM pVM = pPool->CTXSUFF(pVM);
1315 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1316 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1317 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1318 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1319 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1320 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
1321 pPool->pszAccessHandler);
1322 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1323 * the heap size should suffice. */
1324 AssertFatalRC(rc);
1325 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1326 rc = VERR_PGM_POOL_CLEARED;
1327 }
1328 pPage->fMonitored = true;
1329 return rc;
1330}
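
/*
 * The linking above always puts the new page right behind the chain head,
 * so the head keeps ownership of the physical access handler. Before
 * inserting P into a chain "head <-> A <-> B" the list reads exactly that;
 * afterwards it reads "head <-> P <-> A <-> B".
 */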
1331
1332
1333/**
1334 * Disables write monitoring of a guest page.
1335 *
1336 * @returns VBox status code.
1337 * @retval VINF_SUCCESS on success.
1338 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
1339 * @param pPool The pool.
1340 * @param pPage The cached page.
1341 */
1342static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1343{
1344 /*
1345 * Filter out the relevant kinds.
1346 */
1347 switch (pPage->enmKind)
1348 {
1349 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1350 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1351 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1352 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1353 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1354 case PGMPOOLKIND_ROOT_PDPT:
1355 break;
1356
1357 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1358 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1359 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1360 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1361 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1362 /* Nothing to monitor here. */
1363 return VINF_SUCCESS;
1364
1365 case PGMPOOLKIND_ROOT_32BIT_PD:
1366 case PGMPOOLKIND_ROOT_PAE_PD:
1367 case PGMPOOLKIND_ROOT_PML4:
1368#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1369 break;
1370#endif
1371 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1372 default:
1373 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1374 }
1375
1376 /*
1377 * Remove the page from the monitored list or uninstall it if last.
1378 */
1379 const PVM pVM = pPool->CTXSUFF(pVM);
1380 int rc;
1381 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1382 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1383 {
1384 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1385 {
1386 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1387 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1388 pNewHead->fCR3Mix = pPage->fCR3Mix;
1389 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1390 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1391 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1392 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pNewHead),
1393 pPool->pszAccessHandler);
1394 AssertFatalRCSuccess(rc);
1395 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1396 }
1397 else
1398 {
1399 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1400 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1401 {
1402 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1403 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1404 }
1405 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1406 rc = VINF_SUCCESS;
1407 }
1408 }
1409 else
1410 {
1411 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1412 AssertFatalRC(rc);
1413 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1414 rc = VERR_PGM_POOL_CLEARED;
1415 }
1416 pPage->fMonitored = false;
1417
1418 /*
1419 * Remove it from the list of modified pages (if in it).
1420 */
1421 pgmPoolMonitorModifiedRemove(pPool, pPage);
1422
1423 return rc;
1424}
1425
1426
1427#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1428/**
1429 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1430 *
1431 * @param pPool The Pool.
1432 * @param pPage A page in the chain.
1433 * @param fCR3Mix The new fCR3Mix value.
1434 */
1435static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1436{
1437 /* current */
1438 pPage->fCR3Mix = fCR3Mix;
1439
1440 /* before */
1441 int16_t idx = pPage->iMonitoredPrev;
1442 while (idx != NIL_PGMPOOL_IDX)
1443 {
1444 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1445 idx = pPool->aPages[idx].iMonitoredPrev;
1446 }
1447
1448 /* after */
1449 idx = pPage->iMonitoredNext;
1450 while (idx != NIL_PGMPOOL_IDX)
1451 {
1452 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1453 idx = pPool->aPages[idx].iMonitoredNext;
1454 }
1455}
1456
1457
1458/**
1459 * Installs or modifies monitoring of a CR3 page (special).
1460 *
1461 * We're pretending the CR3 page is shadowed by the pool so we can use the
1462 * generic mechanisms for detecting chained monitoring. (This also gives us a
1463 * taste of what code changes are required to really pool CR3 shadow pages.)
1464 *
1465 * @returns VBox status code.
1466 * @param pPool The pool.
1467 * @param idxRoot The CR3 (root) page index.
1468 * @param GCPhysCR3 The (new) CR3 value.
1469 */
1470int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1471{
1472 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1473 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1474 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
1475 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1476
1477 /*
1478 * The unlikely case where it already matches.
1479 */
1480 if (pPage->GCPhys == GCPhysCR3)
1481 {
1482 Assert(pPage->fMonitored);
1483 return VINF_SUCCESS;
1484 }
1485
1486 /*
1487 * Flush the current monitoring and remove it from the hash.
1488 */
1489 int rc = VINF_SUCCESS;
1490 if (pPage->fMonitored)
1491 {
1492 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1493 rc = pgmPoolMonitorFlush(pPool, pPage);
1494 if (rc == VERR_PGM_POOL_CLEARED)
1495 rc = VINF_SUCCESS;
1496 else
1497 AssertFatalRC(rc);
1498 pgmPoolHashRemove(pPool, pPage);
1499 }
1500
1501 /*
1502 * Monitor the page at the new location and insert it into the hash.
1503 */
1504 pPage->GCPhys = GCPhysCR3;
1505 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1506 if (rc2 != VERR_PGM_POOL_CLEARED)
1507 {
1508 AssertFatalRC(rc2);
1509 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1510 rc = rc2;
1511 }
1512 pgmPoolHashInsert(pPool, pPage);
1513 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1514 return rc;
1515}
1516
1517
1518/**
1519 * Removes the monitoring of a CR3 page (special).
1520 *
1521 * @returns VBox status code.
1522 * @param pPool The pool.
1523 * @param idxRoot The CR3 (root) page index.
1524 */
1525int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1526{
1527 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1528 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1529 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1530 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1531
1532 if (!pPage->fMonitored)
1533 return VINF_SUCCESS;
1534
1535 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1536 int rc = pgmPoolMonitorFlush(pPool, pPage);
1537 if (rc != VERR_PGM_POOL_CLEARED)
1538 AssertFatalRC(rc);
1539 else
1540 rc = VINF_SUCCESS;
1541 pgmPoolHashRemove(pPool, pPage);
1542 Assert(!pPage->fMonitored);
1543 pPage->GCPhys = NIL_RTGCPHYS;
1544 return rc;
1545}
1546#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1547
1548
1549/**
1550 * Inserts the page into the list of modified pages.
1551 *
1552 * @param pPool The pool.
1553 * @param pPage The page.
1554 */
1555void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1556{
1557 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1558 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1559 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1560 && pPool->iModifiedHead != pPage->idx,
1561 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1562 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1563 pPool->iModifiedHead, pPool->cModifiedPages));
1564
1565 pPage->iModifiedNext = pPool->iModifiedHead;
1566 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1567 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1568 pPool->iModifiedHead = pPage->idx;
1569 pPool->cModifiedPages++;
1570#ifdef VBOX_WITH_STATISTICS
1571 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1572 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1573#endif
1574}
1575
1576
1577/**
1578 * Removes the page from the list of modified pages and resets the
1579 * modification counter.
1580 *
1581 * @param pPool The pool.
1582 * @param pPage The page which is believed to be in the list of modified pages.
1583 */
1584static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1585{
1586 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1587 if (pPool->iModifiedHead == pPage->idx)
1588 {
1589 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1590 pPool->iModifiedHead = pPage->iModifiedNext;
1591 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1592 {
1593 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1594 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1595 }
1596 pPool->cModifiedPages--;
1597 }
1598 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1599 {
1600 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1601 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1602 {
1603 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1604 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1605 }
1606 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1607 pPool->cModifiedPages--;
1608 }
1609 else
1610 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1611 pPage->cModifications = 0;
1612}
1613
1614
1615/**
1616 * Zaps the list of modified pages, resetting their modification counters in the process.
1617 *
1618 * @param pVM The VM handle.
1619 */
1620void pgmPoolMonitorModifiedClearAll(PVM pVM)
1621{
1622 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1623 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1624
1625 unsigned cPages = 0; NOREF(cPages);
1626 uint16_t idx = pPool->iModifiedHead;
1627 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1628 while (idx != NIL_PGMPOOL_IDX)
1629 {
1630 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1631 idx = pPage->iModifiedNext;
1632 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1633 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1634 pPage->cModifications = 0;
1635 Assert(++cPages);
1636 }
1637 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1638 pPool->cModifiedPages = 0;
1639}
1640
1641
1642/**
1643 * Clear all shadow pages and clear all modification counters.
1644 *
1645 * @param pVM The VM handle.
1646 * @remark Should only be used when monitoring is available, thus placed in
1647 * the PGMPOOL_WITH_MONITORING #ifdef.
1648 */
1649void pgmPoolClearAll(PVM pVM)
1650{
1651 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1652 STAM_PROFILE_START(&pPool->StatClearAll, c);
1653 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1654
1655 /*
1656 * Iterate all the pages until we've encountered all that are in use.
1657 * This is a simple but not quite optimal solution.
1658 */
1659 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1660 unsigned cLeft = pPool->cUsedPages;
1661 unsigned iPage = pPool->cCurPages;
1662 while (--iPage >= PGMPOOL_IDX_FIRST)
1663 {
1664 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1665 if (pPage->GCPhys != NIL_RTGCPHYS)
1666 {
1667 switch (pPage->enmKind)
1668 {
1669 /*
1670 * We only care about shadow page tables.
1671 */
1672 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1673 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1674 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1675 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1676 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1677 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1678 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1679 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1680 {
1681#ifdef PGMPOOL_WITH_USER_TRACKING
1682 if (pPage->cPresent)
1683#endif
1684 {
1685 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
1686 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1687 ASMMemZeroPage(pvShw);
1688 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1689#ifdef PGMPOOL_WITH_USER_TRACKING
1690 pPage->cPresent = 0;
1691 pPage->iFirstPresent = ~0;
1692#endif
1693 }
1694 }
1695 /* fall thru */
1696
1697 default:
1698 Assert(!pPage->cModifications || ++cModifiedPages);
1699 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1700 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1701 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1702 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1703 pPage->cModifications = 0;
1704 break;
1705
1706 }
1707 if (!--cLeft)
1708 break;
1709 }
1710 }
1711
1712 /* sweep the special pages too. */
1713 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1714 {
1715 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1716 if (pPage->GCPhys != NIL_RTGCPHYS)
1717 {
1718 Assert(!pPage->cModifications || ++cModifiedPages);
1719 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1720 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1721 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1722 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1723 pPage->cModifications = 0;
1724 }
1725 }
1726
1727#ifndef DEBUG_michael
1728 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1729#endif
1730 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1731 pPool->cModifiedPages = 0;
1732
1733#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1734 /*
1735 * Clear all the GCPhys links and rebuild the phys ext free list.
1736 */
1737 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
1738 pRam;
1739 pRam = CTXALLSUFF(pRam->pNext))
1740 {
1741 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1742 while (iPage-- > 0)
1743 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
1744 }
1745
1746 pPool->iPhysExtFreeHead = 0;
1747 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
1748 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
1749 for (unsigned i = 0; i < cMaxPhysExts; i++)
1750 {
1751 paPhysExts[i].iNext = i + 1;
1752 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
1753 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
1754 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
1755 }
1756 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
1757#endif
1758
1759
1760 pPool->cPresent = 0;
1761 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
1762}
1763#endif /* PGMPOOL_WITH_MONITORING */
1764
1765
1766#ifdef PGMPOOL_WITH_USER_TRACKING
1767/**
1768 * Frees up at least one user entry.
1769 *
1770 * @returns VBox status code.
1771 * @retval VINF_SUCCESS if at least one user entry was freed.
1772 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1773 * @param pPool The pool.
1774 * @param iUser The user index.
1775 */
1776static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
1777{
1778 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
1779#ifdef PGMPOOL_WITH_CACHE
1780 /*
1781 * Just free cached pages in a braindead fashion.
1782 */
1783 /** @todo walk the age list backwards and free the first with usage. */
1784 int rc = VINF_SUCCESS;
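    /* Keep evicting cached pages until at least one user record has been
       returned to the free list. */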
1785 do
1786 {
1787 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
1788 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
1789 rc = rc2;
1790 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
1791 return rc;
1792#else
1793 /*
1794 * Lazy approach.
1795 */
1796 pgmPoolFlushAllInt(pPool);
1797 return VERR_PGM_POOL_FLUSHED;
1798#endif
1799}
1800
1801
1802/**
1803 * Inserts a page into the cache.
1804 *
1805 * This will create user node for the page, insert it into the GCPhys
1806 * hash, and insert it into the age list.
1807 *
1808 * @returns VBox status code.
1809 * @retval VINF_SUCCESS if successfully added.
1810 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1811 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1812 * @param pPool The pool.
1813 * @param pPage The cached page.
1814 * @param GCPhys The GC physical address of the page we're gonna shadow.
1815 * @param iUser The user index.
1816 * @param iUserTable The user table index.
1817 */
1818DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint16_t iUserTable)
1819{
1820 int rc = VINF_SUCCESS;
1821 PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);
1822
1823 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
1824
1825 /*
1826     * Find a free user node.
1827 */
1828 uint16_t i = pPool->iUserFreeHead;
1829 if (i == NIL_PGMPOOL_USER_INDEX)
1830 {
1831 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1832 if (VBOX_FAILURE(rc))
1833 return rc;
1834 i = pPool->iUserFreeHead;
1835 }
1836
1837 /*
1838 * Unlink the user node from the free list,
1839 * initialize and insert it into the user list.
1840 */
1841 pPool->iUserFreeHead = pUser[i].iNext;
1842 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
1843 pUser[i].iUser = iUser;
1844 pUser[i].iUserTable = iUserTable;
1845 pPage->iUserHead = i;
1846
1847 /*
1848 * Insert into cache and enable monitoring of the guest page if enabled.
1849 *
1850     * Until we implement caching of all levels, including the CR3 one, we'll
1851     * have to make sure we don't try to monitor & cache any recursive reuse of
1852     * a monitored CR3 page. Because all Windows versions do this, we'll have to
1853     * be able to do combined access monitoring: CR3 + PT and
1854     * PD + PT (guest PAE).
1855 *
1856 * Update:
1857 * We're now cooperating with the CR3 monitor if an uncachable page is found.
1858 */
1859#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
1860# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1861 const bool fCanBeMonitored = true;
1862# else
1863 bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
1864 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
1865 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
1866# endif
1867# ifdef PGMPOOL_WITH_CACHE
1868 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
1869# endif
1870 if (fCanBeMonitored)
1871 {
1872# ifdef PGMPOOL_WITH_MONITORING
1873 rc = pgmPoolMonitorInsert(pPool, pPage);
1874 if (rc == VERR_PGM_POOL_CLEARED)
1875 {
1876 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
1877# ifndef PGMPOOL_WITH_CACHE
1878 pgmPoolMonitorFlush(pPool, pPage);
1879 rc = VERR_PGM_POOL_FLUSHED;
1880# endif
1881 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
1882 pUser[i].iNext = pPool->iUserFreeHead;
1883 pUser[i].iUser = NIL_PGMPOOL_IDX;
1884 pPool->iUserFreeHead = i;
1885 }
1886# endif /* PGMPOOL_WITH_MONITORING */
1887    }
1888#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
1889 return rc;
1890}
1891
1892
1893# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
1894/**
1895 * Adds a user reference to a page.
1896 *
1897 * This will add a new user record to the page's user chain and tell the
1898 * cache to update its replacement stats for the page.
1899 *
1900 * @returns VBox status code.
1901 * @retval VINF_SUCCESS if successfully added.
1902 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1903 * @param pPool The pool.
1904 * @param pPage The cached page.
1905 * @param iUser The user index.
1906 * @param iUserTable The user table.
1907 */
1908static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1909{
1910 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1911
1912 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
1913# ifdef VBOX_STRICT
1914 /*
1915     * Check that the entry doesn't already exist.
1916 */
1917 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
1918 {
1919 uint16_t i = pPage->iUserHead;
1920 do
1921 {
1922 Assert(i < pPool->cMaxUsers);
1923 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
1924 i = paUsers[i].iNext;
1925 } while (i != NIL_PGMPOOL_USER_INDEX);
1926 }
1927# endif
1928
1929 /*
1930 * Allocate a user node.
1931 */
1932 uint16_t i = pPool->iUserFreeHead;
1933 if (i == NIL_PGMPOOL_USER_INDEX)
1934 {
1935 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1936 if (VBOX_FAILURE(rc))
1937 return rc;
1938 i = pPool->iUserFreeHead;
1939 }
1940 pPool->iUserFreeHead = paUsers[i].iNext;
1941
1942 /*
1943 * Initialize the user node and insert it.
1944 */
1945 paUsers[i].iNext = pPage->iUserHead;
1946 paUsers[i].iUser = iUser;
1947 paUsers[i].iUserTable = iUserTable;
1948 pPage->iUserHead = i;
1949
1950# ifdef PGMPOOL_WITH_CACHE
1951 /*
1952 * Tell the cache to update its replacement stats for this page.
1953 */
1954 pgmPoolCacheUsed(pPool, pPage);
1955# endif
1956 return VINF_SUCCESS;
1957}
1958# endif /* PGMPOOL_WITH_CACHE */
1959
1960
1961/**
1962 * Frees a user record associated with a page.
1963 *
1964 * This does not clear the entry in the user table, it simply returns the
1965 * user record to the chain of free records.
1966 *
1967 * @param pPool The pool.
1968 * @param pPage The shadow page.
1969 * @param iUser The shadow page pool index of the user table.
1970 * @param iUserTable The index into the user table (shadowed).
1971 */
1972static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1973{
1974 /*
1975 * Unlink and free the specified user entry.
1976 */
1977 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1978
1979    /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
1980 uint16_t i = pPage->iUserHead;
1981 if ( i != NIL_PGMPOOL_USER_INDEX
1982 && paUsers[i].iUser == iUser
1983 && paUsers[i].iUserTable == iUserTable)
1984 {
1985 pPage->iUserHead = paUsers[i].iNext;
1986
1987 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1988 paUsers[i].iNext = pPool->iUserFreeHead;
1989 pPool->iUserFreeHead = i;
1990 return;
1991 }
1992
1993 /* General: Linear search. */
1994 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
1995 while (i != NIL_PGMPOOL_USER_INDEX)
1996 {
1997 if ( paUsers[i].iUser == iUser
1998 && paUsers[i].iUserTable == iUserTable)
1999 {
2000 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2001 paUsers[iPrev].iNext = paUsers[i].iNext;
2002 else
2003 pPage->iUserHead = paUsers[i].iNext;
2004
2005 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2006 paUsers[i].iNext = pPool->iUserFreeHead;
2007 pPool->iUserFreeHead = i;
2008 return;
2009 }
2010 iPrev = i;
2011 i = paUsers[i].iNext;
2012 }
2013
2014 /* Fatal: didn't find it */
2015 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
2016 iUser, iUserTable, pPage->GCPhys));
2017}
2018
2019
2020/**
2021 * Gets the entry size of a shadow table.
2022 *
2023 * @param enmKind The kind of page.
2024 *
2025 * @returns The size of the entry in bytes. That is, 4 or 8.
2026 * @returns If the kind is not for a table, a fatal assertion is raised.
2028 */
2029DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2030{
2031 switch (enmKind)
2032 {
2033 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2034 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2035 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2036 case PGMPOOLKIND_ROOT_32BIT_PD:
2037 return 4;
2038
2039 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2040 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2041 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2042 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2043 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2044 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2045 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2046 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2047 case PGMPOOLKIND_ROOT_PAE_PD:
2048 case PGMPOOLKIND_ROOT_PDPT:
2049 case PGMPOOLKIND_ROOT_PML4:
2050 return 8;
2051
2052 default:
2053 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2054 }
2055}
2056
2057
2058/**
2059 * Gets the entry size of a guest table.
2060 *
2061 * @param enmKind The kind of page.
2062 *
2063 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2064 * @returns If the kind is not for a table, a fatal assertion is raised.
2066 */
2067DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2068{
2069 switch (enmKind)
2070 {
2071 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2072 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2073 case PGMPOOLKIND_ROOT_32BIT_PD:
2074 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2075 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2076 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2077 return 4;
2078
2079 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2080 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2081 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2082 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2083 case PGMPOOLKIND_ROOT_PAE_PD:
2084 case PGMPOOLKIND_ROOT_PDPT:
2085 case PGMPOOLKIND_ROOT_PML4:
2086 return 8;
2087
2088 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2089 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2090 /** @todo can we return 0? (nobody is calling this...) */
2091 return 0;
2092
2093 default:
2094 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2095 }
2096}
2097
2098
2099#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2100/**
2101 * Scans one shadow page table for mappings of a physical page.
2102 *
2103 * @param pVM The VM handle.
2104 * @param pPhysPage The guest page in question.
2105 * @param iShw The shadow page table.
2106 * @param cRefs The number of references made in that PT.
2107 */
2108static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2109{
2110    LogFlow(("pgmPoolTrackFlushGCPhysPTInt: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2111 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2112
2113 /*
2114 * Assert sanity.
2115 */
2116 Assert(cRefs == 1);
2117 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2118 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2119
2120 /*
2121 * Then, clear the actual mappings to the page in the shadow PT.
2122 */
2123 switch (pPage->enmKind)
2124 {
2125 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2126 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2127 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2128 {
2129 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2130 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2131 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2132 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2133 {
2134 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2135 pPT->a[i].u = 0;
2136 cRefs--;
2137 if (!cRefs)
2138 return;
2139 }
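            /* Getting here means the cRefs bookkeeping is out of sync with the
               actual page table contents; dump the remaining matches in debug
               builds and raise a fatal assertion. */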
2140#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2141 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2142 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2143 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2144 {
2145 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2146 pPT->a[i].u = 0;
2147 }
2148#endif
2149 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2150 break;
2151 }
2152
2153 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2154 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2155 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2156 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2157 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2158 {
2159 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2160 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2161 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2162 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2163 {
2164 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2165 pPT->a[i].u = 0;
2166 cRefs--;
2167 if (!cRefs)
2168 return;
2169 }
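            /* Same out-of-sync situation as in the 32-bit case above. */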
2170#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2171 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2172 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2173 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2174 {
2175 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2176 pPT->a[i].u = 0;
2177 }
2178#endif
2179 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2180 break;
2181 }
2182
2183 default:
2184 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2185 }
2186}
2187
2188
2189/**
2190 * Scans one shadow page table for mappings of a physical page.
2191 *
2192 * @param pVM The VM handle.
2193 * @param pPhysPage The guest page in question.
2194 * @param iShw The shadow page table.
2195 * @param cRefs The number of references made in that PT.
2196 */
2197void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2198{
2199 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool);
2200 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2201 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2202 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2203 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2204 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2205}
2206
2207
2208/**
2209 * Flushes a list of shadow page tables mapping the same physical page.
2210 *
2211 * @param pVM The VM handle.
2212 * @param pPhysPage The guest page in question.
2213 * @param iPhysExt The physical cross reference extent list to flush.
2214 */
2215void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2216{
2217 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2218 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2219    LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%d\n", pPhysPage->HCPhys, iPhysExt));
2220
2221 const uint16_t iPhysExtStart = iPhysExt;
2222 PPGMPOOLPHYSEXT pPhysExt;
2223 do
2224 {
2225 Assert(iPhysExt < pPool->cMaxPhysExts);
2226 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2227 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2228 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2229 {
2230 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2231 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2232 }
2233
2234 /* next */
2235 iPhysExt = pPhysExt->iNext;
2236 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2237
2238 /* insert the list into the free list and clear the ram range entry. */
2239 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2240 pPool->iPhysExtFreeHead = iPhysExtStart;
2241 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2242
2243 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2244}
2245#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2246
2247
2248/**
2249 * Scans all shadow page tables for mappings of a physical page.
2250 *
2251 * This may be slow, but it's most likely more efficient than cleaning
2252 * out the entire page pool / cache.
2253 *
2254 * @returns VBox status code.
2255 * @retval VINF_SUCCESS if all references have been successfully cleared.
2256 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2257 * a page pool cleaning.
2258 *
2259 * @param pVM The VM handle.
2260 * @param pPhysPage The guest page in question.
2261 */
2262int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2263{
2264 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2265 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2266 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2267 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2268
2269#if 1
2270 /*
2271 * There is a limit to what makes sense.
2272 */
2273 if (pPool->cPresent > 1024)
2274 {
2275 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2276 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2277 return VINF_PGM_GCPHYS_ALIASED;
2278 }
2279#endif
2280
2281 /*
2282     * Iterate all the pages until we've encountered all that are in use.
2283     * This is a simple but not quite optimal solution.
2284 */
2285 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2286 const uint32_t u32 = u64;
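    /* Note: the 32-bit cases below only compare the low 32 bits of the PTE;
       this assumes the page's HCPhys fits in 32 bits for non-PAE shadow PTs. */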
2287 unsigned cLeft = pPool->cUsedPages;
2288 unsigned iPage = pPool->cCurPages;
2289 while (--iPage >= PGMPOOL_IDX_FIRST)
2290 {
2291 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2292 if (pPage->GCPhys != NIL_RTGCPHYS)
2293 {
2294 switch (pPage->enmKind)
2295 {
2296 /*
2297 * We only care about shadow page tables.
2298 */
2299 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2300 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2301 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2302 {
2303 unsigned cPresent = pPage->cPresent;
2304 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2305 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2306 if (pPT->a[i].n.u1Present)
2307 {
2308 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2309 {
2310 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2311 pPT->a[i].u = 0;
2312 }
2313 if (!--cPresent)
2314 break;
2315 }
2316 break;
2317 }
2318
2319 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2320 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2321 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2322 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2323 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2324 {
2325 unsigned cPresent = pPage->cPresent;
2326 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2327 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2328 if (pPT->a[i].n.u1Present)
2329 {
2330 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2331 {
2332 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2333 pPT->a[i].u = 0;
2334 }
2335 if (!--cPresent)
2336 break;
2337 }
2338 break;
2339 }
2340 }
2341 if (!--cLeft)
2342 break;
2343 }
2344 }
2345
2346 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2347 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2348 return VINF_SUCCESS;
2349}
2350
2351
2352/**
2353 * Clears the user entry in a user table.
2354 *
2355 * This is used to remove all references to a page when flushing it.
2356 */
2357static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2358{
2359 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2360 Assert(pUser->iUser < pPool->cCurPages);
2361
2362 /*
2363 * Map the user page.
2364 */
2365 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
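    /* View the user table either as 32-bit or as 64-bit entries; which of the
       two applies depends on the page kind checked below. */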
2366 union
2367 {
2368 uint64_t *pau64;
2369 uint32_t *pau32;
2370 } u;
2371 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pUserPage);
2372
2373#ifdef VBOX_STRICT
2374 /*
2375 * Some sanity checks.
2376 */
2377 switch (pUserPage->enmKind)
2378 {
2379 case PGMPOOLKIND_ROOT_32BIT_PD:
2380 Assert(!(u.pau32[pUser->iUser] & PGM_PDFLAGS_MAPPING));
2381 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2382 break;
2383 case PGMPOOLKIND_ROOT_PAE_PD:
2384 Assert(!(u.pau64[pUser->iUser] & PGM_PDFLAGS_MAPPING));
2385 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2386 break;
2387 case PGMPOOLKIND_ROOT_PDPT:
2388 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2389 Assert(pUser->iUserTable < 4);
2390 break;
2391 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2392 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2393 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2394 break;
2395 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2396 case PGMPOOLKIND_ROOT_PML4:
2397 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2398 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2399 break;
2400 default:
2401 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2402 break;
2403 }
2404#endif /* VBOX_STRICT */
2405
2406 /*
2407 * Clear the entry in the user page.
2408 */
2409 switch (pUserPage->enmKind)
2410 {
2411 /* 32-bit entries */
2412 case PGMPOOLKIND_ROOT_32BIT_PD:
2413 u.pau32[pUser->iUserTable] = 0;
2414 break;
2415
2416 /* 64-bit entries */
2417 case PGMPOOLKIND_ROOT_PAE_PD:
2418 case PGMPOOLKIND_ROOT_PDPT:
2419 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2420 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2421 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2422 case PGMPOOLKIND_ROOT_PML4:
2423 u.pau64[pUser->iUserTable] = 0;
2424 break;
2425
2426 default:
2427 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2428 }
2429}
2430
2431
2432/**
2433 * Clears all users of a page.
2434 */
2435static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2436{
2437 /*
2438 * Free all the user records.
2439 */
2440 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2441 uint16_t i = pPage->iUserHead;
2442 while (i != NIL_PGMPOOL_USER_INDEX)
2443 {
2444        /* Clear the entry in the user table. */
2445 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2446
2447 /* Free it. */
2448 const uint16_t iNext = paUsers[i].iNext;
2449 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2450 paUsers[i].iNext = pPool->iUserFreeHead;
2451 pPool->iUserFreeHead = i;
2452
2453 /* Next. */
2454 i = iNext;
2455 }
2456 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2457}
2458
2459
2460#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2461/**
2462 * Allocates a new physical cross reference extent.
2463 *
2464 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2465 * @param pVM The VM handle.
2466 * @param piPhysExt Where to store the phys ext index.
2467 */
2468PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2469{
2470 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2471 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2472 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2473 {
2474 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2475 return NULL;
2476 }
2477 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2478 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2479 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2480 *piPhysExt = iPhysExt;
2481 return pPhysExt;
2482}
2483
2484
2485/**
2486 * Frees a physical cross reference extent.
2487 *
2488 * @param pVM The VM handle.
2489 * @param iPhysExt The extent to free.
2490 */
2491void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2492{
2493 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2494 Assert(iPhysExt < pPool->cMaxPhysExts);
2495 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2496 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2497 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2498 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2499 pPool->iPhysExtFreeHead = iPhysExt;
2500}
2501
2502
2503/**
2504 * Frees a list of physical cross reference extents.
2505 *
2506 * @param pVM The VM handle.
2507 * @param iPhysExt The index of the head of the extent list to free.
2508 */
2509void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2510{
2511 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2512
2513 const uint16_t iPhysExtStart = iPhysExt;
2514 PPGMPOOLPHYSEXT pPhysExt;
2515 do
2516 {
2517 Assert(iPhysExt < pPool->cMaxPhysExts);
2518 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2519 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2520 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2521
2522 /* next */
2523 iPhysExt = pPhysExt->iNext;
2524 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2525
2526 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2527 pPool->iPhysExtFreeHead = iPhysExtStart;
2528}
2529
2530/**
2531 * Insert a reference into a list of physical cross reference extents.
2532 *
2533 * @returns The new ram range flags (top 16-bits).
2534 *
2535 * @param pVM The VM handle.
2536 * @param iPhysExt The physical extent index of the list head.
2537 * @param iShwPT The shadow page table index.
2538 *
2539 */
2540static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2541{
2542 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2543 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2544
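    /* The value returned below is in the format of the top 16 bits of the ram
       range flags: the extent index (or the overflow marker) in the IDX field
       and MM_RAM_FLAGS_CREFS_PHYSEXT in the CREFS field, mirroring how
       pgmPoolTrackPhysExtDerefGCPhys decodes HCPhys. */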
2545 /* special common case. */
2546 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2547 {
2548 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2549 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2550 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2551 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2552 }
2553
2554 /* general treatment. */
2555 const uint16_t iPhysExtStart = iPhysExt;
2556 unsigned cMax = 15;
2557 for (;;)
2558 {
2559 Assert(iPhysExt < pPool->cMaxPhysExts);
2560 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2561 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2562 {
2563 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2564 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2565 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2566 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2567 }
2568 if (!--cMax)
2569 {
2570 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2571 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2572 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2573 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2574        }

            /* Advance to the next extent in the chain; when the end of the list
               is reached, fall out of the loop and append a new extent below. */
            iPhysExt = paPhysExts[iPhysExt].iNext;
            if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
                break;
2575    }
2576
2577 /* add another extent to the list. */
2578 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2579 if (!pNew)
2580 {
2581 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2582 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2583 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2584 }
2585 pNew->iNext = iPhysExtStart;
2586 pNew->aidx[0] = iShwPT;
2587 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2588 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2589}
2590
2591
2592/**
2593 * Add a reference to guest physical page where extents are in use.
2594 *
2595 * @returns The new ram range flags (top 16-bits).
2596 *
2597 * @param pVM The VM handle.
2598 * @param u16 The ram range flags (top 16-bits).
2599 * @param iShwPT The shadow page table index.
2600 */
2601uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2602{
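    /* Three cases: (1) the page still uses a plain reference count, so convert
       it to an extent list; (2) it already uses an extent list, so insert into
       it; (3) the list has overflowed before, so just keep the overflow marker
       and count the event. */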
2603 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2604 {
2605 /*
2606 * Convert to extent list.
2607 */
2608 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2609 uint16_t iPhysExt;
2610 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2611 if (pPhysExt)
2612 {
2613 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2614 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2615 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2616 pPhysExt->aidx[1] = iShwPT;
2617 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2618 }
2619 else
2620 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2621 }
2622 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2623 {
2624 /*
2625 * Insert into the extent list.
2626 */
2627 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2628 }
2629 else
2630 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2631 return u16;
2632}
2633
2634
2635/**
2636 * Clear references to guest physical memory.
2637 *
2638 * @param pPool The pool.
2639 * @param pPage The page.
2640 * @param pPhysPage Pointer to the aPages entry in the ram range.
2641 */
2642void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
2643{
2644 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
2645 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2646
2647 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2648 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2649 {
2650 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2651 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2652 do
2653 {
2654 Assert(iPhysExt < pPool->cMaxPhysExts);
2655
2656 /*
2657 * Look for the shadow page and check if it's all freed.
2658 */
2659 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2660 {
2661 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2662 {
2663 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2664
2665 for (i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2666 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2667 {
2668 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2669 return;
2670 }
2671
2672 /* we can free the node. */
2673 PVM pVM = pPool->CTXSUFF(pVM);
2674 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2675 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2676 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2677 {
2678 /* lonely node */
2679 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2680 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
2681 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2682 }
2683 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2684 {
2685 /* head */
2686 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
2687 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
2688 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2689 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2690 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2691 }
2692 else
2693 {
2694 /* in list */
2695 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2696 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2697 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2698 }
2699 iPhysExt = iPhysExtNext;
2700 return;
2701 }
2702 }
2703
2704 /* next */
2705 iPhysExtPrev = iPhysExt;
2706 iPhysExt = paPhysExts[iPhysExt].iNext;
2707 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2708
2709 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2710 }
2711 else /* nothing to do */
2712 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
2713}
2714
2715
2716
2717/**
2718 * Clear references to guest physical memory.
2719 *
2720 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
2721 * is assumed to be correct, so the linear search can be skipped and we can assert
2722 * at an earlier point.
2723 *
2724 * @param pPool The pool.
2725 * @param pPage The page.
2726 * @param HCPhys The host physical address corresponding to the guest page.
2727 * @param GCPhys The guest physical address corresponding to HCPhys.
2728 */
2729static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
2730{
2731 /*
2732 * Walk range list.
2733 */
2734 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
2735 while (pRam)
2736 {
2737 RTGCPHYS off = GCPhys - pRam->GCPhys;
2738 if (off < pRam->cb)
2739 {
2740 /* does it match? */
2741 const unsigned iPage = off >> PAGE_SHIFT;
2742 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
2743 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2744 {
2745 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2746 return;
2747 }
2748 break;
2749 }
2750 pRam = CTXALLSUFF(pRam->pNext);
2751 }
2752 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
2753}
2754
2755
2756/**
2757 * Clear references to guest physical memory.
2758 *
2759 * @param pPool The pool.
2760 * @param pPage The page.
2761 * @param HCPhys The host physical address corresponding to the guest page.
2762 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
2763 */
2764static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
2765{
2766 /*
2767 * Walk range list.
2768 */
2769 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
2770 while (pRam)
2771 {
2772 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
2773 if (off < pRam->cb)
2774 {
2775 /* does it match? */
2776 const unsigned iPage = off >> PAGE_SHIFT;
2777 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
2778 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2779 {
2780 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2781 return;
2782 }
2783 break;
2784 }
2785 pRam = CTXALLSUFF(pRam->pNext);
2786 }
2787
2788 /*
2789 * Damn, the hint didn't work. We'll have to do an expensive linear search.
2790 */
2791 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
2792 pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
2793 while (pRam)
2794 {
2795 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2796 while (iPage-- > 0)
2797 {
2798 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2799 {
2800 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
2801 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
2802 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2803 return;
2804 }
2805 }
2806 pRam = CTXALLSUFF(pRam->pNext);
2807 }
2808
2809 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
2810}
2811
2812
2813/**
2814 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
2815 *
2816 * @param pPool The pool.
2817 * @param pPage The page.
2818 * @param pShwPT The shadow page table (mapping of the page).
2819 * @param pGstPT The guest page table.
2820 */
2821DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
2822{
2823 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pShwPT->a); i++)
2824 if (pShwPT->a[i].n.u1Present)
2825 {
2826 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
2827 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2828 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2829 if (!--pPage->cPresent)
2830 break;
2831 }
2832}
2833
2834
2835/**
2836 * Clear references to guest physical memory in a PAE / 32-bit page table.
2837 *
2838 * @param pPool The pool.
2839 * @param pPage The page.
2840 * @param pShwPT The shadow page table (mapping of the page).
2841 * @param pGstPT The guest page table (just a half one).
2842 */
2843DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
2844{
2845 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2846 if (pShwPT->a[i].n.u1Present)
2847 {
2848            Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
2849 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2850 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2851 }
2852}
2853
2854
2855/**
2856 * Clear references to guest physical memory in a PAE / PAE page table.
2857 *
2858 * @param pPool The pool.
2859 * @param pPage The page.
2860 * @param pShwPT The shadow page table (mapping of the page).
2861 * @param pGstPT The guest page table.
2862 */
2863DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
2864{
2865 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2866 if (pShwPT->a[i].n.u1Present)
2867 {
2868            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
2869 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
2870 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
2871 }
2872}
2873
2874
2875/**
2876 * Clear references to guest physical memory in a 32-bit / 4MB page table.
2877 *
2878 * @param pPool The pool.
2879 * @param pPage The page.
2880 * @param pShwPT The shadow page table (mapping of the page).
2881 */
2882DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
2883{
2884 RTGCPHYS GCPhys = pPage->GCPhys;
2885 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2886 if (pShwPT->a[i].n.u1Present)
2887 {
2888 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
2889 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
2890 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
2891 }
2892}
2893
2894
2895/**
2896 * Clear references to guest physical memory in a PAE / 2/4MB page table.
2897 *
2898 * @param pPool The pool.
2899 * @param pPage The page.
2900 * @param pShwPT The shadow page table (mapping of the page).
2901 */
2902DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
2903{
2904 RTGCPHYS GCPhys = pPage->GCPhys;
2905 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2906 if (pShwPT->a[i].n.u1Present)
2907 {
2908            Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 GCPhys=%RGp\n",
2909 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
2910 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
2911 }
2912}
2913#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2914
2915
2916/**
2917 * Clear references to shadowed pages in a PAE page directory.
2918 *
2919 * @param pPool The pool.
2920 * @param pPage The page.
2921 * @param pShwPD The shadow page directory (mapping of the page).
2922 */
2923DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
2924{
2925 for (unsigned i = 0; i < ELEMENTS(pShwPD->a); i++)
2926 {
2927 if (pShwPD->a[i].n.u1Present)
2928 {
2929 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
2930 if (pSubPage)
2931 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2932 else
2933 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
2934 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2935 }
2936 }
2937}
2938
2939
2940/**
2941 * Clear references to shadowed pages in a 64-bit page directory pointer table.
2942 *
2943 * @param pPool The pool.
2944 * @param pPage The page.
2945 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
2946 */
2947DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
2948{
2949 for (unsigned i = 0; i < ELEMENTS(pShwPDPT->a); i++)
2950 {
2951 if (pShwPDPT->a[i].n.u1Present)
2952 {
2953 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
2954 if (pSubPage)
2955 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2956 else
2957 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
2958 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2959 }
2960 }
2961}
2962
2963
2964/**
2965 * Clears all references made by this page.
2966 *
2967 * This includes other shadow pages and GC physical addresses.
2968 *
2969 * @param pPool The pool.
2970 * @param pPage The page.
2971 */
2972static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2973{
2974 /*
2975 * Map the shadow page and take action according to the page kind.
2976 */
2977 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2978 switch (pPage->enmKind)
2979 {
2980#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2981 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2982 {
2983 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2984 void *pvGst;
2985 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2986 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
2987 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2988 break;
2989 }
2990
2991 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2992 {
2993 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2994 void *pvGst;
2995 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2996 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
2997 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2998 break;
2999 }
3000
3001 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3002 {
3003 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3004 void *pvGst;
3005 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3006 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3007 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3008 break;
3009 }
3010
3011 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3012 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3013 {
3014 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3015 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3016 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3017 break;
3018 }
3019
3020 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 4 MB page */
3021 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3022 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3023 {
3024 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3025 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3026 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3027 break;
3028 }
3029
3030#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3031 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3032 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3033 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3034 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3035 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3036 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3037 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3038 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3039 break;
3040#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3041
3042 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
3043 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3044 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3045 break;
3046
3047 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3048 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3049 break;
3050
3051 default:
3052 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3053 }
3054
3055    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3056 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3057 ASMMemZeroPage(pvShw);
3058 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3059 pPage->fZeroed = true;
3060}
3061#endif /* PGMPOOL_WITH_USER_TRACKING */
3062
3063
3064/**
3065 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3066 *
3067 * @param pPool The pool.
3068 */
3069static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3070{
3071 /*
3072     * These special pages are all mapped into the indexes 1 .. PGMPOOL_IDX_FIRST - 1.
3073 */
3074 Assert(NIL_PGMPOOL_IDX == 0);
3075 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3076 {
3077 /*
3078 * Get the page address.
3079 */
3080 PPGMPOOLPAGE pPage = &pPool->aPages[i];
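        /* The root may hold 32-bit or 64-bit entries depending on its kind. */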
3081 union
3082 {
3083 uint64_t *pau64;
3084 uint32_t *pau32;
3085 } u;
3086 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
3087
3088 /*
3089 * Mark stuff not present.
3090 */
3091 switch (pPage->enmKind)
3092 {
3093 case PGMPOOLKIND_ROOT_32BIT_PD:
3094 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3095 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3096 u.pau32[iPage] = 0;
3097 break;
3098
3099 case PGMPOOLKIND_ROOT_PAE_PD:
3100 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * X86_PG_PAE_PDPE_ENTRIES; iPage++)
3101 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3102 u.pau64[iPage] = 0;
3103 break;
3104
3105 case PGMPOOLKIND_ROOT_PML4:
3106 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
3107 if ((u.pau64[iPage] & (PGM_PLXFLAGS_PERMANENT | X86_PML4E_P)) == X86_PML4E_P)
3108 u.pau64[iPage] = 0;
3109 break;
3110
3111 case PGMPOOLKIND_ROOT_PDPT:
3112            /* Not currently the root of shadowed pages; ignore it. */
3113 break;
3114 }
3115 }
3116
3117 /*
3118 * Paranoia (to be removed), flag a global CR3 sync.
3119 */
3120 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
3121}
3122
3123
3124/**
3125 * Flushes the entire cache.
3126 *
3127 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3128 * and will execute the CR3 flush.
3129 *
3130 * @param pPool The pool.
3131 */
3132static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3133{
3134 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3135 LogFlow(("pgmPoolFlushAllInt:\n"));
3136
3137 /*
3138 * If there are no pages in the pool, there is nothing to do.
3139 */
3140 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3141 {
3142 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3143 return;
3144 }
3145
3146 /*
3147 * Nuke the free list and reinsert all pages into it.
3148 */
3149 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3150 {
3151 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3152
3153#ifdef IN_RING3
3154 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
3155#endif
3156#ifdef PGMPOOL_WITH_MONITORING
3157 if (pPage->fMonitored)
3158 pgmPoolMonitorFlush(pPool, pPage);
3159 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3160 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3161 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3162 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3163 pPage->cModifications = 0;
3164#endif
3165 pPage->GCPhys = NIL_RTGCPHYS;
3166 pPage->enmKind = PGMPOOLKIND_FREE;
3167 Assert(pPage->idx == i);
3168 pPage->iNext = i + 1;
3169 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3170 pPage->fSeenNonGlobal = false;
3171        pPage->fMonitored = false;
3172 pPage->fCached = false;
3173 pPage->fReusedFlushPending = false;
3174 pPage->fCR3Mix = false;
3175#ifdef PGMPOOL_WITH_USER_TRACKING
3176 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3177#endif
3178#ifdef PGMPOOL_WITH_CACHE
3179 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3180 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3181#endif
3182 }
3183 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3184 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3185 pPool->cUsedPages = 0;
3186
3187#ifdef PGMPOOL_WITH_USER_TRACKING
3188 /*
3189 * Zap and reinitialize the user records.
3190 */
3191 pPool->cPresent = 0;
3192 pPool->iUserFreeHead = 0;
3193 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
3194 const unsigned cMaxUsers = pPool->cMaxUsers;
3195 for (unsigned i = 0; i < cMaxUsers; i++)
3196 {
3197 paUsers[i].iNext = i + 1;
3198 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3199 paUsers[i].iUserTable = 0xfffe;
3200 }
3201 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3202#endif
3203
3204#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3205 /*
3206 * Clear all the GCPhys links and rebuild the phys ext free list.
3207 */
3208 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3209 pRam;
3210 pRam = CTXALLSUFF(pRam->pNext))
3211 {
3212 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3213 while (iPage-- > 0)
3214 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3215 }
3216
3217 pPool->iPhysExtFreeHead = 0;
3218 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
3219 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3220 for (unsigned i = 0; i < cMaxPhysExts; i++)
3221 {
3222 paPhysExts[i].iNext = i + 1;
3223 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3224 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3225 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3226 }
3227 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3228#endif
3229
3230#ifdef PGMPOOL_WITH_MONITORING
3231 /*
3232 * Just zap the modified list.
3233 */
3234 pPool->cModifiedPages = 0;
3235 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3236#endif
3237
3238#ifdef PGMPOOL_WITH_CACHE
3239 /*
3240 * Clear the GCPhys hash and the age list.
3241 */
3242 for (unsigned i = 0; i < ELEMENTS(pPool->aiHash); i++)
3243 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3244 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3245 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3246#endif
3247
3248 /*
3249 * Flush all the special root pages.
3250 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3251 */
3252 pgmPoolFlushAllSpecialRoots(pPool);
3253 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3254 {
3255 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3256 pPage->iNext = NIL_PGMPOOL_IDX;
3257#ifdef PGMPOOL_WITH_MONITORING
3258 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3259 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3260 pPage->cModifications = 0;
3261 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3262 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3263 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3264 if (pPage->fMonitored)
3265 {
3266 PVM pVM = pPool->CTXSUFF(pVM);
3267 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3268 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3269 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3270 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
3271 pPool->pszAccessHandler);
3272 AssertFatalRCSuccess(rc);
3273# ifdef PGMPOOL_WITH_CACHE
3274 pgmPoolHashInsert(pPool, pPage);
3275# endif
3276 }
3277#endif
3278#ifdef PGMPOOL_WITH_USER_TRACKING
3279 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3280#endif
3281#ifdef PGMPOOL_WITH_CACHE
3282 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3283 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3284#endif
3285 }
3286
3287 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3288}
3289
3290
3291/**
3292 * Flushes a pool page.
3293 *
3294 * This moves the page to the free list after removing all user references to it.
3295 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3296 *
3297 * @returns VBox status code.
3298 * @retval VINF_SUCCESS on success.
3299 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
3300 * @param pPool The pool.
3301 * @param pPage The shadow page.
3302 */
3303int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3304{
3305 int rc = VINF_SUCCESS;
3306 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3307 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3308 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3309
3310 /*
3311 * Quietly reject any attempts at flushing any of the special root pages.
3312 */
3313 if (pPage->idx < PGMPOOL_IDX_FIRST)
3314 {
3315        Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3316 return VINF_SUCCESS;
3317 }
3318
3319 /*
3320     * Mark the page as being in need of an ASMMemZeroPage().
3321 */
3322 pPage->fZeroed = false;
3323
3324#ifdef PGMPOOL_WITH_USER_TRACKING
3325 /*
3326 * Clear the page.
3327 */
3328 pgmPoolTrackClearPageUsers(pPool, pPage);
3329 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3330 pgmPoolTrackDeref(pPool, pPage);
3331 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3332#endif
3333
3334#ifdef PGMPOOL_WITH_CACHE
3335 /*
3336 * Flush it from the cache.
3337 */
3338 pgmPoolCacheFlushPage(pPool, pPage);
3339#endif /* PGMPOOL_WITH_CACHE */
3340
3341#ifdef PGMPOOL_WITH_MONITORING
3342 /*
3343     * Deregister the monitoring.
3344 */
3345 if (pPage->fMonitored)
3346 rc = pgmPoolMonitorFlush(pPool, pPage);
3347#endif
3348
3349 /*
3350 * Free the page.
3351 */
3352 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3353 pPage->iNext = pPool->iFreeHead;
3354 pPool->iFreeHead = pPage->idx;
3355 pPage->enmKind = PGMPOOLKIND_FREE;
3356 pPage->GCPhys = NIL_RTGCPHYS;
3357 pPage->fReusedFlushPending = false;
3358
3359 pPool->cUsedPages--;
3360 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3361 return rc;
3362}
3363
3364
3365/**
3366 * Frees a usage of a pool page.
3367 *
3368 * The caller is responsible for updating the user table so that it no longer
3369 * references the shadow page.
3370 *
3371 * @param pPool The pool.
3372 * @param pPage The shadow page.
3373 * @param iUser The shadow page pool index of the user table.
3374 * @param iUserTable The index into the user table (shadowed).
3375 */
3376void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
3377{
3378 STAM_PROFILE_START(&pPool->StatFree, a);
3379 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3380 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3381 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3382#ifdef PGMPOOL_WITH_USER_TRACKING
3383 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3384#endif
3385#ifdef PGMPOOL_WITH_CACHE
3386 if (!pPage->fCached)
3387#endif
3388 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3389 STAM_PROFILE_STOP(&pPool->StatFree, a);
3390}
3391
3392
3393/**
3394 * Makes more free pages available.
3395 *
3396 * @returns VBox status code.
3397 * @retval VINF_SUCCESS on success.
3398 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3399 *
3400 * @param pPool The pool.
3401 * @param iUser The user of the page.
3402 */
3403static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3404{
3405 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3406
3407 /*
3408     * If the pool isn't fully grown yet, expand it.
3409 */
3410 if (pPool->cCurPages < pPool->cMaxPages)
3411 {
3412 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3413#ifdef IN_RING3
3414 int rc = PGMR3PoolGrow(pPool->pVMHC);
3415#else
3416 int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3417#endif
3418 if (VBOX_FAILURE(rc))
3419 return rc;
3420 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3421 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3422 return VINF_SUCCESS;
3423 }
3424
3425#ifdef PGMPOOL_WITH_CACHE
3426 /*
3427 * Free one cached page.
3428 */
3429 return pgmPoolCacheFreeOne(pPool, iUser);
3430#else
3431 /*
3432 * Flush the pool.
3433 * If we have tracking enabled, it should be possible to come up with
3434 * a cheap replacement strategy...
3435 */
3436 pgmPoolFlushAllInt(pPool);
3437 return VERR_PGM_POOL_FLUSHED;
3438#endif
3439}
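/**
 * @par Example: the grow-or-evict policy (illustrative sketch, not part of
 *      the original source).
 * pgmPoolMakeMoreFreePages() prefers growing the pool (cheap) and only falls
 * back to evicting a cached page, or flushing everything, once the pool has
 * reached cMaxPages. The sketch below shows the shape of that policy,
 * extending the hypothetical DemoPool above with cCurPages/cMaxPages
 * counters and assumed helpers demoGrow() and demoEvictOne():
 * @code
 *     static int demoMakeFree(DemoPool *pPool)
 *     {
 *         if (pPool->cCurPages < pPool->cMaxPages)
 *         {
 *             int rc = demoGrow(pPool);            // add a batch of pages
 *             if (rc != 0)
 *                 return rc;                       // out of memory, give up
 *             if (pPool->iFreeHead != DEMO_NIL_IDX)
 *                 return 0;                        // growing did the trick
 *         }
 *         return demoEvictOne(pPool);              // reuse a cached page
 *     }
 * @endcode
 */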
3440
3441
3442/**
3443 * Allocates a page from the pool.
3444 *
3445 * This page may actually be a cached page and not in need of any processing
3446 * on the caller's part.
3447 *
3448 * @returns VBox status code.
3449 * @retval VINF_SUCCESS if a NEW page was allocated.
3450 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3451 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3452 * @param pVM The VM handle.
3453 * @param GCPhys The GC physical address of the page we're going to shadow.
3454 * For 4MB and 2MB PD entries, it's the first address the
3455 * shadow PT is covering.
3456 * @param enmKind The kind of mapping.
3457 * @param iUser The shadow page pool index of the user table.
3458 * @param iUserTable The index into the user table (shadowed).
3459 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3460 */
3461int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
3462{
3463 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3464 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3465 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3466 *ppPage = NULL;
3467
3468#ifdef PGMPOOL_WITH_CACHE
3469 if (pPool->fCacheEnabled)
3470 {
3471 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3472 if (VBOX_SUCCESS(rc2))
3473 {
3474 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3475 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3476 return rc2;
3477 }
3478 }
3479#endif
3480
3481 /*
3482 * Allocate a new one.
3483 */
3484 int rc = VINF_SUCCESS;
3485 uint16_t iNew = pPool->iFreeHead;
3486 if (iNew == NIL_PGMPOOL_IDX)
3487 {
3488 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3489 if (VBOX_FAILURE(rc))
3490 {
3491 if (rc != VERR_PGM_POOL_CLEARED)
3492 {
3493 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3494 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3495 return rc;
3496 }
3497 rc = VERR_PGM_POOL_FLUSHED;
3498 }
3499 iNew = pPool->iFreeHead;
3500 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3501 }
3502
3503 /* unlink the free head */
3504 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3505 pPool->iFreeHead = pPage->iNext;
3506 pPage->iNext = NIL_PGMPOOL_IDX;
3507
3508 /*
3509 * Initialize it.
3510 */
3511 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3512 pPage->enmKind = enmKind;
3513 pPage->GCPhys = GCPhys;
3514 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3515 pPage->fMonitored = false;
3516 pPage->fCached = false;
3517 pPage->fReusedFlushPending = false;
3518 pPage->fCR3Mix = false;
3519#ifdef PGMPOOL_WITH_MONITORING
3520 pPage->cModifications = 0;
3521 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3522 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3523#endif
3524#ifdef PGMPOOL_WITH_USER_TRACKING
3525 pPage->cPresent = 0;
3526 pPage->iFirstPresent = ~0;
3527
3528 /*
3529 * Insert into the tracking and cache. If this fails, free the page.
3530 */
3531 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3532 if (VBOX_FAILURE(rc3))
3533 {
3534 if (rc3 != VERR_PGM_POOL_CLEARED)
3535 {
3536 pPool->cUsedPages--;
3537 pPage->enmKind = PGMPOOLKIND_FREE;
3538 pPage->GCPhys = NIL_RTGCPHYS;
3539 pPage->iNext = pPool->iFreeHead;
3540 pPool->iFreeHead = pPage->idx;
3541 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3542 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3543 return rc3;
3544 }
3545 rc = VERR_PGM_POOL_FLUSHED;
3546 }
3547#endif /* PGMPOOL_WITH_USER_TRACKING */
3548
3549 /*
3550 * Commit the allocation, clear the page and return.
3551 */
3552#ifdef VBOX_WITH_STATISTICS
3553 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
3554 pPool->cUsedPagesHigh = pPool->cUsedPages;
3555#endif
3556
3557 if (!pPage->fZeroed)
3558 {
3559 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3560 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
3561 ASMMemZeroPage(pv);
3562 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3563 }
3564
3565 *ppPage = pPage;
3566 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
3567 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
3568 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3569 return rc;
3570}
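/**
 * @par Example: a pgmPoolAlloc() caller (illustrative sketch, not part of
 *      the original source).
 * The three interesting outcomes are a fresh page (which the caller must
 * fill in), a cache hit (already populated), and a failure such as a pool
 * flush. GCPhysGst, iPdUser and iPdeIndex below are placeholders, and the
 * PGMPOOLKIND value is just an example kind:
 * @code
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAlloc(pVM, GCPhysGst, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                           iPdUser, iPdeIndex, &pShwPage);
 *     if (rc == VINF_SUCCESS)
 *     {
 *         // New, zeroed page: populate the shadow page table here.
 *     }
 *     else if (rc == VINF_PGM_CACHED_PAGE)
 *     {
 *         // Cache hit: the shadow page table is already filled in.
 *     }
 *     else
 *     {
 *         // Failure, e.g. VERR_PGM_POOL_FLUSHED: a CR3 resync is pending.
 *     }
 * @endcode
 */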
3571
3572
3573/**
3574 * Frees a usage of a pool page.
3575 *
3576 * @param pVM The VM handle.
3577 * @param HCPhys The HC physical address of the shadow page.
3578 * @param iUser The shadow page pool index of the user table.
3579 * @param iUserTable The index into the user table (shadowed).
3580 */
3581void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint16_t iUserTable)
3582{
3583 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
3584 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3585 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
3586}
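/**
 * @par Example: releasing a usage via pgmPoolFree() (illustrative sketch,
 *      not part of the original source).
 * pgmPoolFree() is the HCPhys-keyed convenience wrapper around
 * pgmPoolFreeByPage() for callers that only hold the shadow PDE. The PDE
 * and index names below are placeholders:
 * @code
 *     if (PdeDst.n.u1Present)
 *     {
 *         pgmPoolFree(pVM, PdeDst.u & X86_PDE_PG_MASK, iPdUser, iPdeIndex);
 *         PdeDst.u = 0;   // the caller must clear its own reference too
 *     }
 * @endcode
 */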
3587
3588
3589/**
3590 * Gets an in-use page in the pool by its physical address.
3591 *
3592 * @returns Pointer to the page.
3593 * @param pVM The VM handle.
3594 * @param HCPhys The HC physical address of the shadow page.
3595 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
3596 */
3597PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
3598{
3599 /** @todo profile this! */
3600 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3601 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
3602 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
3603 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
3604 return pPage;
3605}
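/**
 * @par Example: looking a shadow page up by HCPhys (illustrative sketch,
 *      not part of the original source).
 * Since pgmPoolGetPageByHCPhys() never returns NULL, the caller can use the
 * result without a check; PdeDst is a placeholder shadow PDE here:
 * @code
 *     PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & X86_PDE_PG_MASK);
 *     Assert(pShwPage->enmKind != PGMPOOLKIND_FREE);
 * @endcode
 */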
3606
3607
3608/**
3609 * Flushes the entire cache.
3610 *
3611 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3612 * and execute this CR3 flush.
3613 *
3614 * @param pVM The VM handle.
3615 */
3616void pgmPoolFlushAll(PVM pVM)
3617{
3618 LogFlow(("pgmPoolFlushAll:\n"));
3619 pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
3620}
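/**
 * @par Example: honouring the flush side effect (illustrative sketch, not
 *      part of the original source; the force-action flag names are assumed
 *      from the VMM).
 * After pgmPoolFlushAll() the global CR3 sync force-action flag should be
 * pending, and the caller must let it be serviced before resuming the guest:
 * @code
 *     pgmPoolFlushAll(pVM);
 *     Assert(VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
 *     // ... unwind to the execution loop so the CR3 resync is performed.
 * @endcode
 */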
3621