VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 10700

Last change on this file since 10700 was 10405, checked in by vboxsync, 16 years ago

Treat pages as reused when changing at least 0x40 qwords with rep movsq/stosq.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 139.3 KB
Line 
1/* $Id: PGMAllPool.cpp 10405 2008-07-09 11:24:56Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_GC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37
38#include <VBox/log.h>
39#include <VBox/err.h>
40#include <iprt/asm.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46__BEGIN_DECLS
47static void pgmPoolFlushAllInt(PPGMPOOL pPool);
48#ifdef PGMPOOL_WITH_USER_TRACKING
49DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
50DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
51static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#endif
53#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
54static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
55#endif
56#ifdef PGMPOOL_WITH_CACHE
57static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
58#endif
59#ifdef PGMPOOL_WITH_MONITORING
60static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
61#endif
62#ifndef IN_RING3
63DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
64#endif
65__END_DECLS
66
67
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86
87
#ifdef IN_GC
/**
 * Maps a pool page into the current context (GC only).
 *
 * @returns Pointer to the mapping.
 * @param   pVM     The VM handle.
 * @param   pPage   The page to map.
 */
void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
{
    /* general pages. */
    /* Ordinary pool pages (idx >= PGMPOOL_IDX_FIRST) are mapped on demand
       through the dynamic GC mapping cache. */
    if (pPage->idx >= PGMPOOL_IDX_FIRST)
    {
        Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
        void *pv;
        int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pv);
        AssertReleaseRC(rc);
        return pv;
    }

    /* special pages. */
    /* The special root indexes below PGMPOOL_IDX_FIRST have fixed GC
       mappings stored in the PGM VM data. */
    switch (pPage->idx)
    {
        case PGMPOOL_IDX_PD:
            return pVM->pgm.s.pGC32BitPD;
        case PGMPOOL_IDX_PAE_PD:
        case PGMPOOL_IDX_PAE_PD_0:
            return pVM->pgm.s.apGCPaePDs[0];
        case PGMPOOL_IDX_PAE_PD_1:
            return pVM->pgm.s.apGCPaePDs[1];
        case PGMPOOL_IDX_PAE_PD_2:
            return pVM->pgm.s.apGCPaePDs[2];
        case PGMPOOL_IDX_PAE_PD_3:
            return pVM->pgm.s.apGCPaePDs[3];
        case PGMPOOL_IDX_PDPT:
            return pVM->pgm.s.pGCPaePDPT;
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }
}
#endif /* IN_GC */
130
131
#ifdef PGMPOOL_WITH_MONITORING
/**
 * Determine the size of a write instruction.
 * @returns number of bytes written.
 * @param   pDis    The disassembler state.
 */
static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
{
    /*
     * This is very crude and possibly wrong for some opcodes,
     * but since it's not really supposed to be called we can
     * probably live with that.
     */
    return DISGetParamSize(pDis, &pDis->param1);
}
147
148
149/**
150 * Flushes a chain of pages sharing the same access monitor.
151 *
152 * @returns VBox status code suitable for scheduling.
153 * @param pPool The pool.
154 * @param pPage A page in the chain.
155 */
156int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
157{
158 LogFlow(("pgmPoolMonitorChainFlush: Flush page %VGp type=%d\n", pPage->GCPhys, pPage->enmKind));
159
160 /*
161 * Find the list head.
162 */
163 uint16_t idx = pPage->idx;
164 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
165 {
166 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
167 {
168 idx = pPage->iMonitoredPrev;
169 Assert(idx != pPage->idx);
170 pPage = &pPool->aPages[idx];
171 }
172 }
173
174 /*
175 * Iterate the list flushing each shadow page.
176 */
177 int rc = VINF_SUCCESS;
178 for (;;)
179 {
180 idx = pPage->iMonitoredNext;
181 Assert(idx != pPage->idx);
182 if (pPage->idx >= PGMPOOL_IDX_FIRST)
183 {
184 int rc2 = pgmPoolFlushPage(pPool, pPage);
185 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
186 rc = VINF_PGM_SYNC_CR3;
187 }
188 /* next */
189 if (idx == NIL_PGMPOOL_IDX)
190 break;
191 pPage = &pPool->aPages[idx];
192 }
193 return rc;
194}
195
196
197/**
198 * Wrapper for getting the current context pointer to the entry being modified.
199 *
200 * @returns Pointer to the current context mapping of the entry.
201 * @param pPool The pool.
202 * @param pvFault The fault virtual address.
203 * @param GCPhysFault The fault physical address.
204 * @param cbEntry The entry size.
205 */
206#ifdef IN_RING3
207DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
208#else
209DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
210#endif
211{
212#ifdef IN_GC
213 return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
214
215#elif defined(IN_RING0)
216 void *pvRet;
217 int rc = pgmRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
218 AssertFatalRCSuccess(rc);
219 return pvRet;
220
221#elif defined(IN_RING3)
222 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
223#else
224# error "huh?"
225#endif
226}
227
228
229/**
230 * Process shadow entries before they are changed by the guest.
231 *
232 * For PT entries we will clear them. For PD entries, we'll simply check
233 * for mapping conflicts and set the SyncCR3 FF if found.
234 *
235 * @param pPool The pool.
236 * @param pPage The head page.
237 * @param GCPhysFault The guest physical fault address.
238 * @param uAddress In R0 and GC this is the guest context fault address (flat).
239 * In R3 this is the host context 'fault' address.
240 * @param pCpu The disassembler state for figuring out the write size.
241 * This need not be specified if the caller knows we won't do cross entry accesses.
242 */
243#ifdef IN_RING3
244void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
245#else
246void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
247#endif
248{
249 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
250 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
251 const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
252
253 LogFlow(("pgmPoolMonitorChainChanging: %VGv phys=%VGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));
254
255 for (;;)
256 {
257 union
258 {
259 void *pv;
260 PX86PT pPT;
261 PX86PTPAE pPTPae;
262 PX86PD pPD;
263 PX86PDPAE pPDPae;
264 PX86PDPT pPDPT;
265 PX86PML4 pPML4;
266 } uShw;
267 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
268
269 switch (pPage->enmKind)
270 {
271 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
272 {
273 const unsigned iShw = off / sizeof(X86PTE);
274 if (uShw.pPT->a[iShw].n.u1Present)
275 {
276# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
277 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
278 Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
279 pgmPoolTracDerefGCPhysHint(pPool, pPage,
280 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
281 pGstPte->u & X86_PTE_PG_MASK);
282# endif
283 uShw.pPT->a[iShw].u = 0;
284 }
285 break;
286 }
287
288 /* page/2 sized */
289 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
290 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
291 {
292 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
293 if (uShw.pPTPae->a[iShw].n.u1Present)
294 {
295# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
296 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
297 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
298 pgmPoolTracDerefGCPhysHint(pPool, pPage,
299 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
300 pGstPte->u & X86_PTE_PG_MASK);
301# endif
302 uShw.pPTPae->a[iShw].u = 0;
303 }
304 }
305 break;
306
307 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
308 {
309 const unsigned iShw = off / sizeof(X86PTEPAE);
310 if (uShw.pPTPae->a[iShw].n.u1Present)
311 {
312# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
313 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
314 Log4(("pgmPoolMonitorChainChanging pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
315 pgmPoolTracDerefGCPhysHint(pPool, pPage,
316 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
317 pGstPte->u & X86_PTE_PAE_PG_MASK);
318# endif
319 uShw.pPTPae->a[iShw].u = 0;
320 }
321
322 /* paranoia / a bit assumptive. */
323 if ( pCpu
324 && (off & 7)
325 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
326 {
327 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
328 AssertReturnVoid(iShw2 < ELEMENTS(uShw.pPTPae->a));
329
330 if (uShw.pPTPae->a[iShw2].n.u1Present)
331 {
332# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
333 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
334 Log4(("pgmPoolMonitorChainChanging pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
335 pgmPoolTracDerefGCPhysHint(pPool, pPage,
336 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
337 pGstPte->u & X86_PTE_PAE_PG_MASK);
338# endif
339 uShw.pPTPae->a[iShw2].u = 0;
340 }
341 }
342
343 break;
344 }
345
346 case PGMPOOLKIND_ROOT_32BIT_PD:
347 {
348 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
349 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
350 {
351 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
352 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
353 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
354 }
355 /* paranoia / a bit assumptive. */
356 else if ( pCpu
357 && (off & 3)
358 && (off & 3) + cbWrite > sizeof(X86PTE))
359 {
360 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
361 if ( iShw2 != iShw
362 && iShw2 < ELEMENTS(uShw.pPD->a)
363 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
364 {
365 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
366 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
367 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
368 }
369 }
370#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
371 if ( uShw.pPD->a[iShw].n.u1Present
372 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
373 {
374 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
375# ifdef IN_GC /* TLB load - we're pushing things a bit... */
376 ASMProbeReadByte(pvAddress);
377# endif
378 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
379 uShw.pPD->a[iShw].u = 0;
380 }
381#endif
382 break;
383 }
384
385 case PGMPOOLKIND_ROOT_PAE_PD:
386 {
387 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
388 for (unsigned i = 0; i < 2; i++, iShw++)
389 {
390 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
391 {
392 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
393 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
394 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
395 }
396 /* paranoia / a bit assumptive. */
397 else if ( pCpu
398 && (off & 3)
399 && (off & 3) + cbWrite > 4)
400 {
401 const unsigned iShw2 = iShw + 2;
402 if ( iShw2 < ELEMENTS(uShw.pPDPae->a)
403 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
404 {
405 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
406 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
407 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
408 }
409 }
410#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
411 if ( uShw.pPDPae->a[iShw].n.u1Present
412 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
413 {
414 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
415# ifdef IN_GC /* TLB load - we're pushing things a bit... */
416 ASMProbeReadByte(pvAddress);
417# endif
418 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
419 uShw.pPDPae->a[iShw].u = 0;
420 }
421#endif
422 }
423 break;
424 }
425
426 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
427 {
428 const unsigned iShw = off / sizeof(X86PDEPAE);
429 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
430 {
431 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
432 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
433 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
434 }
435#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
436 /* causes trouble when the guest uses a PDE to refer to the whole page table level structure. (invalidate here; faults later on when it tries
437 * to change the page table entries
438 * -> recheck; probably only applies to the GC case
439 */
440 else
441 {
442 if (uShw.pPDPae->a[iShw].n.u1Present)
443 {
444 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
445 pgmPoolFree(pPool->CTXSUFF(pVM),
446 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
447 /* Note: hardcoded PAE implementation dependency */
448 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
449 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
450 uShw.pPDPae->a[iShw].u = 0;
451 }
452 }
453#endif
454 /* paranoia / a bit assumptive. */
455 if ( pCpu
456 && (off & 7)
457 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
458 {
459 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
460 AssertReturnVoid(iShw2 < ELEMENTS(uShw.pPDPae->a));
461
462 if ( iShw2 != iShw
463 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
464 {
465 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
466 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
467 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
468 }
469#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
470 else
471 if (uShw.pPDPae->a[iShw2].n.u1Present)
472 {
473 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
474 pgmPoolFree(pPool->CTXSUFF(pVM),
475 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
476 /* Note: hardcoded PAE implementation dependency */
477 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
478 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
479 uShw.pPDPae->a[iShw2].u = 0;
480 }
481#endif
482 }
483 break;
484 }
485
486 case PGMPOOLKIND_ROOT_PDPT:
487 {
488 /* Hopefully this doesn't happen very often:
489 * - touching unused parts of the page
490 * - messing with the bits of pd pointers without changing the physical address
491 */
492 const unsigned iShw = off / sizeof(X86PDPE);
493 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
494 {
495 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
496 {
497 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
498 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
499 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
500 }
501 /* paranoia / a bit assumptive. */
502 else if ( pCpu
503 && (off & 7)
504 && (off & 7) + cbWrite > sizeof(X86PDPE))
505 {
506 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
507 if ( iShw2 != iShw
508 && iShw2 < X86_PG_PAE_PDPE_ENTRIES
509 && uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
510 {
511 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
512 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
513 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
514 }
515 }
516 }
517 break;
518 }
519
520#ifndef IN_GC
521 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
522 {
523 Assert(pPage->enmKind == PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD);
524
525 const unsigned iShw = off / sizeof(X86PDEPAE);
526 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
527 {
528 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
529 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
530 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
531 }
532 else
533 {
534 if (uShw.pPDPae->a[iShw].n.u1Present)
535 {
536 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
537 pgmPoolFree(pPool->CTXSUFF(pVM),
538 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
539 pPage->idx,
540 iShw);
541 uShw.pPDPae->a[iShw].u = 0;
542 }
543 }
544 /* paranoia / a bit assumptive. */
545 if ( pCpu
546 && (off & 7)
547 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
548 {
549 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
550 AssertReturnVoid(iShw2 < ELEMENTS(uShw.pPDPae->a));
551
552 if ( iShw2 != iShw
553 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
554 {
555 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
556 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
557 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
558 }
559 else
560 if (uShw.pPDPae->a[iShw2].n.u1Present)
561 {
562 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
563 pgmPoolFree(pPool->CTXSUFF(pVM),
564 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
565 pPage->idx,
566 iShw2);
567 uShw.pPDPae->a[iShw2].u = 0;
568 }
569 }
570 break;
571 }
572
573 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
574 {
575 /* Hopefully this doesn't happen very often:
576 * - messing with the bits of pd pointers without changing the physical address
577 */
578 if (!VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
579 {
580 const unsigned iShw = off / sizeof(X86PDPE);
581 if (uShw.pPDPT->a[iShw].n.u1Present)
582 {
583 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
584 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
585 uShw.pPDPT->a[iShw].u = 0;
586 }
587 /* paranoia / a bit assumptive. */
588 if ( pCpu
589 && (off & 7)
590 && (off & 7) + cbWrite > sizeof(X86PDPE))
591 {
592 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
593 if (uShw.pPDPT->a[iShw2].n.u1Present)
594 {
595 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
596 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
597 uShw.pPDPT->a[iShw2].u = 0;
598 }
599 }
600 }
601 break;
602 }
603
604 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
605 {
606 /* Hopefully this doesn't happen very often:
607 * - messing with the bits of pd pointers without changing the physical address
608 */
609 if (!VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
610 {
611 const unsigned iShw = off / sizeof(X86PDPE);
612 if (uShw.pPML4->a[iShw].n.u1Present)
613 {
614 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
615 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
616 uShw.pPML4->a[iShw].u = 0;
617 }
618 /* paranoia / a bit assumptive. */
619 if ( pCpu
620 && (off & 7)
621 && (off & 7) + cbWrite > sizeof(X86PDPE))
622 {
623 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
624 if (uShw.pPML4->a[iShw2].n.u1Present)
625 {
626 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
627 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
628 uShw.pPML4->a[iShw2].u = 0;
629 }
630 }
631 }
632 break;
633 }
634#endif /* IN_RING0 */
635
636 default:
637 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
638 }
639
640 /* next */
641 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
642 return;
643 pPage = &pPool->aPages[pPage->iMonitoredNext];
644 }
645}
646
647
# ifndef IN_RING3
/**
 * Checks if an access could be a fork operation in progress.
 *
 * Meaning, that the guest is setting up the parent process for Copy-On-Write.
 *
 * @returns true if it's likely that we're forking, otherwise false.
 * @param   pPool       The pool.
 * @param   pCpu        The disassembled instruction.
 * @param   offFault    The access offset.
 */
DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
{
    /*
     * i386 linux is using btr to clear X86_PTE_RW.
     * The functions involved are (2.6.16 source inspection):
     *     clear_bit
     *     ptep_set_wrprotect
     *     copy_one_pte
     *     copy_pte_range
     *     copy_pmd_range
     *     copy_pud_range
     *     copy_page_range
     *     dup_mmap
     *     dup_mm
     *     copy_mm
     *     copy_process
     *     do_fork
     */
    /* The !(offFault & 4) test selects the low dword of a PAE entry, which is
       where the R/W bit lives. */
    if (    pCpu->pCurInstr->opcode == OP_BTR
        &&  !(offFault & 4)
        /** @todo Validate that the bit index is X86_PTE_RW. */
            )
    {
        STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
        return true;
    }
    return false;
}
687
688
689/**
690 * Determine whether the page is likely to have been reused.
691 *
692 * @returns true if we consider the page as being reused for a different purpose.
693 * @returns false if we consider it to still be a paging page.
694 * @param pPage The page in question.
695 * @param pRegFrame Trap register frame.
696 * @param pCpu The disassembly info for the faulting insturction.
697 * @param pvFault The fault address.
698 *
699 * @remark The REP prefix check is left to the caller because of STOSD/W.
700 */
701DECLINLINE(bool) pgmPoolMonitorIsReused(PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
702{
703 switch (pCpu->pCurInstr->opcode)
704 {
705 case OP_PUSH:
706 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
707 return true;
708 case OP_PUSHF:
709 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
710 return true;
711 case OP_PUSHA:
712 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
713 return true;
714 case OP_FXSAVE:
715 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
716 return true;
717 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
718 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
719 return true;
720 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
721 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
722 return true;
723 case OP_MOVSWD:
724 case OP_STOSWD:
725 if ( pCpu->prefix == (PREFIX_REP|PREFIX_REX)
726 && pRegFrame->rcx >= 0x40
727 )
728 {
729 Assert(pCpu->mode == CPUMODE_64BIT);
730
731 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
732 return true;
733 }
734 return false;
735 }
736 if ( (pCpu->param1.flags & USE_REG_GEN32)
737 && (pCpu->param1.base.reg_gen == USE_REG_ESP))
738 {
739 Log4(("pgmPoolMonitorIsReused: ESP\n"));
740 return true;
741 }
742
743 //if (pPage->fCR3Mix)
744 // return false;
745 return false;
746}
747
748
749/**
750 * Flushes the page being accessed.
751 *
752 * @returns VBox status code suitable for scheduling.
753 * @param pVM The VM handle.
754 * @param pPool The pool.
755 * @param pPage The pool page (head).
756 * @param pCpu The disassembly of the write instruction.
757 * @param pRegFrame The trap register frame.
758 * @param GCPhysFault The fault address as guest physical address.
759 * @param pvFault The fault address.
760 */
761static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
762 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
763{
764 /*
765 * First, do the flushing.
766 */
767 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
768
769 /*
770 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
771 */
772 uint32_t cbWritten;
773 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
774 if (VBOX_SUCCESS(rc2))
775 pRegFrame->rip += pCpu->opsize;
776 else if (rc2 == VERR_EM_INTERPRETER)
777 {
778#ifdef IN_GC
779 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
780 {
781 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
782 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
783 rc = VINF_SUCCESS;
784 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
785 }
786 else
787#endif
788 {
789 rc = VINF_EM_RAW_EMULATE_INSTR;
790 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
791 }
792 }
793 else
794 rc = rc2;
795
796 /* See use in pgmPoolAccessHandlerSimple(). */
797 PGM_INVL_GUEST_TLBS();
798
799 LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
800 return rc;
801
802}
803
804
805/**
806 * Handles the STOSD write accesses.
807 *
808 * @returns VBox status code suitable for scheduling.
809 * @param pVM The VM handle.
810 * @param pPool The pool.
811 * @param pPage The pool page (head).
812 * @param pCpu The disassembly of the write instruction.
813 * @param pRegFrame The trap register frame.
814 * @param GCPhysFault The fault address as guest physical address.
815 * @param pvFault The fault address.
816 */
817DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
818 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
819{
820 /*
821 * Increment the modification counter and insert it into the list
822 * of modified pages the first time.
823 */
824 if (!pPage->cModifications++)
825 pgmPoolMonitorModifiedInsert(pPool, pPage);
826
827 /*
828 * Execute REP STOSD.
829 *
830 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
831 * write situation, meaning that it's safe to write here.
832 */
833 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
834 while (pRegFrame->ecx)
835 {
836 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
837#ifdef IN_GC
838 *(uint32_t *)pu32 = pRegFrame->eax;
839#else
840 PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
841#endif
842 pu32 += 4;
843 GCPhysFault += 4;
844 pRegFrame->edi += 4;
845 pRegFrame->ecx--;
846 }
847 pRegFrame->rip += pCpu->opsize;
848
849 /* See use in pgmPoolAccessHandlerSimple(). */
850 PGM_INVL_GUEST_TLBS();
851
852 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
853 return VINF_SUCCESS;
854}
855
856
857/**
858 * Handles the simple write accesses.
859 *
860 * @returns VBox status code suitable for scheduling.
861 * @param pVM The VM handle.
862 * @param pPool The pool.
863 * @param pPage The pool page (head).
864 * @param pCpu The disassembly of the write instruction.
865 * @param pRegFrame The trap register frame.
866 * @param GCPhysFault The fault address as guest physical address.
867 * @param pvFault The fault address.
868 */
869DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
870 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
871{
872 /*
873 * Increment the modification counter and insert it into the list
874 * of modified pages the first time.
875 */
876 if (!pPage->cModifications++)
877 pgmPoolMonitorModifiedInsert(pPool, pPage);
878
879 /*
880 * Clear all the pages. ASSUMES that pvFault is readable.
881 */
882 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
883
884 /*
885 * Interpret the instruction.
886 */
887 uint32_t cb;
888 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
889 if (VBOX_SUCCESS(rc))
890 pRegFrame->rip += pCpu->opsize;
891 else if (rc == VERR_EM_INTERPRETER)
892 {
893 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv - opcode=%d\n",
894 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
895 rc = VINF_EM_RAW_EMULATE_INSTR;
896 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
897 }
898
899 /*
900 * Quick hack, with logging enabled we're getting stale
901 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
902 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
903 * have to be fixed to support this. But that'll have to wait till next week.
904 *
905 * An alternative is to keep track of the changed PTEs together with the
906 * GCPhys from the guest PT. This may proove expensive though.
907 *
908 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
909 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
910 */
911 PGM_INVL_GUEST_TLBS();
912
913 LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
914 return rc;
915}
916
917
/**
 * \#PF Handler callback for PT write accesses.
 *
 * Dispatches a write to a monitored (shadowed) guest page table to the
 * cheapest applicable strategy: interpret a single instruction, special-case
 * the common rep-stosd page-clearing pattern, or give up and flush the
 * shadow page altogether.
 *
 * @returns VBox status code (appropriate for GC return).
 * @param   pVM         VM Handle.
 * @param   uErrorCode  CPU Error code.
 * @param   pRegFrame   Trap register frame.
 *                      NULL on DMA and other non CPU access.
 * @param   pvFault     The fault address (cr2).
 * @param   GCPhysFault The GC physical address corresponding to pvFault.
 * @param   pvUser      User argument. The head PGMPOOLPAGE of the monitor
 *                      chain (asserted below).
 */
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
{
    STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
    PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
    PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
    LogFlow(("pgmPoolAccessHandler: pvFault=%VGv pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));

    /*
     * We should ALWAYS have the list head as user parameter. This
     * is because we use that page to record the changes.
     */
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);

    /*
     * Disassemble the faulting instruction.
     */
    DISCPUSTATE Cpu;
    int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
    AssertRCReturn(rc, rc);

    /*
     * Check if it's worth dealing with.
     *
     * Pages written to very frequently (>= 48 modifications) are assumed to
     * have been repurposed by the guest and are flushed instead of emulated;
     * CR3-mix pages are exempt from that heuristic.
     */
    bool fReused = false;
    if (    (   pPage->cModifications < 48   /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
             || pPage->fCR3Mix)
        &&  !(fReused = pgmPoolMonitorIsReused(pPage, pRegFrame, &Cpu, pvFault))
        &&  !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
    {
        /*
         * Simple instructions, no REP prefix.
         */
        if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
        {
            rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
            return rc;
        }

        /*
         * Windows is frequently doing small memset() operations (netio test 4k+).
         * We have to deal with these or we'll kill the cache and performance.
         *
         * Only the narrow, well-understood case is handled: ring-0 32-bit
         * rep stosd of at most 0x20 dwords, dword-aligned, not crossing the
         * page boundary, forward direction, and storing one of the two eax
         * values actually observed in practice.
         */
        if (    Cpu.pCurInstr->opcode == OP_STOSWD
            &&  CPUMGetGuestCPL(pVM, pRegFrame) == 0
            &&  pRegFrame->ecx <= 0x20
            &&  pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
            &&  !((uintptr_t)pvFault & 3)
            &&  (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
            &&  Cpu.mode == CPUMODE_32BIT
            &&  Cpu.opmode == CPUMODE_32BIT
            &&  Cpu.addrmode == CPUMODE_32BIT
            &&  Cpu.prefix == PREFIX_REP
            &&  !pRegFrame->eflags.Bits.u1DF
            )
        {
            rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
            return rc;
        }

        /* REP prefix, don't bother. */
        STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
        Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%VGv opcode=%d prefix=%#x\n",
              pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
    }

    /*
     * Not worth it, so flush it.
     *
     * If we considered it to be reused, don't go back to ring-3
     * to emulate failed instructions since we usually cannot
     * interpret then. This may be a bit risky, in which case
     * the reuse detection must be fixed.
     */
    rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
    if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
        rc = VINF_SUCCESS;
    STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
    return rc;
}
1011
1012# endif /* !IN_RING3 */
1013#endif /* PGMPOOL_WITH_MONITORING */
1014
1015
1016
1017#ifdef PGMPOOL_WITH_CACHE
1018/**
1019 * Inserts a page into the GCPhys hash table.
1020 *
1021 * @param pPool The pool.
1022 * @param pPage The page.
1023 */
1024DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1025{
1026 Log3(("pgmPoolHashInsert: %VGp\n", pPage->GCPhys));
1027 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1028 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1029 pPage->iNext = pPool->aiHash[iHash];
1030 pPool->aiHash[iHash] = pPage->idx;
1031}
1032
1033
1034/**
1035 * Removes a page from the GCPhys hash table.
1036 *
1037 * @param pPool The pool.
1038 * @param pPage The page.
1039 */
1040DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1041{
1042 Log3(("pgmPoolHashRemove: %VGp\n", pPage->GCPhys));
1043 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1044 if (pPool->aiHash[iHash] == pPage->idx)
1045 pPool->aiHash[iHash] = pPage->iNext;
1046 else
1047 {
1048 uint16_t iPrev = pPool->aiHash[iHash];
1049 for (;;)
1050 {
1051 const int16_t i = pPool->aPages[iPrev].iNext;
1052 if (i == pPage->idx)
1053 {
1054 pPool->aPages[iPrev].iNext = pPage->iNext;
1055 break;
1056 }
1057 if (i == NIL_PGMPOOL_IDX)
1058 {
1059 AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1060 break;
1061 }
1062 iPrev = i;
1063 }
1064 }
1065 pPage->iNext = NIL_PGMPOOL_IDX;
1066}
1067
1068
/**
 * Frees up one cache page.
 *
 * Picks the least recently used page from the tail of the age list (skipping
 * the caller's own user page) and flushes it.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
 * @param   pPool       The pool.
 * @param   iUser       The user index. Never selected as the victim.
 */
static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
{
#ifndef IN_GC
    /* NOTE(review): presumably needed by the PGM_INVL_GUEST_TLBS macro in
       ring-3/ring-0 context — confirm against the macro definition. */
    const PVM pVM = pPool->CTXSUFF(pVM);
#endif
    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
    STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);

    /*
     * Select one page from the tail of the age list.
     */
    uint16_t iToFree = pPool->iAgeTail;
    if (iToFree == iUser)
        iToFree = pPool->aPages[iToFree].iAgePrev;
/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
    if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
    {
        uint16_t i = pPool->aPages[iToFree].iAgePrev;
        for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
        {
            if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
                continue;
            iToFree = i;
            break;
        }
    }
*/
    Assert(iToFree != iUser);
    AssertRelease(iToFree != NIL_PGMPOOL_IDX);

    int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
    if (rc == VINF_SUCCESS)
        PGM_INVL_GUEST_TLBS(); /* see PT handler. */
    return rc;
}
1113
1114
1115/**
1116 * Checks if a kind mismatch is really a page being reused
1117 * or if it's just normal remappings.
1118 *
1119 * @returns true if reused and the cached page (enmKind1) should be flushed
1120 * @returns false if not reused.
1121 * @param enmKind1 The kind of the cached page.
1122 * @param enmKind2 The kind of the requested page.
1123 */
1124static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1125{
1126 switch (enmKind1)
1127 {
1128 /*
1129 * Never reuse them. There is no remapping in non-paging mode.
1130 */
1131 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1132 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1133 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1134 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1135 return true;
1136
1137 /*
1138 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1139 */
1140 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1141 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1142 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1143 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1144 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1145 switch (enmKind2)
1146 {
1147 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1148 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1149 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1150 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1151 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1152 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1153 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1154 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1155 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1156 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1157 return true;
1158 default:
1159 return false;
1160 }
1161
1162 /*
1163 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1164 */
1165 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1166 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1167 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1168 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1169 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1170 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1171 switch (enmKind2)
1172 {
1173 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1174 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1175 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1176 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1177 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1178 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1179 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1180 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1181 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1182 return true;
1183 default:
1184 return false;
1185 }
1186
1187 /*
1188 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1189 */
1190 case PGMPOOLKIND_ROOT_32BIT_PD:
1191 case PGMPOOLKIND_ROOT_PAE_PD:
1192 case PGMPOOLKIND_ROOT_PDPT:
1193 case PGMPOOLKIND_ROOT_NESTED:
1194 return false;
1195
1196 default:
1197 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1198 }
1199}
1200
1201
/**
 * Attempts to satisfy a pgmPoolAlloc request from the cache.
 *
 * Walks the GCPhys hash chain looking for an existing shadow page of the
 * requested GCPhys/kind. On an exact match a user reference is added. On a
 * kind mismatch that indicates reuse, the stale page is flushed and the
 * lookup aborts (reported as a miss).
 *
 * @returns VBox status code.
 * @retval  VINF_PGM_CACHED_PAGE on success.
 * @retval  VERR_FILE_NOT_FOUND if not found.
 * @param   pPool       The pool.
 * @param   GCPhys      The GC physical address of the page we're gonna shadow.
 * @param   enmKind     The kind of mapping.
 * @param   iUser       The shadow page pool index of the user table.
 * @param   iUserTable  The index into the user table (shadowed).
 * @param   ppPage      Where to store the pointer to the page.
 */
static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
{
#ifndef IN_GC
    /* NOTE(review): presumably needed by PGM_INVL_GUEST_TLBS in ring-3/ring-0. */
    const PVM pVM = pPool->CTXSUFF(pVM);
#endif
    /*
     * Look up the GCPhys in the hash.
     */
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    Log3(("pgmPoolCacheAlloc: %VGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
    if (i != NIL_PGMPOOL_IDX)
    {
        do
        {
            PPGMPOOLPAGE pPage = &pPool->aPages[i];
            Log3(("pgmPoolCacheAlloc: slot %d found page %VGp\n", i, pPage->GCPhys));
            if (pPage->GCPhys == GCPhys)
            {
                if ((PGMPOOLKIND)pPage->enmKind == enmKind)
                {
                    /* Exact hit: just record the new user of this shadow page. */
                    int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
                    if (VBOX_SUCCESS(rc))
                    {
                        *ppPage = pPage;
                        STAM_COUNTER_INC(&pPool->StatCacheHits);
                        return VINF_PGM_CACHED_PAGE;
                    }
                    return rc;
                }

                /*
                 * The kind is different. In some cases we should now flush the page
                 * as it has been reused, but in most cases this is normal remapping
                 * of PDs as PT or big pages using the GCPhys field in a slightly
                 * different way than the other kinds.
                 */
                if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
                {
                    STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
                    pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTrackInsert. */
                    PGM_INVL_GUEST_TLBS(); /* see PT handler. */
                    break;
                }
            }

            /* next */
            i = pPage->iNext;
        } while (i != NIL_PGMPOOL_IDX);
    }

    Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
    STAM_COUNTER_INC(&pPool->StatCacheMisses);
    return VERR_FILE_NOT_FOUND;
}
1269
1270
1271/**
1272 * Inserts a page into the cache.
1273 *
1274 * @param pPool The pool.
1275 * @param pPage The cached page.
1276 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1277 */
1278static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1279{
1280 /*
1281 * Insert into the GCPhys hash if the page is fit for that.
1282 */
1283 Assert(!pPage->fCached);
1284 if (fCanBeCached)
1285 {
1286 pPage->fCached = true;
1287 pgmPoolHashInsert(pPool, pPage);
1288 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1289 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1290 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1291 }
1292 else
1293 {
1294 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1295 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1296 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1297 }
1298
1299 /*
1300 * Insert at the head of the age list.
1301 */
1302 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1303 pPage->iAgeNext = pPool->iAgeHead;
1304 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1305 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1306 else
1307 pPool->iAgeTail = pPage->idx;
1308 pPool->iAgeHead = pPage->idx;
1309}
1310
1311
1312/**
1313 * Flushes a cached page.
1314 *
1315 * @param pPool The pool.
1316 * @param pPage The cached page.
1317 */
1318static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1319{
1320 Log3(("pgmPoolCacheFlushPage: %VGp\n", pPage->GCPhys));
1321
1322 /*
1323 * Remove the page from the hash.
1324 */
1325 if (pPage->fCached)
1326 {
1327 pPage->fCached = false;
1328 pgmPoolHashRemove(pPool, pPage);
1329 }
1330 else
1331 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1332
1333 /*
1334 * Remove it from the age list.
1335 */
1336 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1337 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1338 else
1339 pPool->iAgeTail = pPage->iAgePrev;
1340 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1341 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1342 else
1343 pPool->iAgeHead = pPage->iAgeNext;
1344 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1345 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1346}
1347#endif /* PGMPOOL_WITH_CACHE */
1348
1349
1350#ifdef PGMPOOL_WITH_MONITORING
/**
 * Looks for pages sharing the monitor.
 *
 * Searches the GCPhys hash for another monitored shadow page backed by the
 * same guest page and, if found, walks back to the head of its monitor
 * chain. Only kinds that are actually write-monitored qualify.
 *
 * @returns Pointer to the head page.
 * @returns NULL if not found.
 * @param   pPool       The Pool
 * @param   pNewPage    The page which is going to be monitored.
 */
static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
{
#ifdef PGMPOOL_WITH_CACHE
    /*
     * Look up the GCPhys in the hash.
     */
    RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    if (i == NIL_PGMPOOL_IDX)
        return NULL;
    do
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[i];
        /* Unsigned subtraction: this is true only when pPage->GCPhys lies in
           [GCPhys, GCPhys + PAGE_SIZE), i.e. the same guest page. */
        if (    pPage->GCPhys - GCPhys < PAGE_SIZE
            &&  pPage != pNewPage)
        {
            switch (pPage->enmKind)
            {
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
                case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
                case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
                case PGMPOOLKIND_ROOT_32BIT_PD:
                case PGMPOOLKIND_ROOT_PAE_PD:
                case PGMPOOLKIND_ROOT_PDPT:
                {
                    /* find the head */
                    while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
                    {
                        Assert(pPage->iMonitoredPrev != pPage->idx);
                        pPage = &pPool->aPages[pPage->iMonitoredPrev];
                    }
                    return pPage;
                }

                /* ignore, no monitoring. */
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_ROOT_NESTED:
                    break;
                default:
                    AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
            }
        }

        /* next */
        i = pPage->iNext;
    } while (i != NIL_PGMPOOL_IDX);
#endif
    return NULL;
}
1419
/**
 * Enabled write monitoring of a guest page.
 *
 * Either chains the page onto an existing monitor of the same guest page,
 * or registers a new physical write handler for it.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
 * @param   pPool       The pool.
 * @param   pPage       The cached page.
 */
static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    LogFlow(("pgmPoolMonitorInsert %VGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));

    /*
     * Filter out the relevant kinds.
     */
    switch (pPage->enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_ROOT_PDPT:
            break;

        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            /* Nothing to monitor here. */
            return VINF_SUCCESS;

        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
#ifdef PGMPOOL_WITH_MIXED_PT_CR3
            break;
#endif
        /* Without mixed PT/CR3 monitoring, root PDs fall through to the fatal assert. */
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        default:
            AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
    }

    /*
     * Install handler.
     */
    int rc;
    PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
    if (pPageHead)
    {
        /* Another shadow page already monitors this guest page; chain onto
           it right after the head instead of registering a second handler. */
        Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
        Assert(pPageHead->iMonitoredPrev != pPage->idx);
        pPage->iMonitoredPrev = pPageHead->idx;
        pPage->iMonitoredNext = pPageHead->iMonitoredNext;
        if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
            pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
        pPageHead->iMonitoredNext = pPage->idx;
        rc = VINF_SUCCESS;
    }
    else
    {
        /* First monitor of this guest page: register a physical write handler
           covering the whole page in all three contexts (R3/R0/GC). */
        Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
        PVM pVM = pPool->CTXSUFF(pVM);
        const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
        rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
                                          GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
                                          pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
                                          pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
                                          pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
                                          pPool->pszAccessHandler);
        /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
         * the heap size should suffice. */
        AssertFatalRC(rc);
        if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
            rc = VERR_PGM_POOL_CLEARED;
    }
    pPage->fMonitored = true;
    return rc;
}
1505
1506
/**
 * Disables write monitoring of a guest page.
 *
 * Unchains the page from a shared monitor (repointing the handler callbacks
 * at the new chain head when this page was the head), or deregisters the
 * physical handler entirely when this was the last monitoring page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
 * @param   pPool       The pool.
 * @param   pPage       The cached page.
 */
static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    /*
     * Filter out the relevant kinds.
     */
    switch (pPage->enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_ROOT_PDPT:
            break;

        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            /* Nothing to monitor here. */
            return VINF_SUCCESS;

        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
#ifdef PGMPOOL_WITH_MIXED_PT_CR3
            break;
#endif
        /* Without mixed PT/CR3 monitoring, root PDs fall through to the fatal assert. */
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        default:
            AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
    }

    /*
     * Remove the page from the monitored list or uninstall it if last.
     */
    const PVM pVM = pPool->CTXSUFF(pVM);
    int rc;
    if (    pPage->iMonitoredNext != NIL_PGMPOOL_IDX
        ||  pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
    {
        if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
        {
            /* We're the chain head: promote the next page to head and repoint
               the registered handler's user arguments at it. */
            PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
            pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
            pNewHead->fCR3Mix = pPage->fCR3Mix;
            rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
                                                   pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
                                                   pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
                                                   pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pNewHead),
                                                   pPool->pszAccessHandler);
            AssertFatalRCSuccess(rc);
            pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
        }
        else
        {
            /* Interior or tail of the chain: plain doubly-linked-list unlink. */
            pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
            if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
            {
                pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
                pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
            }
            pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
            rc = VINF_SUCCESS;
        }
    }
    else
    {
        /* Last page monitoring this guest page: drop the physical handler. */
        rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
        AssertFatalRC(rc);
        if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
            rc = VERR_PGM_POOL_CLEARED;
    }
    pPage->fMonitored = false;

    /*
     * Remove it from the list of modified pages (if in it).
     */
    pgmPoolMonitorModifiedRemove(pPool, pPage);

    return rc;
}
1603
1604
1605#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1606/**
1607 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1608 *
1609 * @param pPool The Pool.
1610 * @param pPage A page in the chain.
1611 * @param fCR3Mix The new fCR3Mix value.
1612 */
1613static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1614{
1615 /* current */
1616 pPage->fCR3Mix = fCR3Mix;
1617
1618 /* before */
1619 int16_t idx = pPage->iMonitoredPrev;
1620 while (idx != NIL_PGMPOOL_IDX)
1621 {
1622 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1623 idx = pPool->aPages[idx].iMonitoredPrev;
1624 }
1625
1626 /* after */
1627 idx = pPage->iMonitoredNext;
1628 while (idx != NIL_PGMPOOL_IDX)
1629 {
1630 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1631 idx = pPool->aPages[idx].iMonitoredNext;
1632 }
1633}
1634
1635
/**
 * Installs or modifies monitoring of a CR3 page (special).
 *
 * We're pretending the CR3 page is shadowed by the pool so we can use the
 * generic mechanisms in detecting chained monitoring. (This also gives us a
 * taste of what code changes are required to really pool CR3 shadow pages.)
 *
 * @returns VBox status code.
 * @param   pPool       The pool.
 * @param   idxRoot     The CR3 (root) page index.
 * @param   GCPhysCR3   The (new) CR3 value.
 */
int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
{
    Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
    PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
    LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
             idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));

    /*
     * The unlikely case where it already matches.
     */
    if (pPage->GCPhys == GCPhysCR3)
    {
        Assert(pPage->fMonitored);
        return VINF_SUCCESS;
    }

    /*
     * Flush the current monitoring and remove it from the hash.
     * VERR_PGM_POOL_CLEARED merely signals a pending light weight pool
     * flush; it is not an error for this operation.
     */
    int rc = VINF_SUCCESS;
    if (pPage->fMonitored)
    {
        pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
        rc = pgmPoolMonitorFlush(pPool, pPage);
        if (rc == VERR_PGM_POOL_CLEARED)
            rc = VINF_SUCCESS;
        else
            AssertFatalRC(rc);
        pgmPoolHashRemove(pPool, pPage);
    }

    /*
     * Monitor the page at the new location and insert it into the hash.
     * The first non-success status (flush or insert) wins.
     */
    pPage->GCPhys = GCPhysCR3;
    int rc2 = pgmPoolMonitorInsert(pPool, pPage);
    if (rc2 != VERR_PGM_POOL_CLEARED)
    {
        AssertFatalRC(rc2);
        if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
            rc = rc2;
    }
    pgmPoolHashInsert(pPool, pPage);
    pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
    return rc;
}
1694
1695
1696/**
1697 * Removes the monitoring of a CR3 page (special).
1698 *
1699 * @returns VBox status code.
1700 * @param pPool The pool.
1701 * @param idxRoot The CR3 (root) page index.
1702 */
1703int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1704{
1705 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1706 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1707 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1708 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1709
1710 if (!pPage->fMonitored)
1711 return VINF_SUCCESS;
1712
1713 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1714 int rc = pgmPoolMonitorFlush(pPool, pPage);
1715 if (rc != VERR_PGM_POOL_CLEARED)
1716 AssertFatalRC(rc);
1717 else
1718 rc = VINF_SUCCESS;
1719 pgmPoolHashRemove(pPool, pPage);
1720 Assert(!pPage->fMonitored);
1721 pPage->GCPhys = NIL_RTGCPHYS;
1722 return rc;
1723}
1724#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1725
1726
1727/**
1728 * Inserts the page into the list of modified pages.
1729 *
1730 * @param pPool The pool.
1731 * @param pPage The page.
1732 */
1733void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1734{
1735 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1736 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1737 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1738 && pPool->iModifiedHead != pPage->idx,
1739 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1740 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1741 pPool->iModifiedHead, pPool->cModifiedPages));
1742
1743 pPage->iModifiedNext = pPool->iModifiedHead;
1744 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1745 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1746 pPool->iModifiedHead = pPage->idx;
1747 pPool->cModifiedPages++;
1748#ifdef VBOX_WITH_STATISTICS
1749 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1750 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1751#endif
1752}
1753
1754
/**
 * Removes the page from the list of modified pages and resets the
 * modification counter.
 *
 * Safe to call for pages not on the list (the counter is still reset).
 *
 * @param   pPool   The pool.
 * @param   pPage   The page which is believed to be in the list of modified pages.
 */
static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
    if (pPool->iModifiedHead == pPage->idx)
    {
        /* Head of the list: advance the head pointer and detach. */
        Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
        pPool->iModifiedHead = pPage->iModifiedNext;
        if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
        {
            pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
            pPage->iModifiedNext = NIL_PGMPOOL_IDX;
        }
        pPool->cModifiedPages--;
    }
    else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
    {
        /* Interior or tail: standard doubly-linked-list unlink. */
        pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
        if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
        {
            pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
            pPage->iModifiedNext = NIL_PGMPOOL_IDX;
        }
        pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
        pPool->cModifiedPages--;
    }
    else
        /* Not on the list at all: nothing to unlink, counter untouched. */
        Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
    pPage->cModifications = 0;
}
1791
1792
1793/**
1794 * Zaps the list of modified pages, resetting their modification counters in the process.
1795 *
1796 * @param pVM The VM handle.
1797 */
1798void pgmPoolMonitorModifiedClearAll(PVM pVM)
1799{
1800 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1801 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1802
1803 unsigned cPages = 0; NOREF(cPages);
1804 uint16_t idx = pPool->iModifiedHead;
1805 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1806 while (idx != NIL_PGMPOOL_IDX)
1807 {
1808 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1809 idx = pPage->iModifiedNext;
1810 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1811 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1812 pPage->cModifications = 0;
1813 Assert(++cPages);
1814 }
1815 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1816 pPool->cModifiedPages = 0;
1817}
1818
1819
/**
 * Clear all shadow pages and clear all modification counters.
 *
 * Zeroes the contents of every in-use shadow page table, resets all
 * modification tracking, and (when GCPhys tracking is built in) wipes the
 * per-RAM-page reference links and rebuilds the phys-ext free list.
 *
 * @param   pVM     The VM handle.
 * @remark  Should only be used when monitoring is available, thus placed in
 *          the PGMPOOL_WITH_MONITORING #ifdef.
 */
void pgmPoolClearAll(PVM pVM)
{
    PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
    STAM_PROFILE_START(&pPool->StatClearAll, c);
    LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));

    /*
     * Iterate all the pages until we've encountered all that in use.
     * This is simple but not quite optimal solution.
     */
    unsigned cModifiedPages = 0; NOREF(cModifiedPages);
    unsigned cLeft = pPool->cUsedPages;
    unsigned iPage = pPool->cCurPages;
    while (--iPage >= PGMPOOL_IDX_FIRST)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
        if (pPage->GCPhys != NIL_RTGCPHYS)
        {
            switch (pPage->enmKind)
            {
                /*
                 * We only care about shadow page tables.
                 */
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                {
#ifdef PGMPOOL_WITH_USER_TRACKING
                    /* Skip the zeroing when nothing is mapped in the page. */
                    if (pPage->cPresent)
#endif
                    {
                        void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
                        STAM_PROFILE_START(&pPool->StatZeroPage, z);
                        ASMMemZeroPage(pvShw);
                        STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
#ifdef PGMPOOL_WITH_USER_TRACKING
                        pPage->cPresent = 0;
                        pPage->iFirstPresent = ~0;
#endif
                    }
                }
                /* fall thru */

                default:
                    /* Reset modification tracking for every in-use page,
                       counting (strict builds only) for the sanity check below. */
                    Assert(!pPage->cModifications || ++cModifiedPages);
                    Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
                    Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
                    pPage->iModifiedNext = NIL_PGMPOOL_IDX;
                    pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
                    pPage->cModifications = 0;
                    break;

            }
            if (!--cLeft)
                break;
        }
    }

    /* swipe the special pages too. */
    for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
        if (pPage->GCPhys != NIL_RTGCPHYS)
        {
            Assert(!pPage->cModifications || ++cModifiedPages);
            Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
            Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
            pPage->iModifiedNext = NIL_PGMPOOL_IDX;
            pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
            pPage->cModifications = 0;
        }
    }

#ifndef DEBUG_michael
    AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
#endif
    pPool->iModifiedHead = NIL_PGMPOOL_IDX;
    pPool->cModifiedPages = 0;

#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
    /*
     * Clear all the GCPhys links and rebuild the phys ext free list.
     */
    for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
         pRam;
         pRam = CTXALLSUFF(pRam->pNext))
    {
        unsigned iPage = pRam->cb >> PAGE_SHIFT;
        while (iPage-- > 0)
            pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
    }

    /* Chain every phys-ext entry into one long free list. */
    pPool->iPhysExtFreeHead = 0;
    PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
    const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
    for (unsigned i = 0; i < cMaxPhysExts; i++)
    {
        paPhysExts[i].iNext = i + 1;
        paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
        paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
        paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
    }
    paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
#endif


    pPool->cPresent = 0;
    STAM_PROFILE_STOP(&pPool->StatClearAll, c);
}
1941
1942/**
1943 * Handle SyncCR3 pool tasks
1944 *
1945 * @returns VBox status code.
1946 * @retval VINF_SUCCESS if successfully added.
1947 * @retval VINF_PGM_SYNC_CR3 is it needs to be deferred to ring 3 (GC only)
1948 * @param pVM The VM handle.
1949 * @remark Should only be used when monitoring is available, thus placed in
1950 * the PGMPOOL_WITH_MONITORING #ifdef.
1951 */
1952int pgmPoolSyncCR3(PVM pVM)
1953{
1954 /*
1955 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
1956 * Occasionally we will have to clear all the shadow page tables because we wanted
1957 * to monitor a page which was mapped by too many shadowed page tables. This operation
1958 * sometimes refered to as a 'lightweight flush'.
1959 */
1960 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
1961 pgmPoolMonitorModifiedClearAll(pVM);
1962 else
1963 {
1964# ifndef IN_GC
1965 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
1966 pgmPoolClearAll(pVM);
1967# else
1968 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
1969 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
1970 return VINF_PGM_SYNC_CR3;
1971# endif
1972 }
1973 return VINF_SUCCESS;
1974}
1975#endif /* PGMPOOL_WITH_MONITORING */
1976
1977#ifdef PGMPOOL_WITH_USER_TRACKING
1978/**
1979 * Frees up at least one user entry.
1980 *
1981 * @returns VBox status code.
1982 * @retval VINF_SUCCESS if successfully added.
1983 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1984 * @param pPool The pool.
1985 * @param iUser The user index.
1986 */
1987static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
1988{
1989 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
1990#ifdef PGMPOOL_WITH_CACHE
1991 /*
1992 * Just free cached pages in a braindead fashion.
1993 */
1994 /** @todo walk the age list backwards and free the first with usage. */
1995 int rc = VINF_SUCCESS;
1996 do
1997 {
1998 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
1999 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
2000 rc = rc2;
2001 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2002 return rc;
2003#else
2004 /*
2005 * Lazy approach.
2006 */
2007 pgmPoolFlushAllInt(pPool);
2008 return VERR_PGM_POOL_FLUSHED;
2009#endif
2010}
2011
2012
2013/**
2014 * Inserts a page into the cache.
2015 *
2016 * This will create user node for the page, insert it into the GCPhys
2017 * hash, and insert it into the age list.
2018 *
2019 * @returns VBox status code.
2020 * @retval VINF_SUCCESS if successfully added.
2021 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2022 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
2023 * @param pPool The pool.
2024 * @param pPage The cached page.
2025 * @param GCPhys The GC physical address of the page we're gonna shadow.
2026 * @param iUser The user index.
2027 * @param iUserTable The user table index.
2028 */
2029DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2030{
2031 int rc = VINF_SUCCESS;
2032 PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);
2033
2034 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
2035
2036 /*
2037 * Find free a user node.
2038 */
2039 uint16_t i = pPool->iUserFreeHead;
2040 if (i == NIL_PGMPOOL_USER_INDEX)
2041 {
2042 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2043 if (VBOX_FAILURE(rc))
2044 return rc;
2045 i = pPool->iUserFreeHead;
2046 }
2047
2048 /*
2049 * Unlink the user node from the free list,
2050 * initialize and insert it into the user list.
2051 */
2052 pPool->iUserFreeHead = pUser[i].iNext;
2053 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
2054 pUser[i].iUser = iUser;
2055 pUser[i].iUserTable = iUserTable;
2056 pPage->iUserHead = i;
2057
2058 /*
2059 * Insert into cache and enable monitoring of the guest page if enabled.
2060 *
2061 * Until we implement caching of all levels, including the CR3 one, we'll
2062 * have to make sure we don't try monitor & cache any recursive reuse of
2063 * a monitored CR3 page. Because all windows versions are doing this we'll
2064 * have to be able to do combined access monitoring, CR3 + PT and
2065 * PD + PT (guest PAE).
2066 *
2067 * Update:
2068 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2069 */
2070#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2071# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2072 const bool fCanBeMonitored = true;
2073# else
2074 bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2075 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2076 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2077# endif
2078# ifdef PGMPOOL_WITH_CACHE
2079 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2080# endif
2081 if (fCanBeMonitored)
2082 {
2083# ifdef PGMPOOL_WITH_MONITORING
2084 rc = pgmPoolMonitorInsert(pPool, pPage);
2085 if (rc == VERR_PGM_POOL_CLEARED)
2086 {
2087 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2088# ifndef PGMPOOL_WITH_CACHE
2089 pgmPoolMonitorFlush(pPool, pPage);
2090 rc = VERR_PGM_POOL_FLUSHED;
2091# endif
2092 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2093 pUser[i].iNext = pPool->iUserFreeHead;
2094 pUser[i].iUser = NIL_PGMPOOL_IDX;
2095 pPool->iUserFreeHead = i;
2096 }
2097 }
2098# endif
2099#endif /* PGMPOOL_WITH_MONITORING */
2100 return rc;
2101}
2102
2103
2104# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2105/**
2106 * Adds a user reference to a page.
2107 *
2108 * This will
2109 * This will move the page to the head of the
2110 *
2111 * @returns VBox status code.
2112 * @retval VINF_SUCCESS if successfully added.
2113 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2114 * @param pPool The pool.
2115 * @param pPage The cached page.
2116 * @param iUser The user index.
2117 * @param iUserTable The user table.
2118 */
2119static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2120{
2121 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2122
2123 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
2124# ifdef VBOX_STRICT
2125 /*
2126 * Check that the entry doesn't already exists.
2127 */
2128 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2129 {
2130 uint16_t i = pPage->iUserHead;
2131 do
2132 {
2133 Assert(i < pPool->cMaxUsers);
2134 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2135 i = paUsers[i].iNext;
2136 } while (i != NIL_PGMPOOL_USER_INDEX);
2137 }
2138# endif
2139
2140 /*
2141 * Allocate a user node.
2142 */
2143 uint16_t i = pPool->iUserFreeHead;
2144 if (i == NIL_PGMPOOL_USER_INDEX)
2145 {
2146 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2147 if (VBOX_FAILURE(rc))
2148 return rc;
2149 i = pPool->iUserFreeHead;
2150 }
2151 pPool->iUserFreeHead = paUsers[i].iNext;
2152
2153 /*
2154 * Initialize the user node and insert it.
2155 */
2156 paUsers[i].iNext = pPage->iUserHead;
2157 paUsers[i].iUser = iUser;
2158 paUsers[i].iUserTable = iUserTable;
2159 pPage->iUserHead = i;
2160
2161# ifdef PGMPOOL_WITH_CACHE
2162 /*
2163 * Tell the cache to update its replacement stats for this page.
2164 */
2165 pgmPoolCacheUsed(pPool, pPage);
2166# endif
2167 return VINF_SUCCESS;
2168}
2169# endif /* PGMPOOL_WITH_CACHE */
2170
2171
2172/**
2173 * Frees a user record associated with a page.
2174 *
2175 * This does not clear the entry in the user table, it simply replaces the
2176 * user record to the chain of free records.
2177 *
2178 * @param pPool The pool.
2179 * @param HCPhys The HC physical address of the shadow page.
2180 * @param iUser The shadow page pool index of the user table.
2181 * @param iUserTable The index into the user table (shadowed).
2182 */
2183static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2184{
2185 /*
2186 * Unlink and free the specified user entry.
2187 */
2188 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2189
2190 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2191 uint16_t i = pPage->iUserHead;
2192 if ( i != NIL_PGMPOOL_USER_INDEX
2193 && paUsers[i].iUser == iUser
2194 && paUsers[i].iUserTable == iUserTable)
2195 {
2196 pPage->iUserHead = paUsers[i].iNext;
2197
2198 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2199 paUsers[i].iNext = pPool->iUserFreeHead;
2200 pPool->iUserFreeHead = i;
2201 return;
2202 }
2203
2204 /* General: Linear search. */
2205 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2206 while (i != NIL_PGMPOOL_USER_INDEX)
2207 {
2208 if ( paUsers[i].iUser == iUser
2209 && paUsers[i].iUserTable == iUserTable)
2210 {
2211 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2212 paUsers[iPrev].iNext = paUsers[i].iNext;
2213 else
2214 pPage->iUserHead = paUsers[i].iNext;
2215
2216 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2217 paUsers[i].iNext = pPool->iUserFreeHead;
2218 pPool->iUserFreeHead = i;
2219 return;
2220 }
2221 iPrev = i;
2222 i = paUsers[i].iNext;
2223 }
2224
2225 /* Fatal: didn't find it */
2226 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
2227 iUser, iUserTable, pPage->GCPhys));
2228}
2229
2230
/**
 * Gets the entry size of a shadow table.
 *
 * @param   enmKind     The kind of page.
 *
 * @returns The size of the entry in bytes. That is, 4 or 8.
 * @returns If the kind is not for a table, an assertion is raised and 0 is
 *          returned.
 */
DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        /* Legacy 32-bit paging structures: 4 byte entries. */
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_ROOT_32BIT_PD:
            return 4;

        /* PAE / long mode / nested structures: 8 byte entries. */
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_ROOT_PAE_PD:
        case PGMPOOLKIND_ROOT_PDPT:
        case PGMPOOLKIND_ROOT_NESTED:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
            return 8;

        default:
            /* Not a table kind; AssertFatalMsgFailed does not return. */
            AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
    }
}
2271
2272
/**
 * Gets the entry size of a guest table.
 *
 * @param   enmKind     The kind of page.
 *
 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
 * @returns If the kind is not for a table, an assertion is raised and 0 is
 *          returned.
 */
DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        /* Shadows of legacy 32-bit guest structures: 4 byte guest entries. */
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
            return 4;

        /* Shadows of PAE / long mode guest structures: 8 byte guest entries. */
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_ROOT_PAE_PD:
        case PGMPOOLKIND_ROOT_PDPT:
            return 8;

        /* These kinds shadow no guest table at all. */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            /** @todo can we return 0? (nobody is calling this...) */
            AssertFailed();
            return 0;

        default:
            AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
    }
}
2317
2318
2319#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
/**
 * Scans one shadow page table for mappings of a physical page.
 *
 * Worker for pgmPoolTrackFlushGCPhysPT and pgmPoolTrackFlushGCPhysPTs:
 * clears every PTE in shadow page table @a iShw that maps @a pPhysPage.
 * If fewer matching PTEs are found than @a cRefs, the reference tracking
 * is inconsistent and a fatal assertion is raised.
 *
 * @param   pVM         The VM handle.
 * @param   pPhysPage   The guest page in question.
 * @param   iShw        The shadow page table.
 * @param   cRefs       The number of references made in that PT.
 */
static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
{
    LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
    PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);

    /*
     * Assert sanity.
     */
    Assert(cRefs == 1); /* both current callers pass cRefs == 1 */
    AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
    PPGMPOOLPAGE pPage = &pPool->aPages[iShw];

    /*
     * Then, clear the actual mappings to the page in the shadow PT.
     */
    switch (pPage->enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        {
            /* 32-bit PT: a PTE matches when its page frame + present bit equal u32. */
            const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
            PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
            for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
                if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
                {
                    Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
                    pPT->a[i].u = 0;
                    cRefs--;
                    if (!cRefs)
                        return; /* all tracked references cleared */
                }
#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
            /* Debug aid: dump and clear any remaining matches before the fatal assertion. */
            RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
            for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
                if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
                {
                    RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
                    pPT->a[i].u = 0;
                }
#endif
            /* Reaching this point means the PT content and cRefs disagree. */
            AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
            break;
        }

        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        {
            /* PAE PT: same scheme with 64-bit entries. */
            const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
            PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
            for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
                if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
                {
                    Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
                    pPT->a[i].u = 0;
                    cRefs--;
                    if (!cRefs)
                        return;
                }
#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
            RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
            for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
                if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
                {
                    RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
                    pPT->a[i].u = 0;
                }
#endif
            AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
            break;
        }

        default:
            AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
    }
}
2407
2408
/**
 * Scans one shadow page table for mappings of a physical page.
 *
 * Thin profiled wrapper around pgmPoolTrackFlushGCPhysPTInt that also
 * drops the tracking information from the ram range page entry.
 *
 * @param   pVM         The VM handle.
 * @param   pPhysPage   The guest page in question.
 * @param   iShw        The shadow page table.
 * @param   cRefs       The number of references made in that PT.
 */
void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
{
    PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool); /* only referenced by the STAM macros in some builds */
    LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
    STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
    pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
    /* Clear the reference/index tracking bits for this guest page. */
    pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
    STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
}
2426
2427
2428/**
2429 * Flushes a list of shadow page tables mapping the same physical page.
2430 *
2431 * @param pVM The VM handle.
2432 * @param pPhysPage The guest page in question.
2433 * @param iPhysExt The physical cross reference extent list to flush.
2434 */
2435void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2436{
2437 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2438 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2439 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt\n", pPhysPage->HCPhys, iPhysExt));
2440
2441 const uint16_t iPhysExtStart = iPhysExt;
2442 PPGMPOOLPHYSEXT pPhysExt;
2443 do
2444 {
2445 Assert(iPhysExt < pPool->cMaxPhysExts);
2446 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2447 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2448 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2449 {
2450 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2451 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2452 }
2453
2454 /* next */
2455 iPhysExt = pPhysExt->iNext;
2456 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2457
2458 /* insert the list into the free list and clear the ram range entry. */
2459 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2460 pPool->iPhysExtFreeHead = iPhysExtStart;
2461 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2462
2463 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2464}
2465#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2466
2467
/**
 * Scans all shadow page tables for mappings of a physical page.
 *
 * This may be slow, but it's most likely more efficient than cleaning
 * out the entire page pool / cache.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS if all references has been successfully cleared.
 * @retval  VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
 *          a page pool cleaning.
 *
 * @param   pVM         The VM handle.
 * @param   pPhysPage   The guest page in question.
 */
int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
{
    PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
    STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
    LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
             pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));

#if 1
    /*
     * There is a limit to what makes sense.
     */
    if (pPool->cPresent > 1024)
    {
        LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
        STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
        return VINF_PGM_GCPHYS_ALIASED;
    }
#endif

    /*
     * Iterate all the pages until we've encountered all that in use.
     * This is simple but not quite optimal solution.
     */
    const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /* match value for PAE PTEs */
    const uint32_t u32 = u64;                                        /* match value for 32-bit PTEs (low half) */
    unsigned cLeft = pPool->cUsedPages;
    unsigned iPage = pPool->cCurPages;
    while (--iPage >= PGMPOOL_IDX_FIRST)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
        if (pPage->GCPhys != NIL_RTGCPHYS)
        {
            switch (pPage->enmKind)
            {
                /*
                 * We only care about shadow page tables.
                 * (Other kinds deliberately fall through to the cLeft accounting.)
                 */
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                {
                    unsigned cPresent = pPage->cPresent;
                    PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
                    for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
                        if (pPT->a[i].n.u1Present)
                        {
                            if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
                            {
                                //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
                                pPT->a[i].u = 0;
                            }
                            if (!--cPresent)
                                break; /* seen all present entries of this PT */
                        }
                    break;
                }

                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                {
                    unsigned cPresent = pPage->cPresent;
                    PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
                    for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
                        if (pPT->a[i].n.u1Present)
                        {
                            if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
                            {
                                //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
                                pPT->a[i].u = 0;
                            }
                            if (!--cPresent)
                                break;
                        }
                    break;
                }
            }
            if (!--cLeft)
                break; /* all in-use pool pages have been visited */
        }
    }

    /* Clear the reference/index tracking bits for this guest page. */
    pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
    STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
    return VINF_SUCCESS;
}
2570
2571
/**
 * Clears the user entry in a user table.
 *
 * This is used to remove all references to a page when flushing it.
 *
 * @param   pPool   The pool.
 * @param   pPage   The shadow page being dereferenced (only used in assertions here).
 * @param   pUser   The user record identifying the user table page and the slot to clear.
 */
static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
{
    Assert(pUser->iUser != NIL_PGMPOOL_IDX);
    Assert(pUser->iUser < pPool->cCurPages);

    /*
     * Map the user page.
     */
    PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
    union
    {
        uint64_t *pau64;
        uint32_t *pau32;
    } u;
    u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pUserPage);

    /* Safety precaution in case we change the paging for other modes too in the future. */
    Assert(PGMGetHyperCR3(CTXSUFF(pPool->pVM)) != pPage->Core.Key);

#ifdef VBOX_STRICT
    /*
     * Some sanity checks: index range per table kind, and that we are not
     * about to clobber a mapping or permanent entry.
     */
    switch (pUserPage->enmKind)
    {
        case PGMPOOLKIND_ROOT_32BIT_PD:
            Assert(pUser->iUserTable < X86_PG_ENTRIES);
            Assert(!(u.pau32[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
            break;
        case PGMPOOLKIND_ROOT_PAE_PD:
            Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
            Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
            break;
        case PGMPOOLKIND_ROOT_PDPT:
            Assert(pUser->iUserTable < 4);
            Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
            break;
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
            Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
            break;
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
            Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
            Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
            break;
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
            Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
            Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
            break;
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
            Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
            /* GCPhys >> PAGE_SHIFT is the index here */
            break;
        case PGMPOOLKIND_ROOT_NESTED:
            Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
            break;

        default:
            AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
            break;
    }
#endif /* VBOX_STRICT */

    /*
     * Clear the entry in the user page.
     */
    switch (pUserPage->enmKind)
    {
        /* 32-bit entries */
        case PGMPOOLKIND_ROOT_32BIT_PD:
            u.pau32[pUser->iUserTable] = 0;
            break;

        /* 64-bit entries */
        case PGMPOOLKIND_ROOT_PAE_PD:
        case PGMPOOLKIND_ROOT_PDPT:
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_ROOT_NESTED:
            u.pau64[pUser->iUserTable] = 0;
            break;

        default:
            AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
    }
}
2666
2667
2668/**
2669 * Clears all users of a page.
2670 */
2671static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2672{
2673 /*
2674 * Free all the user records.
2675 */
2676 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2677 uint16_t i = pPage->iUserHead;
2678 while (i != NIL_PGMPOOL_USER_INDEX)
2679 {
2680 /* Clear enter in user table. */
2681 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2682
2683 /* Free it. */
2684 const uint16_t iNext = paUsers[i].iNext;
2685 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2686 paUsers[i].iNext = pPool->iUserFreeHead;
2687 pPool->iUserFreeHead = i;
2688
2689 /* Next. */
2690 i = iNext;
2691 }
2692 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2693}
2694
2695
2696#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2697/**
2698 * Allocates a new physical cross reference extent.
2699 *
2700 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2701 * @param pVM The VM handle.
2702 * @param piPhysExt Where to store the phys ext index.
2703 */
2704PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2705{
2706 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2707 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2708 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2709 {
2710 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2711 return NULL;
2712 }
2713 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2714 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2715 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2716 *piPhysExt = iPhysExt;
2717 return pPhysExt;
2718}
2719
2720
2721/**
2722 * Frees a physical cross reference extent.
2723 *
2724 * @param pVM The VM handle.
2725 * @param iPhysExt The extent to free.
2726 */
2727void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2728{
2729 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2730 Assert(iPhysExt < pPool->cMaxPhysExts);
2731 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2732 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2733 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2734 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2735 pPool->iPhysExtFreeHead = iPhysExt;
2736}
2737
2738
2739/**
2740 * Frees a physical cross reference extent.
2741 *
2742 * @param pVM The VM handle.
2743 * @param iPhysExt The extent to free.
2744 */
2745void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2746{
2747 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2748
2749 const uint16_t iPhysExtStart = iPhysExt;
2750 PPGMPOOLPHYSEXT pPhysExt;
2751 do
2752 {
2753 Assert(iPhysExt < pPool->cMaxPhysExts);
2754 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2755 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2756 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2757
2758 /* next */
2759 iPhysExt = pPhysExt->iNext;
2760 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2761
2762 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2763 pPool->iPhysExtFreeHead = iPhysExtStart;
2764}
2765
2766/**
2767 * Insert a reference into a list of physical cross reference extents.
2768 *
2769 * @returns The new ram range flags (top 16-bits).
2770 *
2771 * @param pVM The VM handle.
2772 * @param iPhysExt The physical extent index of the list head.
2773 * @param iShwPT The shadow page table index.
2774 *
2775 */
2776static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2777{
2778 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2779 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2780
2781 /* special common case. */
2782 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2783 {
2784 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2785 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2786 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2787 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2788 }
2789
2790 /* general treatment. */
2791 const uint16_t iPhysExtStart = iPhysExt;
2792 unsigned cMax = 15;
2793 for (;;)
2794 {
2795 Assert(iPhysExt < pPool->cMaxPhysExts);
2796 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2797 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2798 {
2799 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2800 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2801 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2802 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2803 }
2804 if (!--cMax)
2805 {
2806 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2807 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2808 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2809 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2810 }
2811 }
2812
2813 /* add another extent to the list. */
2814 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2815 if (!pNew)
2816 {
2817 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2818 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2819 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2820 }
2821 pNew->iNext = iPhysExtStart;
2822 pNew->aidx[0] = iShwPT;
2823 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2824 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2825}
2826
2827
/**
 * Add a reference to guest physical page where extents are in use.
 *
 * @returns The new ram range flags (top 16-bits).
 *
 * @param   pVM     The VM handle.
 * @param   u16     The ram range flags (top 16-bits).
 * @param   iShwPT  The shadow page table index.
 */
uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
{
    /* The upper bits of u16 hold the cRefs value, the lower bits the index. */
    if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
    {
        /*
         * Convert to extent list.
         */
        Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1); /* exactly one direct reference so far */
        uint16_t iPhysExt;
        PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
        if (pPhysExt)
        {
            LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
            STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
            pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK; /* the previously stored direct index */
            pPhysExt->aidx[1] = iShwPT;                      /* the new reference */
            u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
        }
        else
            /* No extents left - record the overflow marker. */
            u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
    }
    else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
    {
        /*
         * Insert into the extent list.
         */
        u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
    }
    else
        STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
    return u16;
}
2869
2870
2871/**
2872 * Clear references to guest physical memory.
2873 *
2874 * @param pPool The pool.
2875 * @param pPage The page.
2876 * @param pPhysPage Pointer to the aPages entry in the ram range.
2877 */
2878void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
2879{
2880 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
2881 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2882
2883 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2884 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2885 {
2886 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2887 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2888 do
2889 {
2890 Assert(iPhysExt < pPool->cMaxPhysExts);
2891
2892 /*
2893 * Look for the shadow page and check if it's all freed.
2894 */
2895 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2896 {
2897 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2898 {
2899 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2900
2901 for (i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2902 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2903 {
2904 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2905 return;
2906 }
2907
2908 /* we can free the node. */
2909 PVM pVM = pPool->CTXSUFF(pVM);
2910 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2911 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2912 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2913 {
2914 /* lonely node */
2915 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2916 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
2917 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2918 }
2919 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2920 {
2921 /* head */
2922 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
2923 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
2924 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2925 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2926 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2927 }
2928 else
2929 {
2930 /* in list */
2931 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2932 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2933 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2934 }
2935 iPhysExt = iPhysExtNext;
2936 return;
2937 }
2938 }
2939
2940 /* next */
2941 iPhysExtPrev = iPhysExt;
2942 iPhysExt = paPhysExts[iPhysExt].iNext;
2943 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2944
2945 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2946 }
2947 else /* nothing to do */
2948 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
2949}
2950
2951
2952
2953/**
2954 * Clear references to guest physical memory.
2955 *
2956 * This is the same as pgmPoolTracDerefGCPhys except that the guest physical address
2957 * is assumed to be correct, so the linear search can be skipped and we can assert
2958 * at an earlier point.
2959 *
2960 * @param pPool The pool.
2961 * @param pPage The page.
2962 * @param HCPhys The host physical address corresponding to the guest page.
2963 * @param GCPhys The guest physical address corresponding to HCPhys.
2964 */
2965static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
2966{
2967 /*
2968 * Walk range list.
2969 */
2970 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
2971 while (pRam)
2972 {
2973 RTGCPHYS off = GCPhys - pRam->GCPhys;
2974 if (off < pRam->cb)
2975 {
2976 /* does it match? */
2977 const unsigned iPage = off >> PAGE_SHIFT;
2978 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
2979RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
2980Log(("pgmPoolTracDerefGCPhys %VHp vs %VHp\n", HCPhysPage, HCPhys));
2981 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2982 {
2983 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2984 return;
2985 }
2986 break;
2987 }
2988 pRam = CTXALLSUFF(pRam->pNext);
2989 }
2990 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
2991}
2992
2993
2994/**
2995 * Clear references to guest physical memory.
2996 *
2997 * @param pPool The pool.
2998 * @param pPage The page.
2999 * @param HCPhys The host physical address corresponding to the guest page.
3000 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
3001 */
3002static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3003{
3004 /*
3005 * Walk range list.
3006 */
3007 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3008 while (pRam)
3009 {
3010 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3011 if (off < pRam->cb)
3012 {
3013 /* does it match? */
3014 const unsigned iPage = off >> PAGE_SHIFT;
3015 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3016 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3017 {
3018 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3019 return;
3020 }
3021 break;
3022 }
3023 pRam = CTXALLSUFF(pRam->pNext);
3024 }
3025
3026 /*
3027 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3028 */
3029 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3030 pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3031 while (pRam)
3032 {
3033 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3034 while (iPage-- > 0)
3035 {
3036 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3037 {
3038 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
3039 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3040 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3041 return;
3042 }
3043 }
3044 pRam = CTXALLSUFF(pRam->pNext);
3045 }
3046
3047 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
3048}
3049
3050
3051/**
3052 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3053 *
3054 * @param pPool The pool.
3055 * @param pPage The page.
3056 * @param pShwPT The shadow page table (mapping of the page).
3057 * @param pGstPT The guest page table.
3058 */
3059DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3060{
3061 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pShwPT->a); i++)
3062 if (pShwPT->a[i].n.u1Present)
3063 {
3064 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3065 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3066 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3067 if (!--pPage->cPresent)
3068 break;
3069 }
3070}
3071
3072
3073/**
3074 * Clear references to guest physical memory in a PAE / 32-bit page table.
3075 *
3076 * @param pPool The pool.
3077 * @param pPage The page.
3078 * @param pShwPT The shadow page table (mapping of the page).
3079 * @param pGstPT The guest page table (just a half one).
3080 */
3081DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3082{
3083 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
3084 if (pShwPT->a[i].n.u1Present)
3085 {
3086 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
3087 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3088 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3089 }
3090}
3091
3092
3093/**
3094 * Clear references to guest physical memory in a PAE / PAE page table.
3095 *
3096 * @param pPool The pool.
3097 * @param pPage The page.
3098 * @param pShwPT The shadow page table (mapping of the page).
3099 * @param pGstPT The guest page table.
3100 */
3101DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3102{
3103 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
3104 if (pShwPT->a[i].n.u1Present)
3105 {
3106 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
3107 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3108 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3109 }
3110}
3111
3112
3113/**
3114 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3115 *
3116 * @param pPool The pool.
3117 * @param pPage The page.
3118 * @param pShwPT The shadow page table (mapping of the page).
3119 */
3120DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3121{
3122 RTGCPHYS GCPhys = pPage->GCPhys;
3123 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3124 if (pShwPT->a[i].n.u1Present)
3125 {
3126 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3127 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3128 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3129 }
3130}
3131
3132
3133/**
3134 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3135 *
3136 * @param pPool The pool.
3137 * @param pPage The page.
3138 * @param pShwPT The shadow page table (mapping of the page).
3139 */
3140DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3141{
3142 RTGCPHYS GCPhys = pPage->GCPhys;
3143 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3144 if (pShwPT->a[i].n.u1Present)
3145 {
3146 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%VGp\n",
3147 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3148 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3149 }
3150}
3151#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3152
3153
3154/**
3155 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
3156 *
3157 * @param pPool The pool.
3158 * @param pPage The page.
3159 * @param pShwPD The shadow page directory (mapping of the page).
3160 */
3161DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3162{
3163 for (unsigned i = 0; i < ELEMENTS(pShwPD->a); i++)
3164 {
3165 if (pShwPD->a[i].n.u1Present)
3166 {
3167 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3168 if (pSubPage)
3169 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3170 else
3171 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3172 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3173 }
3174 }
3175}
3176
3177
3178/**
3179 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3180 *
3181 * @param pPool The pool.
3182 * @param pPage The page.
3183 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3184 */
3185DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3186{
3187 for (unsigned i = 0; i < ELEMENTS(pShwPDPT->a); i++)
3188 {
3189 if (pShwPDPT->a[i].n.u1Present)
3190 {
3191 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3192 if (pSubPage)
3193 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3194 else
3195 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3196 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3197 }
3198 }
3199}
3200
3201/**
3202 * Clear references to shadowed pages in a 64-bit level 4 page table.
3203 *
3204 * @param pPool The pool.
3205 * @param pPage The page.
3206 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
3207 */
3208DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3209{
3210 for (unsigned i = 0; i < ELEMENTS(pShwPML4->a); i++)
3211 {
3212 if (pShwPML4->a[i].n.u1Present)
3213 {
3214 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3215 if (pSubPage)
3216 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3217 else
3218 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3219 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3220 }
3221 }
3222}
3223
3224
3225/**
3226 * Clears all references made by this page.
3227 *
3228 * This includes other shadow pages and GC physical addresses.
3229 *
3230 * @param pPool The pool.
3231 * @param pPage The page.
3232 */
3233static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3234{
3235 /*
3236 * Map the shadow page and take action according to the page kind.
3237 */
3238 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
3239 switch (pPage->enmKind)
3240 {
3241#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3242 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3243 {
3244 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3245 void *pvGst;
3246 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3247 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3248 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3249 break;
3250 }
3251
3252 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3253 {
3254 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3255 void *pvGst;
3256 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3257 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3258 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3259 break;
3260 }
3261
3262 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3263 {
3264 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3265 void *pvGst;
3266 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3267 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3268 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3269 break;
3270 }
3271
3272 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3273 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3274 {
3275 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3276 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3277 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3278 break;
3279 }
3280
3281 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 4 MB page */
3282 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3283 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3284 {
3285 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3286 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3287 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3288 break;
3289 }
3290
3291#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3292 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3293 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3294 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3295 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3296 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3297 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3298 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3299 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3300 break;
3301#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3302
3303 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
3304 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3305 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3306 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3307 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3308 break;
3309
3310 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3311 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3312 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3313 break;
3314
3315 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
3316 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3317 break;
3318
3319 default:
3320 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3321 }
3322
3323 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
3324 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3325 ASMMemZeroPage(pvShw);
3326 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3327 pPage->fZeroed = true;
3328}
3329#endif /* PGMPOOL_WITH_USER_TRACKING */
3330
3331
3332/**
3333 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3334 *
3335 * @param pPool The pool.
3336 */
3337static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3338{
3339 /*
3340 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
3341 */
3342 Assert(NIL_PGMPOOL_IDX == 0);
3343 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3344 {
3345 /*
3346 * Get the page address.
3347 */
3348 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3349 union
3350 {
3351 uint64_t *pau64;
3352 uint32_t *pau32;
3353 } u;
3354 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
3355
3356 /*
3357 * Mark stuff not present.
3358 */
3359 switch (pPage->enmKind)
3360 {
3361 case PGMPOOLKIND_ROOT_32BIT_PD:
3362 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3363 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3364 u.pau32[iPage] = 0;
3365 break;
3366
3367 case PGMPOOLKIND_ROOT_PAE_PD:
3368 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * X86_PG_PAE_PDPE_ENTRIES; iPage++)
3369 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3370 u.pau64[iPage] = 0;
3371 break;
3372
3373 case PGMPOOLKIND_ROOT_PDPT:
3374 /* Not root of shadowed pages currently, ignore it. */
3375 break;
3376
3377 case PGMPOOLKIND_ROOT_NESTED:
3378 ASMMemZero32(u.pau64, PAGE_SIZE);
3379 break;
3380 }
3381 }
3382
3383 /*
3384 * Paranoia (to be removed), flag a global CR3 sync.
3385 */
3386 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
3387}
3388
3389
3390/**
3391 * Flushes the entire cache.
3392 *
3393 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3394 * and execute this CR3 flush.
3395 *
3396 * @param pPool The pool.
3397 */
3398static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3399{
3400 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3401 LogFlow(("pgmPoolFlushAllInt:\n"));
3402
3403 /*
3404 * If there are no pages in the pool, there is nothing to do.
3405 */
3406 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3407 {
3408 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3409 return;
3410 }
3411
3412 /*
3413 * Nuke the free list and reinsert all pages into it.
3414 */
3415 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3416 {
3417 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3418
3419#ifdef IN_RING3
3420 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
3421#endif
3422#ifdef PGMPOOL_WITH_MONITORING
3423 if (pPage->fMonitored)
3424 pgmPoolMonitorFlush(pPool, pPage);
3425 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3426 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3427 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3428 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3429 pPage->cModifications = 0;
3430#endif
3431 pPage->GCPhys = NIL_RTGCPHYS;
3432 pPage->enmKind = PGMPOOLKIND_FREE;
3433 Assert(pPage->idx == i);
3434 pPage->iNext = i + 1;
3435 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3436 pPage->fSeenNonGlobal = false;
3437 pPage->fMonitored= false;
3438 pPage->fCached = false;
3439 pPage->fReusedFlushPending = false;
3440 pPage->fCR3Mix = false;
3441#ifdef PGMPOOL_WITH_USER_TRACKING
3442 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3443#endif
3444#ifdef PGMPOOL_WITH_CACHE
3445 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3446 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3447#endif
3448 }
3449 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3450 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3451 pPool->cUsedPages = 0;
3452
3453#ifdef PGMPOOL_WITH_USER_TRACKING
3454 /*
3455 * Zap and reinitialize the user records.
3456 */
3457 pPool->cPresent = 0;
3458 pPool->iUserFreeHead = 0;
3459 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
3460 const unsigned cMaxUsers = pPool->cMaxUsers;
3461 for (unsigned i = 0; i < cMaxUsers; i++)
3462 {
3463 paUsers[i].iNext = i + 1;
3464 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3465 paUsers[i].iUserTable = 0xfffffffe;
3466 }
3467 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3468#endif
3469
3470#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3471 /*
3472 * Clear all the GCPhys links and rebuild the phys ext free list.
3473 */
3474 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3475 pRam;
3476 pRam = CTXALLSUFF(pRam->pNext))
3477 {
3478 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3479 while (iPage-- > 0)
3480 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3481 }
3482
3483 pPool->iPhysExtFreeHead = 0;
3484 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
3485 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3486 for (unsigned i = 0; i < cMaxPhysExts; i++)
3487 {
3488 paPhysExts[i].iNext = i + 1;
3489 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3490 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3491 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3492 }
3493 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3494#endif
3495
3496#ifdef PGMPOOL_WITH_MONITORING
3497 /*
3498 * Just zap the modified list.
3499 */
3500 pPool->cModifiedPages = 0;
3501 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3502#endif
3503
3504#ifdef PGMPOOL_WITH_CACHE
3505 /*
3506 * Clear the GCPhys hash and the age list.
3507 */
3508 for (unsigned i = 0; i < ELEMENTS(pPool->aiHash); i++)
3509 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3510 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3511 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3512#endif
3513
3514 /*
3515 * Flush all the special root pages.
3516 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3517 */
3518 pgmPoolFlushAllSpecialRoots(pPool);
3519 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3520 {
3521 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3522 pPage->iNext = NIL_PGMPOOL_IDX;
3523#ifdef PGMPOOL_WITH_MONITORING
3524 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3525 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3526 pPage->cModifications = 0;
3527 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3528 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3529 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3530 if (pPage->fMonitored)
3531 {
3532 PVM pVM = pPool->CTXSUFF(pVM);
3533 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3534 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3535 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3536 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
3537 pPool->pszAccessHandler);
3538 AssertFatalRCSuccess(rc);
3539# ifdef PGMPOOL_WITH_CACHE
3540 pgmPoolHashInsert(pPool, pPage);
3541# endif
3542 }
3543#endif
3544#ifdef PGMPOOL_WITH_USER_TRACKING
3545 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3546#endif
3547#ifdef PGMPOOL_WITH_CACHE
3548 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3549 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3550#endif
3551 }
3552
3553 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3554}
3555
3556
3557/**
3558 * Flushes a pool page.
3559 *
3560 * This moves the page to the free list after removing all user references to it.
3561 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3562 *
3563 * @returns VBox status code.
3564 * @retval VINF_SUCCESS on success.
3565 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
3566 * @param pPool The pool.
3567 * @param HCPhys The HC physical address of the shadow page.
3568 */
3569int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3570{
3571 int rc = VINF_SUCCESS;
3572 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3573 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3574 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3575
3576 /*
3577 * Quietly reject any attempts at flushing any of the special root pages.
3578 */
3579 if (pPage->idx < PGMPOOL_IDX_FIRST)
3580 {
3581 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3582 return VINF_SUCCESS;
3583 }
3584
3585 /*
3586 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
3587 */
3588 if ( pPage->enmKind == PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4
3589 && PGMGetHyperCR3(CTXSUFF(pPool->pVM)) == pPage->Core.Key)
3590 {
3591 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3592 return VINF_SUCCESS;
3593 }
3594 /* Safety precaution in case we change the paging for other modes too in the future. */
3595 AssertFatal(PGMGetHyperCR3(CTXSUFF(pPool->pVM)) != pPage->Core.Key);
3596
3597 /*
3598 * Mark the page as being in need of a ASMMemZeroPage().
3599 */
3600 pPage->fZeroed = false;
3601
3602#ifdef PGMPOOL_WITH_USER_TRACKING
3603 /*
3604 * Clear the page.
3605 */
3606 pgmPoolTrackClearPageUsers(pPool, pPage);
3607 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3608 pgmPoolTrackDeref(pPool, pPage);
3609 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3610#endif
3611
3612#ifdef PGMPOOL_WITH_CACHE
3613 /*
3614 * Flush it from the cache.
3615 */
3616 pgmPoolCacheFlushPage(pPool, pPage);
3617#endif /* PGMPOOL_WITH_CACHE */
3618
3619#ifdef PGMPOOL_WITH_MONITORING
3620 /*
3621 * Deregistering the monitoring.
3622 */
3623 if (pPage->fMonitored)
3624 rc = pgmPoolMonitorFlush(pPool, pPage);
3625#endif
3626
3627 /*
3628 * Free the page.
3629 */
3630 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3631 pPage->iNext = pPool->iFreeHead;
3632 pPool->iFreeHead = pPage->idx;
3633 pPage->enmKind = PGMPOOLKIND_FREE;
3634 pPage->GCPhys = NIL_RTGCPHYS;
3635 pPage->fReusedFlushPending = false;
3636
3637 pPool->cUsedPages--;
3638 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3639 return rc;
3640}
3641
3642
3643/**
3644 * Frees a usage of a pool page.
3645 *
3646 * The caller is responsible to updating the user table so that it no longer
3647 * references the shadow page.
3648 *
3649 * @param pPool The pool.
3650 * @param HCPhys The HC physical address of the shadow page.
3651 * @param iUser The shadow page pool index of the user table.
3652 * @param iUserTable The index into the user table (shadowed).
3653 */
3654void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3655{
3656 STAM_PROFILE_START(&pPool->StatFree, a);
3657 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3658 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3659 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3660#ifdef PGMPOOL_WITH_USER_TRACKING
3661 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3662#endif
3663#ifdef PGMPOOL_WITH_CACHE
3664 if (!pPage->fCached)
3665#endif
3666 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3667 STAM_PROFILE_STOP(&pPool->StatFree, a);
3668}
3669
3670
3671/**
3672 * Makes one or more free page free.
3673 *
3674 * @returns VBox status code.
3675 * @retval VINF_SUCCESS on success.
3676 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3677 *
3678 * @param pPool The pool.
3679 * @param iUser The user of the page.
3680 */
3681static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3682{
3683 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3684
3685 /*
3686 * If the pool isn't full grown yet, expand it.
3687 */
3688 if (pPool->cCurPages < pPool->cMaxPages)
3689 {
3690 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3691#ifdef IN_RING3
3692 int rc = PGMR3PoolGrow(pPool->pVMHC);
3693#else
3694 int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3695#endif
3696 if (VBOX_FAILURE(rc))
3697 return rc;
3698 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3699 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3700 return VINF_SUCCESS;
3701 }
3702
3703#ifdef PGMPOOL_WITH_CACHE
3704 /*
3705 * Free one cached page.
3706 */
3707 return pgmPoolCacheFreeOne(pPool, iUser);
3708#else
3709 /*
3710 * Flush the pool.
3711 * If we have tracking enabled, it should be possible to come up with
3712 * a cheap replacement strategy...
3713 */
3714 pgmPoolFlushAllInt(pPool);
3715 return VERR_PGM_POOL_FLUSHED;
3716#endif
3717}
3718
3719
3720/**
3721 * Allocates a page from the pool.
3722 *
3723 * This page may actually be a cached page and not in need of any processing
3724 * on the callers part.
3725 *
3726 * @returns VBox status code.
3727 * @retval VINF_SUCCESS if a NEW page was allocated.
3728 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3729 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3730 * @param pVM The VM handle.
3731 * @param GCPhys The GC physical address of the page we're gonna shadow.
3732 * For 4MB and 2MB PD entries, it's the first address the
3733 * shadow PT is covering.
3734 * @param enmKind The kind of mapping.
3735 * @param iUser The shadow page pool index of the user table.
3736 * @param iUserTable The index into the user table (shadowed).
3737 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3738 */
3739int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
3740{
3741 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3742 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3743 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3744 *ppPage = NULL;
3745
3746#ifdef PGMPOOL_WITH_CACHE
3747 if (pPool->fCacheEnabled)
3748 {
3749 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3750 if (VBOX_SUCCESS(rc2))
3751 {
3752 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3753 LogFlow(("pgmPoolAlloc: cached returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3754 return rc2;
3755 }
3756 }
3757#endif
3758
3759 /*
3760 * Allocate a new one.
3761 */
3762 int rc = VINF_SUCCESS;
3763 uint16_t iNew = pPool->iFreeHead;
3764 if (iNew == NIL_PGMPOOL_IDX)
3765 {
3766 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3767 if (VBOX_FAILURE(rc))
3768 {
3769 if (rc != VERR_PGM_POOL_CLEARED)
3770 {
3771 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3772 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3773 return rc;
3774 }
3775 Log(("pgmPoolMakeMoreFreePages failed with %Vrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
3776 rc = VERR_PGM_POOL_FLUSHED;
3777 }
3778 iNew = pPool->iFreeHead;
3779 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3780 }
3781
3782 /* unlink the free head */
3783 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3784 pPool->iFreeHead = pPage->iNext;
3785 pPage->iNext = NIL_PGMPOOL_IDX;
3786
3787 /*
3788 * Initialize it.
3789 */
3790 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3791 pPage->enmKind = enmKind;
3792 pPage->GCPhys = GCPhys;
3793 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3794 pPage->fMonitored = false;
3795 pPage->fCached = false;
3796 pPage->fReusedFlushPending = false;
3797 pPage->fCR3Mix = false;
3798#ifdef PGMPOOL_WITH_MONITORING
3799 pPage->cModifications = 0;
3800 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3801 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3802#endif
3803#ifdef PGMPOOL_WITH_USER_TRACKING
3804 pPage->cPresent = 0;
3805 pPage->iFirstPresent = ~0;
3806
3807 /*
3808 * Insert into the tracking and cache. If this fails, free the page.
3809 */
3810 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3811 if (VBOX_FAILURE(rc3))
3812 {
3813 if (rc3 != VERR_PGM_POOL_CLEARED)
3814 {
3815 pPool->cUsedPages--;
3816 pPage->enmKind = PGMPOOLKIND_FREE;
3817 pPage->GCPhys = NIL_RTGCPHYS;
3818 pPage->iNext = pPool->iFreeHead;
3819 pPool->iFreeHead = pPage->idx;
3820 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3821 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3822 return rc3;
3823 }
3824 Log(("pgmPoolTrackInsert failed with %Vrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
3825 rc = VERR_PGM_POOL_FLUSHED;
3826 }
3827#endif /* PGMPOOL_WITH_USER_TRACKING */
3828
3829 /*
3830 * Commit the allocation, clear the page and return.
3831 */
3832#ifdef VBOX_WITH_STATISTICS
3833 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
3834 pPool->cUsedPagesHigh = pPool->cUsedPages;
3835#endif
3836
3837 if (!pPage->fZeroed)
3838 {
3839 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3840 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
3841 ASMMemZeroPage(pv);
3842 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3843 }
3844
3845 *ppPage = pPage;
3846 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
3847 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
3848 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3849 return rc;
3850}
3851
3852
3853/**
3854 * Frees a usage of a pool page.
3855 *
3856 * @param pVM The VM handle.
3857 * @param HCPhys The HC physical address of the shadow page.
3858 * @param iUser The shadow page pool index of the user table.
3859 * @param iUserTable The index into the user table (shadowed).
3860 */
3861void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
3862{
3863 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
3864 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3865 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
3866}
3867
3868
3869/**
3870 * Gets a in-use page in the pool by it's physical address.
3871 *
3872 * @returns Pointer to the page.
3873 * @param pVM The VM handle.
3874 * @param HCPhys The HC physical address of the shadow page.
3875 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
3876 */
3877PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
3878{
3879 /** @todo profile this! */
3880 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3881 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
3882 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
3883 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
3884 return pPage;
3885}
3886
3887
/**
 * Flushes the entire cache.
 *
 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
 * and execute this CR3 flush.
 *
 * @param   pVM     The VM handle.
 */
void pgmPoolFlushAll(PVM pVM)
{
    LogFlow(("pgmPoolFlushAll:\n"));
    pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
}
3901
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette