VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 11704

Last change on this file since 11704 was 11434, checked in by vboxsync, 16 years ago

Compile fix

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 139.9 KB
/* $Id: PGMAllPool.cpp 11434 2008-08-14 17:25:32Z vboxsync $ */
/** @file
 * PGM Shadow Page Pool.
 */

/*
 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
 * Clara, CA 95054 USA or visit http://www.sun.com if you need
 * additional information or have any questions.
 */


/*******************************************************************************
*   Header Files                                                               *
*******************************************************************************/
#define LOG_GROUP LOG_GROUP_PGM_POOL
#include <VBox/pgm.h>
#include <VBox/mm.h>
#include <VBox/em.h>
#include <VBox/cpum.h>
#ifdef IN_GC
# include <VBox/patm.h>
#endif
#include "PGMInternal.h"
#include <VBox/vm.h>
#include <VBox/disopcode.h>

#include <VBox/log.h>
#include <VBox/err.h>
#include <iprt/asm.h>


/*******************************************************************************
*   Internal Functions                                                         *
*******************************************************************************/
__BEGIN_DECLS
static void pgmPoolFlushAllInt(PPGMPOOL pPool);
#ifdef PGMPOOL_WITH_USER_TRACKING
DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#endif
#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
#endif
#ifdef PGMPOOL_WITH_CACHE
static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
#endif
#ifdef PGMPOOL_WITH_MONITORING
static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#endif
#ifndef IN_RING3
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
#endif
__END_DECLS


/**
 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
 *
 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
 * @param   enmKind     The page kind.
 */
DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            return true;
        default:
            return false;
    }
}


#ifdef IN_GC
/**
 * Maps a pool page into the current context.
 *
 * @returns Pointer to the mapping.
 * @param   pVM     The VM handle.
 * @param   pPage   The page to map.
 */
void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
{
    /* general pages. */
    if (pPage->idx >= PGMPOOL_IDX_FIRST)
    {
        Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
        void *pv;
        int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pv);
        AssertReleaseRC(rc);
        return pv;
    }

    /* special pages. */
    switch (pPage->idx)
    {
        case PGMPOOL_IDX_PD:
            return pVM->pgm.s.pGC32BitPD;
        case PGMPOOL_IDX_PAE_PD:
        case PGMPOOL_IDX_PAE_PD_0:
            return pVM->pgm.s.apGCPaePDs[0];
        case PGMPOOL_IDX_PAE_PD_1:
            return pVM->pgm.s.apGCPaePDs[1];
        case PGMPOOL_IDX_PAE_PD_2:
            return pVM->pgm.s.apGCPaePDs[2];
        case PGMPOOL_IDX_PAE_PD_3:
            return pVM->pgm.s.apGCPaePDs[3];
        case PGMPOOL_IDX_PDPT:
            return pVM->pgm.s.pGCPaePDPT;
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }
}
#endif /* IN_GC */


#ifdef PGMPOOL_WITH_MONITORING
/**
 * Determines the size of a write instruction.
 * @returns number of bytes written.
 * @param   pDis    The disassembler state.
 */
static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
{
    /*
     * This is very crude and possibly wrong for some opcodes,
     * but since it's not really supposed to be called we can
     * probably live with that.
     */
    return DISGetParamSize(pDis, &pDis->param1);
}


/**
 * Flushes a chain of pages sharing the same access monitor.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pPool   The pool.
 * @param   pPage   A page in the chain.
 */
int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    LogFlow(("pgmPoolMonitorChainFlush: Flush page %VGp type=%d\n", pPage->GCPhys, pPage->enmKind));

    /*
     * Find the list head.
     */
    uint16_t idx = pPage->idx;
    if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
    {
        while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
        {
            idx = pPage->iMonitoredPrev;
            Assert(idx != pPage->idx);
            pPage = &pPool->aPages[idx];
        }
    }

    /*
     * Iterate the list flushing each shadow page.
     */
    int rc = VINF_SUCCESS;
    for (;;)
    {
        idx = pPage->iMonitoredNext;
        Assert(idx != pPage->idx);
        if (pPage->idx >= PGMPOOL_IDX_FIRST)
        {
            int rc2 = pgmPoolFlushPage(pPool, pPage);
            if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
                rc = VINF_PGM_SYNC_CR3;
        }
        /* next */
        if (idx == NIL_PGMPOOL_IDX)
            break;
        pPage = &pPool->aPages[idx];
    }
    return rc;
}


/**
 * Wrapper for getting the current context pointer to the entry being modified.
 *
 * @returns Pointer to the current context mapping of the entry.
 * @param   pPool       The pool.
 * @param   pvFault     The fault virtual address.
 * @param   GCPhysFault The fault physical address.
 * @param   cbEntry     The entry size.
 */
#ifdef IN_RING3
DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
#else
DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
#endif
{
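    /* How the entry is reached differs by context: in GC the faulting address
       is already mapped, in R0 the guest physical address is translated to a
       host context pointer, and in R3 pvFault is already a host pointer. */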
#ifdef IN_GC
    return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));

#elif defined(IN_RING0)
    void *pvRet;
    int rc = pgmRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
    AssertFatalRCSuccess(rc);
    return pvRet;

#elif defined(IN_RING3)
    return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
#else
# error "huh?"
#endif
}


/**
 * Process shadow entries before they are changed by the guest.
 *
 * For PT entries we will clear them. For PD entries, we'll simply check
 * for mapping conflicts and set the SyncCR3 FF if found.
 *
 * @param   pPool       The pool.
 * @param   pPage       The head page.
 * @param   GCPhysFault The guest physical fault address.
 * @param   pvAddress   In R0 and GC this is the guest context fault address (flat).
 *                      In R3 this is the host context 'fault' address.
 * @param   pCpu        The disassembler state for figuring out the write size.
 *                      This need not be specified if the caller knows we won't do cross entry accesses.
 */
#ifdef IN_RING3
void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
#else
void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
#endif
{
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
    const unsigned off     = GCPhysFault & PAGE_OFFSET_MASK;
    const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
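    /* Without a disassembler state cbWrite is zero, which disables the
       cross-entry ("paranoia") checks below. */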

    LogFlow(("pgmPoolMonitorChainChanging: %VGv phys=%VGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));

    for (;;)
    {
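        /* Map the shadow page once and access it through whichever table
           layout matches this pool page's kind. */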
        union
        {
            void *pv;
            PX86PT pPT;
            PX86PTPAE pPTPae;
            PX86PD pPD;
            PX86PDPAE pPDPae;
            PX86PDPT pPDPT;
            PX86PML4 pPML4;
        } uShw;
        uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);

        switch (pPage->enmKind)
        {
            case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
            {
                const unsigned iShw = off / sizeof(X86PTE);
                if (uShw.pPT->a[iShw].n.u1Present)
                {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                    PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                    Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                               pGstPte->u & X86_PTE_PG_MASK);
# endif
                    uShw.pPT->a[iShw].u = 0;
                }
                break;
            }

            /* These shadows cover only half a guest page, so check that the
               write hits the half this page actually shadows. */
            case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
                {
                    const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
                    if (uShw.pPTPae->a[iShw].n.u1Present)
                    {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                        PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                        Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
                        pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                                   uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                                   pGstPte->u & X86_PTE_PG_MASK);
# endif
                        uShw.pPTPae->a[iShw].u = 0;
                    }
                }
                break;

            case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
            {
                const unsigned iShw = off / sizeof(X86PTEPAE);
                if (uShw.pPTPae->a[iShw].n.u1Present)
                {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                    PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                    Log4(("pgmPoolMonitorChainChanging pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                               pGstPte->u & X86_PTE_PAE_PG_MASK);
# endif
                    uShw.pPTPae->a[iShw].u = 0;
                }

                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + cbWrite > sizeof(X86PTEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
                    AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));

                    if (uShw.pPTPae->a[iShw2].n.u1Present)
                    {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                        PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                        Log4(("pgmPoolMonitorChainChanging pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
                        pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                                   uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
                                                   pGstPte->u & X86_PTE_PAE_PG_MASK);
# endif
                        uShw.pPTPae->a[iShw2].u = 0;
                    }
                }

                break;
            }

            case PGMPOOLKIND_ROOT_32BIT_PD:
            {
                const unsigned iShw = off / sizeof(X86PTE);         // ASSUMING 32-bit guest paging!
                if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                }
                /* paranoia / a bit assumptive. */
                else if (   pCpu
                         && (off & 3)
                         && (off & 3) + cbWrite > sizeof(X86PTE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
                    if (    iShw2 != iShw
                        &&  iShw2 < RT_ELEMENTS(uShw.pPD->a)
                        &&  uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
                }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                if (    uShw.pPD->a[iShw].n.u1Present
                    &&  !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
# ifdef IN_GC   /* TLB load - we're pushing things a bit... */
                    ASMProbeReadByte(pvAddress);
# endif
                    pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
                    uShw.pPD->a[iShw].u = 0;
                }
#endif
                break;
            }

            case PGMPOOLKIND_ROOT_PAE_PD:
            {
                unsigned iShw = (off / sizeof(X86PTE)) * 2;          // ASSUMING 32-bit guest paging!
                for (unsigned i = 0; i < 2; i++, iShw++)
                {
                    if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                    }
                    /* paranoia / a bit assumptive. */
                    else if (   pCpu
                             && (off & 3)
                             && (off & 3) + cbWrite > 4)
                    {
                        const unsigned iShw2 = iShw + 2;
                        if (    iShw2 < RT_ELEMENTS(uShw.pPDPae->a)
                            &&  (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                        {
                            Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
                            VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                        }
                    }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                    if (    uShw.pPDPae->a[iShw].n.u1Present
                        &&  !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
# ifdef IN_GC   /* TLB load - we're pushing things a bit... */
                        ASMProbeReadByte(pvAddress);
# endif
                        pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
#endif
                }
                break;
            }

            case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
            {
                const unsigned iShw = off / sizeof(X86PDEPAE);
                if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                }
#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
                /* Causes trouble when the guest uses a PDE to refer to the whole
                 * page table level structure. (Invalidate here; faults later on
                 * when it tries to change the page table entries.)
                 * -> recheck; probably only applies to the GC case.
                 */
                else
                {
                    if (uShw.pPDPae->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
                        pgmPoolFree(pPool->CTXSUFF(pVM),
                                    uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
                                    /* Note: hardcoded PAE implementation dependency */
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
                }
#endif
                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + cbWrite > sizeof(X86PDEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
                    AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));

                    if (    iShw2 != iShw
                        &&  uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
                    else if (uShw.pPDPae->a[iShw2].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
                        pgmPoolFree(pPool->CTXSUFF(pVM),
                                    uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
                                    /* Note: hardcoded PAE implementation dependency */
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
                        uShw.pPDPae->a[iShw2].u = 0;
                    }
#endif
                }
                break;
            }

            case PGMPOOLKIND_ROOT_PDPT:
            {
                /* Hopefully this doesn't happen very often:
                 * - touching unused parts of the page
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                const unsigned iShw = off / sizeof(X86PDPE);
                if (iShw < X86_PG_PAE_PDPE_ENTRIES)          /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
                {
                    if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                    }
                    /* paranoia / a bit assumptive. */
                    else if (   pCpu
                             && (off & 7)
                             && (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
                        if (    iShw2 != iShw
                            &&  iShw2 < X86_PG_PAE_PDPE_ENTRIES
                            &&  uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
                        {
                            Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
                            VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                        }
                    }
                }
                break;
            }

#ifndef IN_GC
            case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
            {
                Assert(pPage->enmKind == PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD);

                const unsigned iShw = off / sizeof(X86PDEPAE);
                if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                }
                else
                {
                    if (uShw.pPDPae->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
                        pgmPoolFree(pPool->CTXSUFF(pVM),
                                    uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
                                    pPage->idx,
                                    iShw);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
                }
                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + cbWrite > sizeof(X86PDEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
                    AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));

                    if (    iShw2 != iShw
                        &&  uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
                    else if (uShw.pPDPae->a[iShw2].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
                        pgmPoolFree(pPool->CTXSUFF(pVM),
                                    uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
                                    pPage->idx,
                                    iShw2);
                        uShw.pPDPae->a[iShw2].u = 0;
                    }
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
            {
                /* Hopefully this doesn't happen very often:
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                if (!VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    const unsigned iShw = off / sizeof(X86PDPE);
                    if (uShw.pPDPT->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
                        pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
                        uShw.pPDPT->a[iShw].u = 0;
                    }
                    /* paranoia / a bit assumptive. */
                    if (    pCpu
                        &&  (off & 7)
                        &&  (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
                        if (uShw.pPDPT->a[iShw2].n.u1Present)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
                            pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
                            uShw.pPDPT->a[iShw2].u = 0;
                        }
                    }
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
            {
                /* Hopefully this doesn't happen very often:
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                if (!VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    const unsigned iShw = off / sizeof(X86PDPE);
                    if (uShw.pPML4->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
                        pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
                        uShw.pPML4->a[iShw].u = 0;
                    }
                    /* paranoia / a bit assumptive. */
                    if (    pCpu
                        &&  (off & 7)
                        &&  (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
                        if (uShw.pPML4->a[iShw2].n.u1Present)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
                            pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
                            uShw.pPML4->a[iShw2].u = 0;
                        }
                    }
                }
                break;
            }
#endif /* !IN_GC */

            default:
                AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
        }

        /* next */
        if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
            return;
        pPage = &pPool->aPages[pPage->iMonitoredNext];
    }
}


# ifndef IN_RING3
/**
 * Checks if an access could be a fork operation in progress.
 *
 * Meaning that the guest is setting up the parent process for copy-on-write.
 *
 * @returns true if it's likely that we're forking, otherwise false.
 * @param   pPool       The pool.
 * @param   pCpu        The disassembled instruction.
 * @param   offFault    The access offset.
 */
DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
{
    /*
     * i386 linux is using btr to clear X86_PTE_RW.
     * The functions involved are (2.6.16 source inspection):
     *     clear_bit
     *     ptep_set_wrprotect
     *     copy_one_pte
     *     copy_pte_range
     *     copy_pmd_range
     *     copy_pud_range
     *     copy_page_range
     *     dup_mmap
     *     dup_mm
     *     copy_mm
     *     copy_process
     *     do_fork
     */
    if (    pCpu->pCurInstr->opcode == OP_BTR
        &&  !(offFault & 4)
        /** @todo Validate that the bit index is X86_PTE_RW. */
            )
    {
        STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
        return true;
    }
    return false;
}


/**
 * Determine whether the page is likely to have been reused.
 *
 * @returns true if we consider the page as being reused for a different purpose.
 * @returns false if we consider it to still be a paging page.
 * @param   pVM         VM Handle.
 * @param   pPage       The page in question.
 * @param   pRegFrame   Trap register frame.
 * @param   pCpu        The disassembly info for the faulting instruction.
 * @param   pvFault     The fault address.
 *
 * @remark  The REP prefix check is left to the caller because of STOSD/W.
 */
DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
{
#ifndef IN_GC
    if (    HWACCMHasPendingIrq(pVM)
        &&  (pRegFrame->rsp - pvFault) < 32)
    {
        /* Fault caused by stack writes while trying to inject an interrupt event. */
        Log(("pgmPoolMonitorIsReused: reused %VGv for interrupt stack (rsp=%VGv).\n", pvFault, pRegFrame->rsp));
        return true;
    }
#else
    NOREF(pVM);
#endif

    switch (pCpu->pCurInstr->opcode)
    {
        /* call implies the actual push of the return address faulted */
        case OP_CALL:
            Log4(("pgmPoolMonitorIsReused: CALL\n"));
            return true;
        case OP_PUSH:
            Log4(("pgmPoolMonitorIsReused: PUSH\n"));
            return true;
        case OP_PUSHF:
            Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
            return true;
        case OP_PUSHA:
            Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
            return true;
        case OP_FXSAVE:
            Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
            return true;
        case OP_MOVNTI:     /* solaris - block_zero_no_xmm */
            Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
            return true;
        case OP_MOVNTDQ:    /* solaris - hwblkclr & hwblkpagecopy */
            Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
            return true;
        case OP_MOVSWD:
        case OP_STOSWD:
            if (    pCpu->prefix == (PREFIX_REP|PREFIX_REX)
                &&  pRegFrame->rcx >= 0x40)
            {
                Assert(pCpu->mode == CPUMODE_64BIT);

                Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
                return true;
            }
            return false;
    }
    if (    (pCpu->param1.flags & USE_REG_GEN32)
        &&  (pCpu->param1.base.reg_gen == USE_REG_ESP))
    {
        Log4(("pgmPoolMonitorIsReused: ESP\n"));
        return true;
    }

    //if (pPage->fCR3Mix)
    //    return false;
    return false;
}


/**
 * Flushes the page being accessed.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                     PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * First, do the flushing.
     */
    int rc = pgmPoolMonitorChainFlush(pPool, pPage);

    /*
     * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
     */
    uint32_t cbWritten;
    int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
    if (VBOX_SUCCESS(rc2))
        pRegFrame->rip += pCpu->opsize;
    else if (rc2 == VERR_EM_INTERPRETER)
    {
#ifdef IN_GC
        if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
        {
            LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
                     pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
            rc = VINF_SUCCESS;
            STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
        }
        else
#endif
        {
            rc = VINF_EM_RAW_EMULATE_INSTR;
            STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
        }
    }
    else
        rc = rc2;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
    return rc;
}


/**
 * Handles the STOSD write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                          PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    Assert(pCpu->mode == CPUMODE_32BIT);

    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Execute REP STOSD.
     *
     * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
     * write situation, meaning that it's safe to write here.
     */
    RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
    while (pRegFrame->ecx)
    {
        pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
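        /* In GC the faulting address is mapped and can be written directly;
           in other contexts the write goes through the physical access API. */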
#ifdef IN_GC
        *(uint32_t *)pu32 = pRegFrame->eax;
#else
        PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
#endif
        pu32           += 4;
        GCPhysFault    += 4;
        pRegFrame->edi += 4;
        pRegFrame->ecx--;
    }
    pRegFrame->rip += pCpu->opsize;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
    return VINF_SUCCESS;
}


/**
 * Handles the simple write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                           PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Clear all the pages. ASSUMES that pvFault is readable.
     */
    pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);

    /*
     * Interpret the instruction.
     */
    uint32_t cb;
    int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
    if (VBOX_SUCCESS(rc))
        pRegFrame->rip += pCpu->opsize;
    else if (rc == VERR_EM_INTERPRETER)
    {
        LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
                 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
        rc = VINF_EM_RAW_EMULATE_INSTR;
        STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
    }

    /*
     * Quick hack, with logging enabled we're getting stale
     * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
     * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
     * have to be fixed to support this. But that'll have to wait till next week.
     *
     * An alternative is to keep track of the changed PTEs together with the
     * GCPhys from the guest PT. This may prove expensive though.
     *
     * At the moment, it's VITAL that it's done AFTER the instruction interpreting
     * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
     */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
    return rc;
}


/**
 * \#PF Handler callback for PT write accesses.
 *
 * @returns VBox status code (appropriate for GC return).
 * @param   pVM         VM Handle.
 * @param   uErrorCode  CPU Error code.
 * @param   pRegFrame   Trap register frame.
 *                      NULL on DMA and other non CPU access.
 * @param   pvFault     The fault address (cr2).
 * @param   GCPhysFault The GC physical address corresponding to pvFault.
 * @param   pvUser      User argument.
 */
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
{
    STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
    PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
    PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
    LogFlow(("pgmPoolAccessHandler: pvFault=%VGv pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));

    /*
     * We should ALWAYS have the list head as user parameter. This
     * is because we use that page to record the changes.
     */
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);

    /*
     * Disassemble the faulting instruction.
     */
    DISCPUSTATE Cpu;
    int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
    AssertRCReturn(rc, rc);

    /*
     * Check if it's worth dealing with.
     */
    bool fReused = false;
    if (    (   pPage->cModifications < 48   /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
             || pPage->fCR3Mix)
        &&  !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
        &&  !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
    {
        /*
         * Simple instructions, no REP prefix.
         */
        if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
        {
            rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
            return rc;
        }

        /*
         * Windows is frequently doing small memset() operations (netio test 4k+).
         * We have to deal with these or we'll kill the cache and performance.
         */
        if (    Cpu.pCurInstr->opcode == OP_STOSWD
            &&  CPUMGetGuestCPL(pVM, pRegFrame) == 0
            &&  pRegFrame->ecx <= 0x20
            &&  pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
            &&  !((uintptr_t)pvFault & 3)
            &&  (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
            &&  Cpu.mode == CPUMODE_32BIT
            &&  Cpu.opmode == CPUMODE_32BIT
            &&  Cpu.addrmode == CPUMODE_32BIT
            &&  Cpu.prefix == PREFIX_REP
            &&  !pRegFrame->eflags.Bits.u1DF
           )
        {
            rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
            return rc;
        }

        /* REP prefix, don't bother. */
        STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
        Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%VGv opcode=%d prefix=%#x\n",
              pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
    }

    /*
     * Not worth it, so flush it.
     *
     * If we considered it to be reused, don't go back to ring-3
     * to emulate failed instructions since we usually cannot
     * interpret them. This may be a bit risky, in which case
     * the reuse detection must be fixed.
     */
    rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
    if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
        rc = VINF_SUCCESS;
    STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
    return rc;
}

# endif /* !IN_RING3 */
#endif /* PGMPOOL_WITH_MONITORING */



#ifdef PGMPOOL_WITH_CACHE
/**
 * Inserts a page into the GCPhys hash table.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolHashInsert: %VGp\n", pPage->GCPhys));
    Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
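    /* Link in at the head of the bucket; pages hashing to the same bucket are
       chained through their iNext indexes. */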
    uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
    pPage->iNext = pPool->aiHash[iHash];
    pPool->aiHash[iHash] = pPage->idx;
}


/**
 * Removes a page from the GCPhys hash table.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolHashRemove: %VGp\n", pPage->GCPhys));
    uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
    if (pPool->aiHash[iHash] == pPage->idx)
        pPool->aiHash[iHash] = pPage->iNext;
    else
    {
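        /* Not the bucket head: walk the chain to find the predecessor and
           unlink the page from it. */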
        uint16_t iPrev = pPool->aiHash[iHash];
        for (;;)
        {
            const int16_t i = pPool->aPages[iPrev].iNext;
            if (i == pPage->idx)
            {
                pPool->aPages[iPrev].iNext = pPage->iNext;
                break;
            }
            if (i == NIL_PGMPOOL_IDX)
            {
                AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
                break;
            }
            iPrev = i;
        }
    }
    pPage->iNext = NIL_PGMPOOL_IDX;
}


/**
 * Frees up one cache page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
 * @param   pPool   The pool.
 * @param   iUser   The user index.
 */
static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
{
#ifndef IN_GC
    const PVM pVM = pPool->CTXSUFF(pVM);
#endif
    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
    STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);

    /*
     * Select one page from the tail of the age list.
     */
    uint16_t iToFree = pPool->iAgeTail;
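    /* Never evict the page the caller is allocating on behalf of; fall back
       to the next oldest entry instead. */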
    if (iToFree == iUser)
        iToFree = pPool->aPages[iToFree].iAgePrev;
/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
    if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
    {
        uint16_t i = pPool->aPages[iToFree].iAgePrev;
        for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
        {
            if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
                continue;
            iToFree = i;
            break;
        }
    }
*/
    Assert(iToFree != iUser);
    AssertRelease(iToFree != NIL_PGMPOOL_IDX);

    int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
    if (rc == VINF_SUCCESS)
        PGM_INVL_GUEST_TLBS(); /* see PT handler. */
    return rc;
}


/**
 * Checks if a kind mismatch is really a page being reused
 * or if it's just normal remappings.
 *
 * @returns true if reused and the cached page (enmKind1) should be flushed
 * @returns false if not reused.
 * @param   enmKind1    The kind of the cached page.
 * @param   enmKind2    The kind of the requested page.
 */
static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
{
    switch (enmKind1)
    {
        /*
         * Never reuse them. There is no remapping in non-paging mode.
         */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
            return true;

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         */
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
                case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
                case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         */
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * These cannot be flushed, and it's common to reuse the PDs as PTs.
         */
        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
        case PGMPOOLKIND_ROOT_PDPT:
        case PGMPOOLKIND_ROOT_NESTED:
            return false;

        default:
            AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
    }
}


/**
 * Attempts to satisfy a pgmPoolAlloc request from the cache.
 *
 * @returns VBox status code.
 * @retval  VINF_PGM_CACHED_PAGE on success.
 * @retval  VERR_FILE_NOT_FOUND if not found.
 * @param   pPool       The pool.
 * @param   GCPhys      The GC physical address of the page we're gonna shadow.
 * @param   enmKind     The kind of mapping.
 * @param   iUser       The shadow page pool index of the user table.
 * @param   iUserTable  The index into the user table (shadowed).
 * @param   ppPage      Where to store the pointer to the page.
 */
static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
{
#ifndef IN_GC
    const PVM pVM = pPool->CTXSUFF(pVM);
#endif
    /*
     * Look up the GCPhys in the hash.
     */
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    Log3(("pgmPoolCacheAlloc: %VGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
    if (i != NIL_PGMPOOL_IDX)
    {
        do
        {
            PPGMPOOLPAGE pPage = &pPool->aPages[i];
            Log3(("pgmPoolCacheAlloc: slot %d found page %VGp\n", i, pPage->GCPhys));
            if (pPage->GCPhys == GCPhys)
            {
                if ((PGMPOOLKIND)pPage->enmKind == enmKind)
                {
                    int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
                    if (VBOX_SUCCESS(rc))
                    {
                        *ppPage = pPage;
                        STAM_COUNTER_INC(&pPool->StatCacheHits);
                        return VINF_PGM_CACHED_PAGE;
                    }
                    return rc;
                }

                /*
                 * The kind is different. In some cases we should now flush the page
                 * as it has been reused, but in most cases this is normal remapping
                 * of PDs as PT or big pages using the GCPhys field in a slightly
                 * different way than the other kinds.
                 */
                if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
                {
                    STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
                    pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
                    PGM_INVL_GUEST_TLBS(); /* see PT handler. */
                    break;
                }
            }

            /* next */
            i = pPage->iNext;
        } while (i != NIL_PGMPOOL_IDX);
    }

    Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
    STAM_COUNTER_INC(&pPool->StatCacheMisses);
    return VERR_FILE_NOT_FOUND;
}


/**
 * Inserts a page into the cache.
 *
 * @param   pPool           The pool.
 * @param   pPage           The cached page.
 * @param   fCanBeCached    Set if the page is fit for caching from the caller's point of view.
 */
static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
{
    /*
     * Insert into the GCPhys hash if the page is fit for that.
     */
    Assert(!pPage->fCached);
    if (fCanBeCached)
    {
        pPage->fCached = true;
        pgmPoolHashInsert(pPool, pPage);
        Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
              pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
        STAM_COUNTER_INC(&pPool->StatCacheCacheable);
    }
    else
    {
        Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
              pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
        STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
    }

    /*
     * Insert at the head of the age list.
     */
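    /* The age list is kept in MRU order: new and recently used pages sit at
       the head, and pgmPoolCacheFreeOne evicts from the tail. */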
    pPage->iAgePrev = NIL_PGMPOOL_IDX;
    pPage->iAgeNext = pPool->iAgeHead;
    if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
        pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
    else
        pPool->iAgeTail = pPage->idx;
    pPool->iAgeHead = pPage->idx;
}


/**
 * Flushes a cached page.
 *
 * @param   pPool   The pool.
 * @param   pPage   The cached page.
 */
static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolCacheFlushPage: %VGp\n", pPage->GCPhys));

    /*
     * Remove the page from the hash.
     */
    if (pPage->fCached)
    {
        pPage->fCached = false;
        pgmPoolHashRemove(pPool, pPage);
    }
    else
        Assert(pPage->iNext == NIL_PGMPOOL_IDX);

    /*
     * Remove it from the age list.
     */
    if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
    else
        pPool->iAgeTail = pPage->iAgePrev;
    if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
    else
        pPool->iAgeHead = pPage->iAgeNext;
    pPage->iAgeNext = NIL_PGMPOOL_IDX;
    pPage->iAgePrev = NIL_PGMPOOL_IDX;
}
#endif /* PGMPOOL_WITH_CACHE */


#ifdef PGMPOOL_WITH_MONITORING
/**
 * Looks for pages sharing the monitor.
 *
 * @returns Pointer to the head page.
 * @returns NULL if not found.
 * @param   pPool       The Pool
 * @param   pNewPage    The page which is going to be monitored.
 */
static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
{
#ifdef PGMPOOL_WITH_CACHE
    /*
     * Look up the GCPhys in the hash.
     */
    RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    if (i == NIL_PGMPOOL_IDX)
        return NULL;
    do
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[i];
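        /* GCPhys is page aligned while pPage->GCPhys may carry sub-page bits,
           so the unsigned subtraction below is less than PAGE_SIZE exactly
           when both addresses fall within the same guest page. */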
        if (    pPage->GCPhys - GCPhys < PAGE_SIZE
            &&  pPage != pNewPage)
        {
            switch (pPage->enmKind)
            {
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
                case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
                case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
                case PGMPOOLKIND_ROOT_32BIT_PD:
                case PGMPOOLKIND_ROOT_PAE_PD:
                case PGMPOOLKIND_ROOT_PDPT:
                {
                    /* find the head */
                    while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
                    {
                        Assert(pPage->iMonitoredPrev != pPage->idx);
                        pPage = &pPool->aPages[pPage->iMonitoredPrev];
                    }
                    return pPage;
                }

                /* ignore, no monitoring. */
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_ROOT_NESTED:
                    break;
                default:
                    AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
            }
        }

        /* next */
        i = pPage->iNext;
    } while (i != NIL_PGMPOOL_IDX);
#endif
    return NULL;
}

/**
 * Enables write monitoring of a guest page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
 * @param   pPool   The pool.
 * @param   pPage   The cached page.
 */
static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    LogFlow(("pgmPoolMonitorInsert %VGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));

    /*
     * Filter out the relevant kinds.
     */
    switch (pPage->enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_ROOT_PDPT:
            break;

        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            /* Nothing to monitor here. */
            return VINF_SUCCESS;

        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
#ifdef PGMPOOL_WITH_MIXED_PT_CR3
            break;
#endif
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        default:
            AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
    }

    /*
     * Install handler.
     */
    int rc;
    PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
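    /* If another pool page already monitors this guest page, just link into
       its chain; only the chain head owns the physical access handler. */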
    if (pPageHead)
    {
        Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
        Assert(pPageHead->iMonitoredPrev != pPage->idx);
        pPage->iMonitoredPrev = pPageHead->idx;
        pPage->iMonitoredNext = pPageHead->iMonitoredNext;
        if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
            pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
        pPageHead->iMonitoredNext = pPage->idx;
        rc = VINF_SUCCESS;
    }
    else
    {
        Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
        PVM pVM = pPool->CTXSUFF(pVM);
        const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
        rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
                                          GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
                                          pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
                                          pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
                                          pPool->pfnAccessHandlerGC, MMHyperCCToRC(pVM, pPage),
                                          pPool->pszAccessHandler);
        /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
         * the heap size should suffice. */
        AssertFatalRC(rc);
        if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
            rc = VERR_PGM_POOL_CLEARED;
    }
    pPage->fMonitored = true;
    return rc;
}


/**
 * Disables write monitoring of a guest page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
 * @param   pPool   The pool.
 * @param   pPage   The cached page.
 */
static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    /*
     * Filter out the relevant kinds.
     */
    switch (pPage->enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_ROOT_PDPT:
            break;

        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            /* Nothing to monitor here. */
            return VINF_SUCCESS;

        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
#ifdef PGMPOOL_WITH_MIXED_PT_CR3
            break;
#endif
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        default:
            AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
    }

    /*
     * Remove the page from the monitored list or uninstall it if last.
     */
    const PVM pVM = pPool->CTXSUFF(pVM);
    int rc;
    if (    pPage->iMonitoredNext != NIL_PGMPOOL_IDX
        ||  pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
    {
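        /* If this page is the chain head, hand the access handler callbacks
           over to the next page before unlinking. */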
        if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
        {
            PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
            pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
            pNewHead->fCR3Mix = pPage->fCR3Mix;
            rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
                                                   pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
                                                   pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
                                                   pPool->pfnAccessHandlerGC, MMHyperCCToRC(pVM, pNewHead),
                                                   pPool->pszAccessHandler);
            AssertFatalRCSuccess(rc);
            pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
        }
        else
        {
            pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
            if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
            {
                pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
                pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
            }
            pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
            rc = VINF_SUCCESS;
        }
    }
    else
    {
        rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
        AssertFatalRC(rc);
        if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
            rc = VERR_PGM_POOL_CLEARED;
    }
    pPage->fMonitored = false;

    /*
     * Remove it from the list of modified pages (if in it).
     */
    pgmPoolMonitorModifiedRemove(pPool, pPage);

    return rc;
}


#ifdef PGMPOOL_WITH_MIXED_PT_CR3
/**
 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
 *
 * @param   pPool   The Pool.
 * @param   pPage   A page in the chain.
 * @param   fCR3Mix The new fCR3Mix value.
 */
static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
{
    /* current */
    pPage->fCR3Mix = fCR3Mix;

    /* before */
    int16_t idx = pPage->iMonitoredPrev;
    while (idx != NIL_PGMPOOL_IDX)
    {
        pPool->aPages[idx].fCR3Mix = fCR3Mix;
        idx = pPool->aPages[idx].iMonitoredPrev;
    }

    /* after */
    idx = pPage->iMonitoredNext;
    while (idx != NIL_PGMPOOL_IDX)
    {
        pPool->aPages[idx].fCR3Mix = fCR3Mix;
        idx = pPool->aPages[idx].iMonitoredNext;
    }
}


/**
 * Installs or modifies monitoring of a CR3 page (special).
 *
 * We're pretending the CR3 page is shadowed by the pool so we can use the
 * generic mechanisms in detecting chained monitoring. (This also gives us a
 * taste of what code changes are required to really pool CR3 shadow pages.)
 *
 * @returns VBox status code.
 * @param   pPool       The pool.
 * @param   idxRoot     The CR3 (root) page index.
 * @param   GCPhysCR3   The (new) CR3 value.
 */
int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
{
    Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
    PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
    LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
             idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));

    /*
     * The unlikely case where it already matches.
     */
    if (pPage->GCPhys == GCPhysCR3)
    {
        Assert(pPage->fMonitored);
        return VINF_SUCCESS;
    }

    /*
     * Flush the current monitoring and remove it from the hash.
     */
    int rc = VINF_SUCCESS;
    if (pPage->fMonitored)
    {
        pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
        rc = pgmPoolMonitorFlush(pPool, pPage);
        if (rc == VERR_PGM_POOL_CLEARED)
            rc = VINF_SUCCESS;
        else
            AssertFatalRC(rc);
        pgmPoolHashRemove(pPool, pPage);
    }

    /*
     * Monitor the page at the new location and insert it into the hash.
     */
    pPage->GCPhys = GCPhysCR3;
    int rc2 = pgmPoolMonitorInsert(pPool, pPage);
    if (rc2 != VERR_PGM_POOL_CLEARED)
    {
        AssertFatalRC(rc2);
        if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
            rc = rc2;
    }
    pgmPoolHashInsert(pPool, pPage);
    pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
    return rc;
}
1713
1714
1715/**
1716 * Removes the monitoring of a CR3 page (special).
1717 *
1718 * @returns VBox status code.
1719 * @param pPool The pool.
1720 * @param idxRoot The CR3 (root) page index.
1721 */
1722int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1723{
1724 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1725 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1726 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1727 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1728
1729 if (!pPage->fMonitored)
1730 return VINF_SUCCESS;
1731
1732 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1733 int rc = pgmPoolMonitorFlush(pPool, pPage);
1734 if (rc != VERR_PGM_POOL_CLEARED)
1735 AssertFatalRC(rc);
1736 else
1737 rc = VINF_SUCCESS;
1738 pgmPoolHashRemove(pPool, pPage);
1739 Assert(!pPage->fMonitored);
1740 pPage->GCPhys = NIL_RTGCPHYS;
1741 return rc;
1742}
1743#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1744
1745
1746/**
1747 * Inserts the page into the list of modified pages.
1748 *
1749 * @param pPool The pool.
1750 * @param pPage The page.
1751 */
1752void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1753{
1754 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1755 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1756 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1757 && pPool->iModifiedHead != pPage->idx,
1758 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1759 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1760 pPool->iModifiedHead, pPool->cModifiedPages));
1761
1762 pPage->iModifiedNext = pPool->iModifiedHead;
1763 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1764 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1765 pPool->iModifiedHead = pPage->idx;
1766 pPool->cModifiedPages++;
1767#ifdef VBOX_WITH_STATISTICS
1768 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1769 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1770#endif
1771}
1772
1773
1774/**
1775 * Removes the page from the list of modified pages and resets the
1776 * modification counter.
1777 *
1778 * @param pPool The pool.
1779 * @param pPage The page which is believed to be in the list of modified pages.
1780 */
1781static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1782{
1783 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1784 if (pPool->iModifiedHead == pPage->idx)
1785 {
1786 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1787 pPool->iModifiedHead = pPage->iModifiedNext;
1788 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1789 {
1790 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1791 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1792 }
1793 pPool->cModifiedPages--;
1794 }
1795 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1796 {
1797 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1798 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1799 {
1800 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1801 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1802 }
1803 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1804 pPool->cModifiedPages--;
1805 }
1806 else
1807 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1808 pPage->cModifications = 0;
1809}
1810
1811
1812/**
1813 * Zaps the list of modified pages, resetting their modification counters in the process.
1814 *
1815 * @param pVM The VM handle.
1816 */
1817void pgmPoolMonitorModifiedClearAll(PVM pVM)
1818{
1819 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1820 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1821
1822 unsigned cPages = 0; NOREF(cPages);
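/* cPages is only incremented by the Assert below, hence the NOREF for non-strict builds. */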
1823 uint16_t idx = pPool->iModifiedHead;
1824 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1825 while (idx != NIL_PGMPOOL_IDX)
1826 {
1827 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1828 idx = pPage->iModifiedNext;
1829 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1830 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1831 pPage->cModifications = 0;
1832 Assert(++cPages);
1833 }
1834 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1835 pPool->cModifiedPages = 0;
1836}
1837
1838
1839/**
1840 * Clear all shadow pages and clear all modification counters.
1841 *
1842 * @param pVM The VM handle.
1843 * @remark Should only be used when monitoring is available, thus placed in
1844 * the PGMPOOL_WITH_MONITORING #ifdef.
1845 */
1846void pgmPoolClearAll(PVM pVM)
1847{
1848 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1849 STAM_PROFILE_START(&pPool->StatClearAll, c);
1850 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1851
1852 /*
1853 * Iterate all the pages until we've encountered all those in use.
1854 * This is a simple but not quite optimal solution.
1855 */
1856 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1857 unsigned cLeft = pPool->cUsedPages;
1858 unsigned iPage = pPool->cCurPages;
1859 while (--iPage >= PGMPOOL_IDX_FIRST)
1860 {
1861 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1862 if (pPage->GCPhys != NIL_RTGCPHYS)
1863 {
1864 switch (pPage->enmKind)
1865 {
1866 /*
1867 * We only care about shadow page tables.
1868 */
1869 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1870 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1871 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1872 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1873 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1874 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1875 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1876 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1877 {
1878#ifdef PGMPOOL_WITH_USER_TRACKING
1879 if (pPage->cPresent)
1880#endif
1881 {
1882 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
1883 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1884 ASMMemZeroPage(pvShw);
1885 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1886#ifdef PGMPOOL_WITH_USER_TRACKING
1887 pPage->cPresent = 0;
1888 pPage->iFirstPresent = ~0;
1889#endif
1890 }
1891 }
1892 /* fall thru */
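/* Note: the page table cases above deliberately fall through so that the modified-list state below gets reset for every page kind. */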
1893
1894 default:
1895 Assert(!pPage->cModifications || ++cModifiedPages);
1896 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1897 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1898 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1899 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1900 pPage->cModifications = 0;
1901 break;
1902
1903 }
1904 if (!--cLeft)
1905 break;
1906 }
1907 }
1908
1909 /* sweep the special pages too. */
1910 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1911 {
1912 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1913 if (pPage->GCPhys != NIL_RTGCPHYS)
1914 {
1915 Assert(!pPage->cModifications || ++cModifiedPages);
1916 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1917 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1918 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1919 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1920 pPage->cModifications = 0;
1921 }
1922 }
1923
1924#ifndef DEBUG_michael
1925 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1926#endif
1927 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1928 pPool->cModifiedPages = 0;
1929
1930#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1931 /*
1932 * Clear all the GCPhys links and rebuild the phys ext free list.
1933 */
1934 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
1935 pRam;
1936 pRam = CTXALLSUFF(pRam->pNext))
1937 {
1938 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1939 while (iPage-- > 0)
1940 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
1941 }
1942
1943 pPool->iPhysExtFreeHead = 0;
1944 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
1945 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
1946 for (unsigned i = 0; i < cMaxPhysExts; i++)
1947 {
1948 paPhysExts[i].iNext = i + 1;
1949 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
1950 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
1951 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
1952 }
1953 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
1954#endif
1955
1956
1957 pPool->cPresent = 0;
1958 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
1959}
1960
1961/**
1962 * Handles SyncCR3 pool tasks.
1963 *
1964 * @returns VBox status code.
1965 * @retval VINF_SUCCESS if successfully handled.
1966 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
1967 * @param pVM The VM handle.
1968 * @remark Should only be used when monitoring is available, thus placed in
1969 * the PGMPOOL_WITH_MONITORING #ifdef.
1970 */
1971int pgmPoolSyncCR3(PVM pVM)
1972{
1973 /*
1974 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
1975 * Occasionally we will have to clear all the shadow page tables because we wanted
1976 * to monitor a page which was mapped by too many shadow page tables. This operation
1977 * is sometimes referred to as a 'lightweight flush'.
1978 */
1979 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
1980 pgmPoolMonitorModifiedClearAll(pVM);
1981 else
1982 {
1983# ifndef IN_GC
1984 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
1985 pgmPoolClearAll(pVM);
1986# else
1987 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
1988 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
1989 return VINF_PGM_SYNC_CR3;
1990# endif
1991 }
1992 return VINF_SUCCESS;
1993}
1994#endif /* PGMPOOL_WITH_MONITORING */
1995
1996#ifdef PGMPOOL_WITH_USER_TRACKING
1997/**
1998 * Frees up at least one user entry.
1999 *
2000 * @returns VBox status code.
2001 * @retval VINF_SUCCESS if a user entry was successfully freed.
2002 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2003 * @param pPool The pool.
2004 * @param iUser The user index.
2005 */
2006static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2007{
2008 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2009#ifdef PGMPOOL_WITH_CACHE
2010 /*
2011 * Just free cached pages in a braindead fashion.
2012 */
2013 /** @todo walk the age list backwards and free the first with usage. */
2014 int rc = VINF_SUCCESS;
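/* Free one cached page at a time until a user node shows up on the free list. */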
2015 do
2016 {
2017 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2018 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
2019 rc = rc2;
2020 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2021 return rc;
2022#else
2023 /*
2024 * Lazy approach.
2025 */
2026 pgmPoolFlushAllInt(pPool);
2027 return VERR_PGM_POOL_FLUSHED;
2028#endif
2029}
2030
2031
2032/**
2033 * Inserts a page into the cache.
2034 *
2035 * This will create a user node for the page, insert it into the GCPhys
2036 * hash, and insert it into the age list.
2037 *
2038 * @returns VBox status code.
2039 * @retval VINF_SUCCESS if successfully added.
2040 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2041 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
2042 * @param pPool The pool.
2043 * @param pPage The cached page.
2044 * @param GCPhys The GC physical address of the page we're gonna shadow.
2045 * @param iUser The user index.
2046 * @param iUserTable The user table index.
2047 */
2048DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2049{
2050 int rc = VINF_SUCCESS;
2051 PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);
2052
2053 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
2054
2055 /*
2056 * Find a free user node.
2057 */
2058 uint16_t i = pPool->iUserFreeHead;
2059 if (i == NIL_PGMPOOL_USER_INDEX)
2060 {
2061 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2062 if (VBOX_FAILURE(rc))
2063 return rc;
2064 i = pPool->iUserFreeHead;
2065 }
2066
2067 /*
2068 * Unlink the user node from the free list,
2069 * initialize and insert it into the user list.
2070 */
2071 pPool->iUserFreeHead = pUser[i].iNext;
2072 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
2073 pUser[i].iUser = iUser;
2074 pUser[i].iUserTable = iUserTable;
2075 pPage->iUserHead = i;
2076
2077 /*
2078 * Insert into cache and enable monitoring of the guest page if enabled.
2079 *
2080 * Until we implement caching of all levels, including the CR3 one, we'll
2081 * have to make sure we don't try to monitor & cache any recursive reuse of
2082 * a monitored CR3 page. Because all Windows versions are doing this we'll
2083 * have to be able to do combined access monitoring, CR3 + PT and
2084 * PD + PT (guest PAE).
2085 *
2086 * Update:
2087 * We're now cooperating with the CR3 monitor if an uncacheable page is found.
2088 */
2089#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2090# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2091 const bool fCanBeMonitored = true;
2092# else
2093 bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2094 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2095 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2096# endif
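/* Note: without mixed PT/CR3 monitoring, a page sharing its physical page with
   the separately monitored guest CR3 cannot be monitored as well, unless it is
   the shadow of a big (2/4 MB) guest page. */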
2097# ifdef PGMPOOL_WITH_CACHE
2098 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2099# endif
2100 if (fCanBeMonitored)
2101 {
2102# ifdef PGMPOOL_WITH_MONITORING
2103 rc = pgmPoolMonitorInsert(pPool, pPage);
2104 if (rc == VERR_PGM_POOL_CLEARED)
2105 {
2106 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2107# ifndef PGMPOOL_WITH_CACHE
2108 pgmPoolMonitorFlush(pPool, pPage);
2109 rc = VERR_PGM_POOL_FLUSHED;
2110# endif
2111 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2112 pUser[i].iNext = pPool->iUserFreeHead;
2113 pUser[i].iUser = NIL_PGMPOOL_IDX;
2114 pPool->iUserFreeHead = i;
2115 }
2116 # endif
2117 }
2118 #endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2119 return rc;
2120}
2121
2122
2123# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2124/**
2125 * Adds a user reference to a page.
2126 *
2127 * This will add a user record for the page and move the page to the
2128 * head of the cache's age list.
2129 *
2130 * @returns VBox status code.
2131 * @retval VINF_SUCCESS if successfully added.
2132 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2133 * @param pPool The pool.
2134 * @param pPage The cached page.
2135 * @param iUser The user index.
2136 * @param iUserTable The user table.
2137 */
2138static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2139{
2140 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2141
2142 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
2143# ifdef VBOX_STRICT
2144 /*
2145 * Check that the entry doesn't already exist.
2146 */
2147 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2148 {
2149 uint16_t i = pPage->iUserHead;
2150 do
2151 {
2152 Assert(i < pPool->cMaxUsers);
2153 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2154 i = paUsers[i].iNext;
2155 } while (i != NIL_PGMPOOL_USER_INDEX);
2156 }
2157# endif
2158
2159 /*
2160 * Allocate a user node.
2161 */
2162 uint16_t i = pPool->iUserFreeHead;
2163 if (i == NIL_PGMPOOL_USER_INDEX)
2164 {
2165 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2166 if (VBOX_FAILURE(rc))
2167 return rc;
2168 i = pPool->iUserFreeHead;
2169 }
2170 pPool->iUserFreeHead = paUsers[i].iNext;
2171
2172 /*
2173 * Initialize the user node and insert it.
2174 */
2175 paUsers[i].iNext = pPage->iUserHead;
2176 paUsers[i].iUser = iUser;
2177 paUsers[i].iUserTable = iUserTable;
2178 pPage->iUserHead = i;
2179
2180# ifdef PGMPOOL_WITH_CACHE
2181 /*
2182 * Tell the cache to update its replacement stats for this page.
2183 */
2184 pgmPoolCacheUsed(pPool, pPage);
2185# endif
2186 return VINF_SUCCESS;
2187}
2188# endif /* PGMPOOL_WITH_CACHE */
2189
2190
2191/**
2192 * Frees a user record associated with a page.
2193 *
2194 * This does not clear the entry in the user table, it simply returns the
2195 * user record to the chain of free records.
2196 *
2197 * @param pPool The pool.
2198 * @param pPage The shadow page.
2199 * @param iUser The shadow page pool index of the user table.
2200 * @param iUserTable The index into the user table (shadowed).
2201 */
2202static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2203{
2204 /*
2205 * Unlink and free the specified user entry.
2206 */
2207 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2208
2209 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2210 uint16_t i = pPage->iUserHead;
2211 if ( i != NIL_PGMPOOL_USER_INDEX
2212 && paUsers[i].iUser == iUser
2213 && paUsers[i].iUserTable == iUserTable)
2214 {
2215 pPage->iUserHead = paUsers[i].iNext;
2216
2217 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2218 paUsers[i].iNext = pPool->iUserFreeHead;
2219 pPool->iUserFreeHead = i;
2220 return;
2221 }
2222
2223 /* General: Linear search. */
2224 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2225 while (i != NIL_PGMPOOL_USER_INDEX)
2226 {
2227 if ( paUsers[i].iUser == iUser
2228 && paUsers[i].iUserTable == iUserTable)
2229 {
2230 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2231 paUsers[iPrev].iNext = paUsers[i].iNext;
2232 else
2233 pPage->iUserHead = paUsers[i].iNext;
2234
2235 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2236 paUsers[i].iNext = pPool->iUserFreeHead;
2237 pPool->iUserFreeHead = i;
2238 return;
2239 }
2240 iPrev = i;
2241 i = paUsers[i].iNext;
2242 }
2243
2244 /* Fatal: didn't find it */
2245 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
2246 iUser, iUserTable, pPage->GCPhys));
2247}
2248
2249
2250/**
2251 * Gets the entry size of a shadow table.
2252 *
2253 * @param enmKind The kind of page.
2254 *
2255 * @returns The size of the entry in bytes. That is, 4 or 8.
2256 * @returns If the kind is not for a table, an assertion is raised and 0 is
2257 * returned.
2258 */
2259DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2260{
2261 switch (enmKind)
2262 {
2263 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2264 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2265 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2266 case PGMPOOLKIND_ROOT_32BIT_PD:
2267 return 4;
2268
2269 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2270 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2271 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2272 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2273 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2274 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2275 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2276 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2277 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2278 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2279 case PGMPOOLKIND_ROOT_PAE_PD:
2280 case PGMPOOLKIND_ROOT_PDPT:
2281 case PGMPOOLKIND_ROOT_NESTED:
2282 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2283 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2284 return 8;
2285
2286 default:
2287 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2288 }
2289}
2290
2291
2292/**
2293 * Gets the entry size of a guest table.
2294 *
2295 * @param enmKind The kind of page.
2296 *
2297 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2298 * @returns If the kind is not for a table, an assertion is raised and 0 is
2299 * returned.
2300 */
2301DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2302{
2303 switch (enmKind)
2304 {
2305 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2306 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2307 case PGMPOOLKIND_ROOT_32BIT_PD:
2308 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2309 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2310 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2311 return 4;
2312
2313 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2314 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2315 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2316 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2317 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2318 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2319 case PGMPOOLKIND_ROOT_PAE_PD:
2320 case PGMPOOLKIND_ROOT_PDPT:
2321 return 8;
2322
2323 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2324 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2325 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2326 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2327 case PGMPOOLKIND_ROOT_NESTED:
2328 /** @todo can we return 0? (nobody is calling this...) */
2329 AssertFailed();
2330 return 0;
2331
2332 default:
2333 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2334 }
2335}
2336
2337
2338#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2339/**
2340 * Scans one shadow page table for mappings of a physical page.
2341 *
2342 * @param pVM The VM handle.
2343 * @param pPhysPage The guest page in question.
2344 * @param iShw The shadow page table.
2345 * @param cRefs The number of references made in that PT.
2346 */
2347static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2348{
2349 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2350 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2351
2352 /*
2353 * Assert sanity.
2354 */
2355 Assert(cRefs == 1);
2356 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2357 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2358
2359 /*
2360 * Then, clear the actual mappings to the page in the shadow PT.
2361 */
2362 switch (pPage->enmKind)
2363 {
2364 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2365 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2366 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2367 {
2368 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
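/* The search key: a present PTE mapping the host physical address of the guest page. */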
2369 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2370 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2371 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2372 {
2373 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2374 pPT->a[i].u = 0;
2375 cRefs--;
2376 if (!cRefs)
2377 return;
2378 }
2379#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2380 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2381 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2382 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2383 {
2384 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2385 pPT->a[i].u = 0;
2386 }
2387#endif
2388 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2389 break;
2390 }
2391
2392 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2393 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2394 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2395 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2396 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2397 {
2398 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2399 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2400 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2401 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2402 {
2403 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2404 pPT->a[i].u = 0;
2405 cRefs--;
2406 if (!cRefs)
2407 return;
2408 }
2409#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2410 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2411 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2412 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2413 {
2414 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2415 pPT->a[i].u = 0;
2416 }
2417#endif
2418 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2419 break;
2420 }
2421
2422 default:
2423 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2424 }
2425}
2426
2427
2428/**
2429 * Scans one shadow page table for mappings of a physical page.
2430 *
2431 * @param pVM The VM handle.
2432 * @param pPhysPage The guest page in question.
2433 * @param iShw The shadow page table.
2434 * @param cRefs The number of references made in that PT.
2435 */
2436void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2437{
2438 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool);
2439 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2440 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2441 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2442 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2443 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2444}
2445
2446
2447/**
2448 * Flushes a list of shadow page tables mapping the same physical page.
2449 *
2450 * @param pVM The VM handle.
2451 * @param pPhysPage The guest page in question.
2452 * @param iPhysExt The physical cross reference extent list to flush.
2453 */
2454void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2455{
2456 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2457 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2458 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%d\n", pPhysPage->HCPhys, iPhysExt));
2459
2460 const uint16_t iPhysExtStart = iPhysExt;
2461 PPGMPOOLPHYSEXT pPhysExt;
2462 do
2463 {
2464 Assert(iPhysExt < pPool->cMaxPhysExts);
2465 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2466 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2467 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2468 {
2469 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2470 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2471 }
2472
2473 /* next */
2474 iPhysExt = pPhysExt->iNext;
2475 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2476
2477 /* insert the list into the free list and clear the ram range entry. */
2478 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2479 pPool->iPhysExtFreeHead = iPhysExtStart;
2480 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2481
2482 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2483}
2484#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2485
2486
2487/**
2488 * Scans all shadow page tables for mappings of a physical page.
2489 *
2490 * This may be slow, but it's most likely more efficient than cleaning
2491 * out the entire page pool / cache.
2492 *
2493 * @returns VBox status code.
2494 * @retval VINF_SUCCESS if all references have been successfully cleared.
2495 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2496 * a page pool cleaning.
2497 *
2498 * @param pVM The VM handle.
2499 * @param pPhysPage The guest page in question.
2500 */
2501int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2502{
2503 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2504 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2505 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2506 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2507
2508#if 1
2509 /*
2510 * There is a limit to what makes sense.
2511 */
2512 if (pPool->cPresent > 1024)
2513 {
2514 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2515 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2516 return VINF_PGM_GCPHYS_ALIASED;
2517 }
2518#endif
2519
2520 /*
2521 * Iterate all the pages until we've encountered all those in use.
2522 * This is a simple but not quite optimal solution.
2523 */
2524 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2525 const uint32_t u32 = u64;
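/* Search keys: a present PTE mapping the page's host physical address, in both
   the 32-bit and the PAE/long mode PTE widths. */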
2526 unsigned cLeft = pPool->cUsedPages;
2527 unsigned iPage = pPool->cCurPages;
2528 while (--iPage >= PGMPOOL_IDX_FIRST)
2529 {
2530 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2531 if (pPage->GCPhys != NIL_RTGCPHYS)
2532 {
2533 switch (pPage->enmKind)
2534 {
2535 /*
2536 * We only care about shadow page tables.
2537 */
2538 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2539 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2540 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2541 {
2542 unsigned cPresent = pPage->cPresent;
2543 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2544 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2545 if (pPT->a[i].n.u1Present)
2546 {
2547 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2548 {
2549 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2550 pPT->a[i].u = 0;
2551 }
2552 if (!--cPresent)
2553 break;
2554 }
2555 break;
2556 }
2557
2558 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2559 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2560 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2561 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2562 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2563 {
2564 unsigned cPresent = pPage->cPresent;
2565 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2566 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2567 if (pPT->a[i].n.u1Present)
2568 {
2569 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2570 {
2571 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2572 pPT->a[i].u = 0;
2573 }
2574 if (!--cPresent)
2575 break;
2576 }
2577 break;
2578 }
2579 }
2580 if (!--cLeft)
2581 break;
2582 }
2583 }
2584
2585 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2586 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2587 return VINF_SUCCESS;
2588}
2589
2590
2591/**
2592 * Clears the user entry in a user table.
2593 *
2594 * This is used to remove all references to a page when flushing it.
2595 */
2596static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2597{
2598 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2599 Assert(pUser->iUser < pPool->cCurPages);
2600
2601 /*
2602 * Map the user page.
2603 */
2604 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2605 union
2606 {
2607 uint64_t *pau64;
2608 uint32_t *pau32;
2609 } u;
2610 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pUserPage);
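/* The union view lets the entry be cleared as either a 32-bit or a 64-bit value, depending on the user page kind. */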
2611
2612 /* Safety precaution in case we change the paging for other modes too in the future. */
2613 Assert(PGMGetHyperCR3(CTXSUFF(pPool->pVM)) != pPage->Core.Key);
2614
2615#ifdef VBOX_STRICT
2616 /*
2617 * Some sanity checks.
2618 */
2619 switch (pUserPage->enmKind)
2620 {
2621 case PGMPOOLKIND_ROOT_32BIT_PD:
2622 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2623 Assert(!(u.pau32[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2624 break;
2625 case PGMPOOLKIND_ROOT_PAE_PD:
2626 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2627 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2628 break;
2629 case PGMPOOLKIND_ROOT_PDPT:
2630 Assert(pUser->iUserTable < 4);
2631 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2632 break;
2633 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2634 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2635 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2636 break;
2637 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2638 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2639 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2640 break;
2641 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2642 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2643 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2644 break;
2645 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2646 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2647 /* GCPhys >> PAGE_SHIFT is the index here */
2648 break;
2649 case PGMPOOLKIND_ROOT_NESTED:
2650 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2651 break;
2652
2653 default:
2654 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2655 break;
2656 }
2657#endif /* VBOX_STRICT */
2658
2659 /*
2660 * Clear the entry in the user page.
2661 */
2662 switch (pUserPage->enmKind)
2663 {
2664 /* 32-bit entries */
2665 case PGMPOOLKIND_ROOT_32BIT_PD:
2666 u.pau32[pUser->iUserTable] = 0;
2667 break;
2668
2669 /* 64-bit entries */
2670 case PGMPOOLKIND_ROOT_PAE_PD:
2671 case PGMPOOLKIND_ROOT_PDPT:
2672 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2673 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2674 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2675 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2676 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2677 case PGMPOOLKIND_ROOT_NESTED:
2678 u.pau64[pUser->iUserTable] = 0;
2679 break;
2680
2681 default:
2682 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2683 }
2684}
2685
2686
2687/**
2688 * Clears all users of a page.
2689 */
2690static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2691{
2692 /*
2693 * Free all the user records.
2694 */
2695 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2696 uint16_t i = pPage->iUserHead;
2697 while (i != NIL_PGMPOOL_USER_INDEX)
2698 {
2699 /* Clear the entry in the user table. */
2700 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2701
2702 /* Free it. */
2703 const uint16_t iNext = paUsers[i].iNext;
2704 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2705 paUsers[i].iNext = pPool->iUserFreeHead;
2706 pPool->iUserFreeHead = i;
2707
2708 /* Next. */
2709 i = iNext;
2710 }
2711 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2712}
2713
2714
2715#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2716/**
2717 * Allocates a new physical cross reference extent.
2718 *
2719 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2720 * @param pVM The VM handle.
2721 * @param piPhysExt Where to store the phys ext index.
2722 */
2723PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2724{
2725 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2726 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2727 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2728 {
2729 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2730 return NULL;
2731 }
2732 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2733 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2734 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2735 *piPhysExt = iPhysExt;
2736 return pPhysExt;
2737}
2738
2739
2740/**
2741 * Frees a physical cross reference extent.
2742 *
2743 * @param pVM The VM handle.
2744 * @param iPhysExt The extent to free.
2745 */
2746void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2747{
2748 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2749 Assert(iPhysExt < pPool->cMaxPhysExts);
2750 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2751 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2752 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2753 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2754 pPool->iPhysExtFreeHead = iPhysExt;
2755}
2756
2757
2758/**
2759 * Frees a list of physical cross reference extents.
2760 *
2761 * @param pVM The VM handle.
2762 * @param iPhysExt The index of the list head extent.
2763 */
2764void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2765{
2766 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2767
2768 const uint16_t iPhysExtStart = iPhysExt;
2769 PPGMPOOLPHYSEXT pPhysExt;
2770 do
2771 {
2772 Assert(iPhysExt < pPool->cMaxPhysExts);
2773 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2774 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2775 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2776
2777 /* next */
2778 iPhysExt = pPhysExt->iNext;
2779 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2780
2781 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2782 pPool->iPhysExtFreeHead = iPhysExtStart;
2783}
2784
2785/**
2786 * Insert a reference into a list of physical cross reference extents.
2787 *
2788 * @returns The new ram range flags (top 16-bits).
2789 *
2790 * @param pVM The VM handle.
2791 * @param iPhysExt The physical extent index of the list head.
2792 * @param iShwPT The shadow page table index.
2793 *
2794 */
2795static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2796{
2797 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2798 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2799
2800 /* special common case. */
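/* (pgmPoolTrackPhysExtAddref fills slots 0 and 1 when it creates an extent,
   so slot 2 is typically the first free one.) */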
2801 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2802 {
2803 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2804 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2805 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2806 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2807 }
2808
2809 /* general treatment. */
2810 const uint16_t iPhysExtStart = iPhysExt;
2811 unsigned cMax = 15;
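/* Cap the list walk at 15 extents; beyond that we give up and mark the page as overflowed. */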
2812 for (;;)
2813 {
2814 Assert(iPhysExt < pPool->cMaxPhysExts);
2815 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2816 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2817 {
2818 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2819 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2820 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2821 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2822 }
2823 if (!--cMax)
2824 {
2825 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2826 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2827 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2828 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2829 }
/* Advance to the next extent; stop at the end of the list so a new extent can be appended below. */
iPhysExt = paPhysExts[iPhysExt].iNext;
if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
break;
2830 }
2831
2832 /* add another extent to the list. */
2833 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2834 if (!pNew)
2835 {
2836 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2837 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2838 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2839 }
2840 pNew->iNext = iPhysExtStart;
2841 pNew->aidx[0] = iShwPT;
2842 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2843 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2844}
2845
2846
2847/**
2848 * Add a reference to guest physical page where extents are in use.
2849 *
2850 * @returns The new ram range flags (top 16-bits).
2851 *
2852 * @param pVM The VM handle.
2853 * @param u16 The ram range flags (top 16-bits).
2854 * @param iShwPT The shadow page table index.
2855 */
2856uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2857{
2858 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2859 {
2860 /*
2861 * Convert to extent list.
2862 */
2863 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2864 uint16_t iPhysExt;
2865 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2866 if (pPhysExt)
2867 {
2868 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2869 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2870 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2871 pPhysExt->aidx[1] = iShwPT;
2872 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2873 }
2874 else
2875 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2876 }
2877 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2878 {
2879 /*
2880 * Insert into the extent list.
2881 */
2882 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2883 }
2884 else
2885 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2886 return u16;
2887}
2888
2889
2890/**
2891 * Clear references to guest physical memory.
2892 *
2893 * @param pPool The pool.
2894 * @param pPage The page.
2895 * @param pPhysPage Pointer to the aPages entry in the ram range.
2896 */
2897void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
2898{
2899 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
2900 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2901
2902 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2903 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2904 {
2905 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2906 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2907 do
2908 {
2909 Assert(iPhysExt < pPool->cMaxPhysExts);
2910
2911 /*
2912 * Look for the shadow page and check if it's all freed.
2913 */
2914 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2915 {
2916 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2917 {
2918 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2919
2920 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2921 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2922 {
2923 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2924 return;
2925 }
2926
2927 /* we can free the node. */
2928 PVM pVM = pPool->CTXSUFF(pVM);
2929 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2930 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2931 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2932 {
2933 /* lonely node */
2934 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2935 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
2936 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2937 }
2938 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2939 {
2940 /* head */
2941 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
2942 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
2943 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2944 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2945 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2946 }
2947 else
2948 {
2949 /* in list */
2950 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2951 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2952 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2953 }
2954 iPhysExt = iPhysExtNext;
2955 return;
2956 }
2957 }
2958
2959 /* next */
2960 iPhysExtPrev = iPhysExt;
2961 iPhysExt = paPhysExts[iPhysExt].iNext;
2962 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2963
2964 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2965 }
2966 else /* nothing to do */
2967 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
2968}
2969
2970
2971
2972/**
2973 * Clear references to guest physical memory.
2974 *
2975 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
2976 * is assumed to be correct, so the linear search can be skipped and we can assert
2977 * at an earlier point.
2978 *
2979 * @param pPool The pool.
2980 * @param pPage The page.
2981 * @param HCPhys The host physical address corresponding to the guest page.
2982 * @param GCPhys The guest physical address corresponding to HCPhys.
2983 */
2984static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
2985{
2986 /*
2987 * Walk range list.
2988 */
2989 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
2990 while (pRam)
2991 {
2992 RTGCPHYS off = GCPhys - pRam->GCPhys;
2993 if (off < pRam->cb)
2994 {
2995 /* does it match? */
2996 const unsigned iPage = off >> PAGE_SHIFT;
2997 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
2998 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
2999 Log(("pgmPoolTracDerefGCPhys %VHp vs %VHp\n", HCPhysPage, HCPhys));
3000 if (HCPhysPage == HCPhys)
3001 {
3002 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3003 return;
3004 }
3005 break;
3006 }
3007 pRam = CTXALLSUFF(pRam->pNext);
3008 }
3009 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
3010}
3011
3012
3013/**
3014 * Clear references to guest physical memory.
3015 *
3016 * @param pPool The pool.
3017 * @param pPage The page.
3018 * @param HCPhys The host physical address corresponding to the guest page.
3019 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3020 */
3021static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3022{
3023 /*
3024 * Walk range list.
3025 */
3026 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3027 while (pRam)
3028 {
3029 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3030 if (off < pRam->cb)
3031 {
3032 /* does it match? */
3033 const unsigned iPage = off >> PAGE_SHIFT;
3034 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3035 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3036 {
3037 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3038 return;
3039 }
3040 break;
3041 }
3042 pRam = CTXALLSUFF(pRam->pNext);
3043 }
3044
3045 /*
3046 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3047 */
3048 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3049 pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3050 while (pRam)
3051 {
3052 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3053 while (iPage-- > 0)
3054 {
3055 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3056 {
3057 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
3058 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3059 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3060 return;
3061 }
3062 }
3063 pRam = CTXALLSUFF(pRam->pNext);
3064 }
3065
3066 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
3067}
3068
3069
3070/**
3071 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3072 *
3073 * @param pPool The pool.
3074 * @param pPage The page.
3075 * @param pShwPT The shadow page table (mapping of the page).
3076 * @param pGstPT The guest page table.
3077 */
3078DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3079{
3080 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3081 if (pShwPT->a[i].n.u1Present)
3082 {
3083 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3084 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3085 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3086 if (!--pPage->cPresent)
3087 break;
3088 }
3089}
3090
3091
3092/**
3093 * Clear references to guest physical memory in a PAE / 32-bit page table.
3094 *
3095 * @param pPool The pool.
3096 * @param pPage The page.
3097 * @param pShwPT The shadow page table (mapping of the page).
3098 * @param pGstPT The guest page table (just a half one).
3099 */
3100DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3101{
3102 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3103 if (pShwPT->a[i].n.u1Present)
3104 {
3105 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3106 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3107 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3108 }
3109}
3110
3111
3112/**
3113 * Clear references to guest physical memory in a PAE / PAE page table.
3114 *
3115 * @param pPool The pool.
3116 * @param pPage The page.
3117 * @param pShwPT The shadow page table (mapping of the page).
3118 * @param pGstPT The guest page table.
3119 */
3120DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3121{
3122 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3123 if (pShwPT->a[i].n.u1Present)
3124 {
3125 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3126 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3127 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3128 }
3129}
3130
3131
3132/**
3133 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3134 *
3135 * @param pPool The pool.
3136 * @param pPage The page.
3137 * @param pShwPT The shadow page table (mapping of the page).
3138 */
3139DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3140{
3141 RTGCPHYS GCPhys = pPage->GCPhys;
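/* For a big-page shadow the guest addresses are linear: advance GCPhys one page per PTE. */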
3142 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3143 if (pShwPT->a[i].n.u1Present)
3144 {
3145 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3146 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3147 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3148 }
3149}
3150
3151
3152/**
3153 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3154 *
3155 * @param pPool The pool.
3156 * @param pPage The page.
3157 * @param pShwPT The shadow page table (mapping of the page).
3158 */
3159DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3160{
3161 RTGCPHYS GCPhys = pPage->GCPhys;
3162 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3163 if (pShwPT->a[i].n.u1Present)
3164 {
3165 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%VGp\n",
3166 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3167 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3168 }
3169}
3170#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3171
3172
3173/**
3174 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
3175 *
3176 * @param pPool The pool.
3177 * @param pPage The page.
3178 * @param pShwPD The shadow page directory (mapping of the page).
3179 */
3180DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3181{
3182 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3183 {
3184 if (pShwPD->a[i].n.u1Present)
3185 {
3186 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3187 if (pSubPage)
3188 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3189 else
3190 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3191 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3192 }
3193 }
3194}
3195
3196
3197/**
3198 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3199 *
3200 * @param pPool The pool.
3201 * @param pPage The page.
3202 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3203 */
3204DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3205{
3206 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3207 {
3208 if (pShwPDPT->a[i].n.u1Present)
3209 {
3210 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3211 if (pSubPage)
3212 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3213 else
3214 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3215 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3216 }
3217 }
3218}
3219
3220/**
3221 * Clear references to shadowed pages in a 64-bit level 4 page table.
3222 *
3223 * @param pPool The pool.
3224 * @param pPage The page.
3225 * @param pShwPML4 The shadow page map level-4 table (mapping of the page).
3226 */
3227DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3228{
3229 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3230 {
3231 if (pShwPML4->a[i].n.u1Present)
3232 {
3233 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PML4E_PG_MASK);
3234 if (pSubPage)
3235 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3236 else
3237 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3238 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3239 }
3240 }
3241}
3242
3243
3244/**
3245 * Clears all references made by this page.
3246 *
3247 * This includes other shadow pages and GC physical addresses.
3248 *
3249 * @param pPool The pool.
3250 * @param pPage The page.
3251 */
3252static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3253{
3254 /*
3255 * Map the shadow page and take action according to the page kind.
3256 */
3257 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
3258 switch (pPage->enmKind)
3259 {
3260#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3261 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3262 {
3263 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3264 void *pvGst;
3265 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3266 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3267 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3268 break;
3269 }
3270
3271 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3272 {
3273 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3274 void *pvGst;
3275 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3276 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3277 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3278 break;
3279 }
3280
3281 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3282 {
3283 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3284 void *pvGst;
3285 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3286 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3287 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3288 break;
3289 }
3290
3291 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3292 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3293 {
3294 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3295 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3296 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3297 break;
3298 }
3299
3300 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 4 MB page */
3301 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3302 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3303 {
3304 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3305 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3306 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3307 break;
3308 }
3309
3310#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3311 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3312 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3313 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3314 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3315 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3316 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3317 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3318 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3319 break;
3320#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3321
3322 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
3323 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3324 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3325 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3326 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3327 break;
3328
3329 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3330 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3331 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3332 break;
3333
3334 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
3335 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3336 break;
3337
3338 default:
3339 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3340 }
3341
3342 /* Paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3343 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3344 ASMMemZeroPage(pvShw);
3345 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3346 pPage->fZeroed = true;
3347}
3348#endif /* PGMPOOL_WITH_USER_TRACKING */
3349
3350
3351/**
3352 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3353 *
3354 * @param pPool The pool.
3355 */
3356static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3357{
3358 /*
3359 * These special pages are all mapped at the indexes 1 .. PGMPOOL_IDX_FIRST - 1.
3360 */
3361 Assert(NIL_PGMPOOL_IDX == 0);
3362 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3363 {
3364 /*
3365 * Get the page address.
3366 */
3367 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3368 union
3369 {
3370 uint64_t *pau64;
3371 uint32_t *pau32;
3372 } u;
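        /* The union above lets the switch below address the page as either
           32-bit or 64-bit entries without a cast at every access. */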
3373 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
3374
3375 /*
3376 * Mark stuff not present.
3377 */
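        /* Note: PDEs with PGM_PDFLAGS_MAPPING set back hypervisor mappings and
           must survive the flush; only plain present entries are zapped. */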
3378 switch (pPage->enmKind)
3379 {
3380 case PGMPOOLKIND_ROOT_32BIT_PD:
3381 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3382 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3383 u.pau32[iPage] = 0;
3384 break;
3385
3386 case PGMPOOLKIND_ROOT_PAE_PD:
3387 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * X86_PG_PAE_PDPE_ENTRIES; iPage++)
3388 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3389 u.pau64[iPage] = 0;
3390 break;
3391
3392 case PGMPOOLKIND_ROOT_PDPT:
3393 /* Not currently the root of any shadowed pages, ignore it. */
3394 break;
3395
3396 case PGMPOOLKIND_ROOT_NESTED:
3397 ASMMemZero32(u.pau64, PAGE_SIZE);
3398 break;
3399 }
3400 }
3401
3402 /*
3403 * Paranoia (to be removed), flag a global CR3 sync.
3404 */
3405 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
3406}
3407
3408
3409/**
3410 * Flushes the entire cache.
3411 *
3412 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3413 * and will execute this CR3 flush.
3414 *
3415 * @param pPool The pool.
3416 */
3417static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3418{
3419 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3420 LogFlow(("pgmPoolFlushAllInt:\n"));
3421
3422 /*
3423 * If there are no pages in the pool, there is nothing to do.
3424 */
3425 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3426 {
3427 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3428 return;
3429 }
3430
3431 /*
3432 * Nuke the free list and reinsert all pages into it.
3433 */
3434 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3435 {
3436 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3437
3438#ifdef IN_RING3
3439 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
3440#endif
3441#ifdef PGMPOOL_WITH_MONITORING
3442 if (pPage->fMonitored)
3443 pgmPoolMonitorFlush(pPool, pPage);
3444 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3445 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3446 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3447 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3448 pPage->cModifications = 0;
3449#endif
3450 pPage->GCPhys = NIL_RTGCPHYS;
3451 pPage->enmKind = PGMPOOLKIND_FREE;
3452 Assert(pPage->idx == i);
3453 pPage->iNext = i + 1;
3454 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3455 pPage->fSeenNonGlobal = false;
3456 pPage->fMonitored = false;
3457 pPage->fCached = false;
3458 pPage->fReusedFlushPending = false;
3459 pPage->fCR3Mix = false;
3460#ifdef PGMPOOL_WITH_USER_TRACKING
3461 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3462#endif
3463#ifdef PGMPOOL_WITH_CACHE
3464 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3465 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3466#endif
3467 }
3468 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3469 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3470 pPool->cUsedPages = 0;
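    /* The free list now runs PGMPOOL_IDX_FIRST .. cCurPages - 1 in ascending
       index order and no pages are in use. */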
3471
3472#ifdef PGMPOOL_WITH_USER_TRACKING
3473 /*
3474 * Zap and reinitialize the user records.
3475 */
3476 pPool->cPresent = 0;
3477 pPool->iUserFreeHead = 0;
3478 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
3479 const unsigned cMaxUsers = pPool->cMaxUsers;
3480 for (unsigned i = 0; i < cMaxUsers; i++)
3481 {
3482 paUsers[i].iNext = i + 1;
3483 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3484 paUsers[i].iUserTable = 0xfffffffe;
3485 }
3486 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3487#endif
3488
3489#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3490 /*
3491 * Clear all the GCPhys links and rebuild the phys ext free list.
3492 */
3493 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3494 pRam;
3495 pRam = CTXALLSUFF(pRam->pNext))
3496 {
3497 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3498 while (iPage-- > 0)
3499 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3500 }
3501
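    /* Each PGMPOOLPHYSEXT node tracks up to three extra shadow page references
       (aidx[0..2]) to a single physical page; chain them all into a free list. */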
3502 pPool->iPhysExtFreeHead = 0;
3503 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
3504 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3505 for (unsigned i = 0; i < cMaxPhysExts; i++)
3506 {
3507 paPhysExts[i].iNext = i + 1;
3508 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3509 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3510 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3511 }
3512 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3513#endif
3514
3515#ifdef PGMPOOL_WITH_MONITORING
3516 /*
3517 * Just zap the modified list.
3518 */
3519 pPool->cModifiedPages = 0;
3520 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3521#endif
3522
3523#ifdef PGMPOOL_WITH_CACHE
3524 /*
3525 * Clear the GCPhys hash and the age list.
3526 */
3527 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
3528 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3529 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3530 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3531#endif
3532
3533 /*
3534 * Flush all the special root pages.
3535 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3536 */
3537 pgmPoolFlushAllSpecialRoots(pPool);
3538 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3539 {
3540 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3541 pPage->iNext = NIL_PGMPOOL_IDX;
3542#ifdef PGMPOOL_WITH_MONITORING
3543 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3544 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3545 pPage->cModifications = 0;
3546 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3547 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3548 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3549 if (pPage->fMonitored)
3550 {
3551 PVM pVM = pPool->CTXSUFF(pVM);
3552 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3553 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3554 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3555 pPool->pfnAccessHandlerGC, MMHyperCCToRC(pVM, pPage),
3556 pPool->pszAccessHandler);
3557 AssertFatalRCSuccess(rc);
3558# ifdef PGMPOOL_WITH_CACHE
3559 pgmPoolHashInsert(pPool, pPage);
3560# endif
3561 }
3562#endif
3563#ifdef PGMPOOL_WITH_USER_TRACKING
3564 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3565#endif
3566#ifdef PGMPOOL_WITH_CACHE
3567 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3568 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3569#endif
3570 }
3571
3572 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3573}
3574
3575
3576/**
3577 * Flushes a pool page.
3578 *
3579 * This moves the page to the free list after removing all user references to it.
3580 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3581 *
3582 * @returns VBox status code.
3583 * @retval VINF_SUCCESS on success.
3584 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
3585 * @param pPool The pool.
3586 * @param pPage The shadow page to flush.
3587 */
3588int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3589{
3590 int rc = VINF_SUCCESS;
3591 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3592 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3593 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3594
3595 /*
3596 * Quietly reject any attempts at flushing any of the special root pages.
3597 */
3598 if (pPage->idx < PGMPOOL_IDX_FIRST)
3599 {
3600 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3601 return VINF_SUCCESS;
3602 }
3603
3604 /*
3605 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping.
3606 */
3607 if ( pPage->enmKind == PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4
3608 && PGMGetHyperCR3(CTXSUFF(pPool->pVM)) == pPage->Core.Key)
3609 {
3610 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3611 return VINF_SUCCESS;
3612 }
3613 /* Safety precaution in case we change the paging for other modes too in the future. */
3614 AssertFatal(PGMGetHyperCR3(CTXSUFF(pPool->pVM)) != pPage->Core.Key);
3615
3616 /*
3617 * Mark the page as being in need of an ASMMemZeroPage().
3618 */
3619 pPage->fZeroed = false;
3620
3621#ifdef PGMPOOL_WITH_USER_TRACKING
3622 /*
3623 * Clear the page.
3624 */
3625 pgmPoolTrackClearPageUsers(pPool, pPage);
3626 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3627 pgmPoolTrackDeref(pPool, pPage);
3628 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3629#endif
3630
3631#ifdef PGMPOOL_WITH_CACHE
3632 /*
3633 * Flush it from the cache.
3634 */
3635 pgmPoolCacheFlushPage(pPool, pPage);
3636#endif /* PGMPOOL_WITH_CACHE */
3637
3638#ifdef PGMPOOL_WITH_MONITORING
3639 /*
3640 * Deregister the monitoring.
3641 */
3642 if (pPage->fMonitored)
3643 rc = pgmPoolMonitorFlush(pPool, pPage);
3644#endif
3645
3646 /*
3647 * Free the page.
3648 */
3649 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3650 pPage->iNext = pPool->iFreeHead;
3651 pPool->iFreeHead = pPage->idx;
3652 pPage->enmKind = PGMPOOLKIND_FREE;
3653 pPage->GCPhys = NIL_RTGCPHYS;
3654 pPage->fReusedFlushPending = false;
3655
3656 pPool->cUsedPages--;
3657 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3658 return rc;
3659}
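
/*
 * Illustrative caller pattern (not part of the build, locals hypothetical):
 * a VERR_PGM_POOL_CLEARED return means the physical handler deregistration
 * triggered a lightweight pool flush, which callers typically turn into a
 * forced CR3 sync.
 *
 * @code
 *     int rc = pgmPoolFlushPage(pPool, pPage);
 *     if (rc == VERR_PGM_POOL_CLEARED)
 *         VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
 * @endcode
 */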
3660
3661
3662/**
3663 * Frees a usage of a pool page.
3664 *
3665 * The caller is responsible for updating the user table so that it no longer
3666 * references the shadow page.
3667 *
3668 * @param pPool The pool.
3669 * @param pPage The shadow page.
3670 * @param iUser The shadow page pool index of the user table.
3671 * @param iUserTable The index into the user table (shadowed).
3672 */
3673void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3674{
3675 STAM_PROFILE_START(&pPool->StatFree, a);
3676 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3677 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3678 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3679#ifdef PGMPOOL_WITH_USER_TRACKING
3680 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3681#endif
3682#ifdef PGMPOOL_WITH_CACHE
3683 if (!pPage->fCached)
3684#endif
3685 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3686 STAM_PROFILE_STOP(&pPool->StatFree, a);
3687}
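
/*
 * Note (informal): with PGMPOOL_WITH_CACHE, a cached page deliberately
 * survives its last user going away; it is only flushed once it is evicted
 * by pgmPoolCacheFreeOne or the whole pool is flushed.
 */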
3688
3689
3690/**
3691 * Makes one or more free pages available.
3692 *
3693 * @returns VBox status code.
3694 * @retval VINF_SUCCESS on success.
3695 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3696 *
3697 * @param pPool The pool.
3698 * @param iUser The user of the page.
3699 */
3700static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3701{
3702 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3703
3704 /*
3705 * If the pool isn't full grown yet, expand it.
3706 */
3707 if (pPool->cCurPages < pPool->cMaxPages)
3708 {
3709 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3710#ifdef IN_RING3
3711 int rc = PGMR3PoolGrow(pPool->pVMHC);
3712#else
3713 int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3714#endif
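    /* In GC/R0 the pool cannot be grown directly, so the request above is
       bounced to ring-3, where PGMR3PoolGrow does the actual work. */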
3715 if (VBOX_FAILURE(rc))
3716 return rc;
3717 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3718 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3719 return VINF_SUCCESS;
3720 }
3721
3722#ifdef PGMPOOL_WITH_CACHE
3723 /*
3724 * Free one cached page.
3725 */
3726 return pgmPoolCacheFreeOne(pPool, iUser);
3727#else
3728 /*
3729 * Flush the pool.
3730 * If we have tracking enabled, it should be possible to come up with
3731 * a cheap replacement strategy...
3732 */
3733 pgmPoolFlushAllInt(pPool);
3734 return VERR_PGM_POOL_FLUSHED;
3735#endif
3736}
3737
3738
3739/**
3740 * Allocates a page from the pool.
3741 *
3742 * This page may actually be a cached page and not in need of any processing
3743 * on the caller's part.
3744 *
3745 * @returns VBox status code.
3746 * @retval VINF_SUCCESS if a NEW page was allocated.
3747 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3748 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3749 * @param pVM The VM handle.
3750 * @param GCPhys The GC physical address of the page we're going to shadow.
3751 * For 4MB and 2MB PD entries, it's the first address the
3752 * shadow PT is covering.
3753 * @param enmKind The kind of mapping.
3754 * @param iUser The shadow page pool index of the user table.
3755 * @param iUserTable The index into the user table (shadowed).
3756 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3757 */
3758int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
3759{
3760 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3761 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3762 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3763 *ppPage = NULL;
3764
3765#ifdef PGMPOOL_WITH_CACHE
3766 if (pPool->fCacheEnabled)
3767 {
3768 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3769 if (VBOX_SUCCESS(rc2))
3770 {
3771 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3772 LogFlow(("pgmPoolAlloc: cached returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3773 return rc2;
3774 }
3775 }
3776#endif
3777
3778 /*
3779 * Allocate a new one.
3780 */
3781 int rc = VINF_SUCCESS;
3782 uint16_t iNew = pPool->iFreeHead;
3783 if (iNew == NIL_PGMPOOL_IDX)
3784 {
3785 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3786 if (VBOX_FAILURE(rc))
3787 {
3788 if (rc != VERR_PGM_POOL_CLEARED)
3789 {
3790 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3791 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3792 return rc;
3793 }
3794 Log(("pgmPoolMakeMoreFreePages failed with %Vrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
3795 rc = VERR_PGM_POOL_FLUSHED;
3796 }
3797 iNew = pPool->iFreeHead;
3798 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3799 }
3800
3801 /* unlink the free head */
3802 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3803 pPool->iFreeHead = pPage->iNext;
3804 pPage->iNext = NIL_PGMPOOL_IDX;
3805
3806 /*
3807 * Initialize it.
3808 */
3809 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3810 pPage->enmKind = enmKind;
3811 pPage->GCPhys = GCPhys;
3812 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3813 pPage->fMonitored = false;
3814 pPage->fCached = false;
3815 pPage->fReusedFlushPending = false;
3816 pPage->fCR3Mix = false;
3817#ifdef PGMPOOL_WITH_MONITORING
3818 pPage->cModifications = 0;
3819 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3820 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3821#endif
3822#ifdef PGMPOOL_WITH_USER_TRACKING
3823 pPage->cPresent = 0;
3824 pPage->iFirstPresent = ~0;
3825
3826 /*
3827 * Insert into the tracking and cache. If this fails, free the page.
3828 */
3829 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3830 if (VBOX_FAILURE(rc3))
3831 {
3832 if (rc3 != VERR_PGM_POOL_CLEARED)
3833 {
3834 pPool->cUsedPages--;
3835 pPage->enmKind = PGMPOOLKIND_FREE;
3836 pPage->GCPhys = NIL_RTGCPHYS;
3837 pPage->iNext = pPool->iFreeHead;
3838 pPool->iFreeHead = pPage->idx;
3839 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3840 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3841 return rc3;
3842 }
3843 Log(("pgmPoolTrackInsert failed with %Vrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
3844 rc = VERR_PGM_POOL_FLUSHED;
3845 }
3846#endif /* PGMPOOL_WITH_USER_TRACKING */
3847
3848 /*
3849 * Commit the allocation, clear the page and return.
3850 */
3851#ifdef VBOX_WITH_STATISTICS
3852 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
3853 pPool->cUsedPagesHigh = pPool->cUsedPages;
3854#endif
3855
3856 if (!pPage->fZeroed)
3857 {
3858 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3859 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
3860 ASMMemZeroPage(pv);
3861 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3862 }
3863
3864 *ppPage = pPage;
3865 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
3866 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
3867 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3868 return rc;
3869}
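
/*
 * Illustrative usage (not part of the build, locals hypothetical): shadowing
 * a guest PAE page table. VINF_PGM_CACHED_PAGE means the returned page
 * already carries valid shadow entries and must not be re-initialized.
 *
 * @code
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAlloc(pVM, GCPhysPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                           iUserIdx, iPdEntry, &pShwPage);
 *     if (rc == VINF_SUCCESS)
 *     {
 *         // Brand new, zeroed page: fill in the shadow PT entries.
 *     }
 *     else if (rc == VINF_PGM_CACHED_PAGE)
 *     {
 *         // Cached page: the entries are already valid, just use it.
 *     }
 * @endcode
 */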
3870
3871
3872/**
3873 * Frees a usage of a pool page.
3874 *
3875 * @param pVM The VM handle.
3876 * @param HCPhys The HC physical address of the shadow page.
3877 * @param iUser The shadow page pool index of the user table.
3878 * @param iUserTable The index into the user table (shadowed).
3879 */
3880void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
3881{
3882 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
3883 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3884 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
3885}
3886
3887
3888/**
3889 * Gets an in-use page in the pool by its physical address.
3890 *
3891 * @returns Pointer to the page.
3892 * @param pVM The VM handle.
3893 * @param HCPhys The HC physical address of the shadow page.
3894 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
3895 */
3896PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
3897{
3898 /** @todo profile this! */
3899 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3900 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
3901 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
3902 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
3903 return pPage;
3904}
3905
3906
3907/**
3908 * Flushes the entire cache.
3909 *
3910 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3911 * and will execute this CR3 flush.
3912 *
3913 * @param pVM The VM handle.
3914 */
3915void pgmPoolFlushAll(PVM pVM)
3916{
3917 LogFlow(("pgmPoolFlushAll:\n"));
3918 pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
3919}
3920