VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@16626

Last change on this file since 16626 was 16626, checked in by vboxsync, 16 years ago

VBOX_WITH_PGMPOOL_PAGING_ONLY: Deal with split PDs in pae/32 bit case.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 161.5 KB
/* $Id: PGMAllPool.cpp 16626 2009-02-10 12:41:48Z vboxsync $ */
/** @file
 * PGM Shadow Page Pool.
 */

/*
 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
 * Clara, CA 95054 USA or visit http://www.sun.com if you need
 * additional information or have any questions.
 */


/*******************************************************************************
*   Header Files                                                               *
*******************************************************************************/
#define LOG_GROUP LOG_GROUP_PGM_POOL
#include <VBox/pgm.h>
#include <VBox/mm.h>
#include <VBox/em.h>
#include <VBox/cpum.h>
#ifdef IN_RC
# include <VBox/patm.h>
#endif
#include "PGMInternal.h"
#include <VBox/vm.h>
#include <VBox/disopcode.h>
#include <VBox/hwacc_vmx.h>

#include <VBox/log.h>
#include <VBox/err.h>
#include <iprt/asm.h>


/*******************************************************************************
*   Internal Functions                                                         *
*******************************************************************************/
__BEGIN_DECLS
static void pgmPoolFlushAllInt(PPGMPOOL pPool);
#ifdef PGMPOOL_WITH_USER_TRACKING
DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#endif
#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
#endif
#ifdef PGMPOOL_WITH_CACHE
static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
#endif
#ifdef PGMPOOL_WITH_MONITORING
static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#endif
#ifndef IN_RING3
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
#endif
__END_DECLS


/**
 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
 *
 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
 * @param   enmKind     The page kind.
 */
DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            return true;
        default:
            return false;
    }
}


#if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
/**
 * Maps a pool page into the current context.
 *
 * @returns Pointer to the mapping.
 * @param   pPGM    Pointer to the PGM instance data.
 * @param   pPage   The page to map.
 */
void *pgmPoolMapPageFallback(PPGM pPGM, PPGMPOOLPAGE pPage)
{
    /* general pages are taken care of by the inlined part, we
       only end up here in case of failure. */
    AssertReleaseReturn(pPage->idx < PGMPOOL_IDX_FIRST, NULL);

/** @todo make sure HCPhys is valid for *all* indexes. */
    /* special pages. */
# ifdef IN_RC
    switch (pPage->idx)
    {
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
        case PGMPOOL_IDX_PD:
        case PGMPOOL_IDX_PDPT:
        case PGMPOOL_IDX_AMD64_CR3:
            return pPGM->pShwRootRC;
# else
        case PGMPOOL_IDX_PD:
            return pPGM->pShw32BitPdRC;
        case PGMPOOL_IDX_PAE_PD:
        case PGMPOOL_IDX_PAE_PD_0:
            return pPGM->apShwPaePDsRC[0];
        case PGMPOOL_IDX_PAE_PD_1:
            return pPGM->apShwPaePDsRC[1];
        case PGMPOOL_IDX_PAE_PD_2:
            return pPGM->apShwPaePDsRC[2];
        case PGMPOOL_IDX_PAE_PD_3:
            return pPGM->apShwPaePDsRC[3];
        case PGMPOOL_IDX_PDPT:
            return pPGM->pShwPaePdptRC;
# endif
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }

# else  /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
    RTHCPHYS HCPhys;
    switch (pPage->idx)
    {
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
        case PGMPOOL_IDX_PD:
        case PGMPOOL_IDX_PDPT:
        case PGMPOOL_IDX_AMD64_CR3:
            HCPhys = pPGM->HCPhysShwCR3;
            break;

        case PGMPOOL_IDX_NESTED_ROOT:
            HCPhys = pPGM->HCPhysShwNestedRoot;
            break;
# else
        case PGMPOOL_IDX_PD:
            HCPhys = pPGM->HCPhysShw32BitPD;
            break;
        case PGMPOOL_IDX_PAE_PD_0:
            HCPhys = pPGM->aHCPhysPaePDs[0];
            break;
        case PGMPOOL_IDX_PAE_PD_1:
            HCPhys = pPGM->aHCPhysPaePDs[1];
            break;
        case PGMPOOL_IDX_PAE_PD_2:
            HCPhys = pPGM->aHCPhysPaePDs[2];
            break;
        case PGMPOOL_IDX_PAE_PD_3:
            HCPhys = pPGM->aHCPhysPaePDs[3];
            break;
        case PGMPOOL_IDX_PDPT:
            HCPhys = pPGM->HCPhysShwPaePdpt;
            break;
        case PGMPOOL_IDX_NESTED_ROOT:
            HCPhys = pPGM->HCPhysShwNestedRoot;
            break;
        case PGMPOOL_IDX_PAE_PD:
            AssertReleaseMsgFailed(("PGMPOOL_IDX_PAE_PD is not usable in VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 context\n"));
            return NULL;
# endif
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }
    AssertMsg(HCPhys && HCPhys != NIL_RTHCPHYS && !(PAGE_OFFSET_MASK & HCPhys), ("%RHp\n", HCPhys));

    void *pv;
    pgmR0DynMapHCPageInlined(pPGM, HCPhys, &pv);
    return pv;
# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
}
#endif /* IN_RC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */


#ifdef PGMPOOL_WITH_MONITORING
/**
 * Determine the size of a write instruction.
 * @returns number of bytes written.
 * @param   pDis    The disassembler state.
 */
static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
{
    /*
     * This is very crude and possibly wrong for some opcodes,
     * but since it's not really supposed to be called we can
     * probably live with that.
     */
    return DISGetParamSize(pDis, &pDis->param1);
}


/**
 * Flushes a chain of pages sharing the same access monitor.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pPool   The pool.
 * @param   pPage   A page in the chain.
 */
int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));

    /*
     * Find the list head.
     */
    uint16_t idx = pPage->idx;
    if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
    {
        while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
        {
            idx = pPage->iMonitoredPrev;
            Assert(idx != pPage->idx);
            pPage = &pPool->aPages[idx];
        }
    }

    /*
     * Iterate the list flushing each shadow page.
     */
    int rc = VINF_SUCCESS;
    for (;;)
    {
        idx = pPage->iMonitoredNext;
        Assert(idx != pPage->idx);
        if (pPage->idx >= PGMPOOL_IDX_FIRST)
        {
            int rc2 = pgmPoolFlushPage(pPool, pPage);
            if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
                rc = VINF_PGM_SYNC_CR3;
        }
        /* next */
        if (idx == NIL_PGMPOOL_IDX)
            break;
        pPage = &pPool->aPages[idx];
    }
    return rc;
}
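
/* A minimal sketch of the monitor chain layout the flush above relies on:
 * iMonitoredPrev/iMonitoredNext form a doubly linked list of pool page
 * indexes, terminated by NIL_PGMPOOL_IDX. Kept disabled; the helper name is
 * illustrative only. */
#if 0
DECLINLINE(PPGMPOOLPAGE) pgmPoolMonitorGetHead(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    /* Walk backwards until iMonitoredPrev hits the NIL terminator. */
    while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
        pPage = &pPool->aPages[pPage->iMonitoredPrev];
    return pPage;
}
#endif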


/**
 * Wrapper for getting the current context pointer to the entry being modified.
 *
 * @returns Pointer to the current context mapping of the entry.
 * @param   pPool       The pool.
 * @param   pvFault     The fault virtual address.
 * @param   GCPhysFault The fault physical address.
 * @param   cbEntry     The entry size.
 */
#ifdef IN_RING3
DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
#else
DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
#endif
{
#ifdef IN_RC
    return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));

#elif defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
    void *pvRet;
    int rc = PGMDynMapGCPageOff(pPool->pVMR0, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
    AssertFatalRCSuccess(rc);
    return pvRet;

#elif defined(IN_RING0)
    void *pvRet;
    int rc = pgmRamGCPhys2HCPtr(&pPool->pVMR0->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
    AssertFatalRCSuccess(rc);
    return pvRet;

#elif defined(IN_RING3)
    return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
#else
# error "huh?"
#endif
}
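
/* A usage sketch (disabled): reading the guest PTE under modification. This
 * mirrors the PGMPOOL_WITH_GCPHYS_TRACKING call sites further down; the
 * entry-size argument doubles as the alignment mask for the lookup. */
#if 0
PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
RTGCPHYS GCPhysPte = pGstPte->u & X86_PTE_PG_MASK; /* the page the guest entry points at */
#endif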


/**
 * Process shadow entries before they are changed by the guest.
 *
 * For PT entries we will clear them. For PD entries, we'll simply check
 * for mapping conflicts and set the SyncCR3 FF if found.
 *
 * @param   pPool       The pool.
 * @param   pPage       The head page.
 * @param   GCPhysFault The guest physical fault address.
 * @param   pvAddress   In R0 and GC this is the guest context fault address (flat).
 *                      In R3 this is the host context 'fault' address.
 * @param   pCpu        The disassembler state for figuring out the write size.
 *                      This need not be specified if the caller knows we won't do cross entry accesses.
 */
#ifdef IN_RING3
void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
#else
void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
#endif
{
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
    const unsigned off     = GCPhysFault & PAGE_OFFSET_MASK;
    const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;

    LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));

    for (;;)
    {
        union
        {
            void       *pv;
            PX86PT      pPT;
            PX86PTPAE   pPTPae;
            PX86PD      pPD;
            PX86PDPAE   pPDPae;
            PX86PDPT    pPDPT;
            PX86PML4    pPML4;
        } uShw;

        switch (pPage->enmKind)
        {
            case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
            {
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                const unsigned iShw = off / sizeof(X86PTE);
                if (uShw.pPT->a[iShw].n.u1Present)
                {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                    PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                    Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                               pGstPte->u & X86_PTE_PG_MASK);
# endif
                    uShw.pPT->a[iShw].u = 0;
                }
                break;
            }

            /* page/2 sized */
            case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
            {
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
                {
                    const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
                    if (uShw.pPTPae->a[iShw].n.u1Present)
                    {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                        PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                        Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
                        pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                                   uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                                   pGstPte->u & X86_PTE_PG_MASK);
# endif
                        uShw.pPTPae->a[iShw].u = 0;
                    }
                }
                break;
            }

# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
            case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
            case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
            case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
            case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
            {
                unsigned iGst     = off / sizeof(X86PDE);
                unsigned iShwPdpt = iGst / 256;
                unsigned iShw     = (iGst % 256) * 2;
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);

                LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: idx = %d\n", iShwPdpt));
                if (iShwPdpt == pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
                {
                    if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw));
                    }
                    /* paranoia / a bit assumptive. */
                    else if (   pCpu
                             && (off & 3)
                             && (off & 3) + cbWrite > 4)
                    {
                        const unsigned iShw2 = iShw + 2;
                        if (    iShw2 < RT_ELEMENTS(uShw.pPDPae->a) /** @todo was completely wrong, it's better now after #1865 but still wrong from cross PD. */
                            &&  (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                        {
                            Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                            VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
                        }
                    }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                    if (    uShw.pPDPae->a[iShw].n.u1Present
                        &&  !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: iShwPdpt=%#x iShw=%#x: %RX64 -> freeing it!\n", iShwPdpt, iShw, uShw.pPDPae->a[iShw].u));
# ifdef IN_RC       /* TLB load - we're pushing things a bit... */
                        ASMProbeReadByte(pvAddress);
# endif
                        pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + iShwPdpt * X86_PG_PAE_ENTRIES);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
#endif
                }
                break;
            }
# endif


            case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
            {
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                const unsigned iShw = off / sizeof(X86PTEPAE);
                if (uShw.pPTPae->a[iShw].n.u1Present)
                {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                    PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                    Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                               pGstPte->u & X86_PTE_PAE_PG_MASK);
# endif
                    uShw.pPTPae->a[iShw].u = 0;
                }

                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + cbWrite > sizeof(X86PTEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
                    AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));

                    if (uShw.pPTPae->a[iShw2].n.u1Present)
                    {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                        PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                        Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
                        pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                                   uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
                                                   pGstPte->u & X86_PTE_PAE_PG_MASK);
# endif
                        uShw.pPTPae->a[iShw2].u = 0;
                    }
                }

                break;
            }

# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
            case PGMPOOLKIND_32BIT_PD:
# else
            case PGMPOOLKIND_ROOT_32BIT_PD:
# endif
            {
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                const unsigned iShw = off / sizeof(X86PTE);         // ASSUMING 32-bit guest paging!
                if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                }
                /* paranoia / a bit assumptive. */
                else if (   pCpu
                         && (off & 3)
                         && (off & 3) + cbWrite > sizeof(X86PTE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
                    if (    iShw2 != iShw
                        &&  iShw2 < RT_ELEMENTS(uShw.pPD->a)
                        &&  uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
                }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                if (    uShw.pPD->a[iShw].n.u1Present
                    &&  !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
# ifdef IN_RC       /* TLB load - we're pushing things a bit... */
                    ASMProbeReadByte(pvAddress);
# endif
                    pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
                    uShw.pPD->a[iShw].u = 0;
                }
#endif
                break;
            }

# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
            case PGMPOOLKIND_ROOT_PAE_PD:
            {
                unsigned iGst     = off / sizeof(X86PDE);    // ASSUMING 32-bit guest paging!
                unsigned iShwPdpt = iGst / 256;
                unsigned iShw     = (iGst % 256) * 2;
                Assert(pPage->idx == PGMPOOL_IDX_PAE_PD);
                PPGMPOOLPAGE pPage2 = pPage + 1 + iShwPdpt;
                Assert(pPage2->idx == PGMPOOL_IDX_PAE_PD_0 + iShwPdpt);
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage2);
                for (unsigned i = 0; i < 2; i++, iShw++)
                {
                    if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw));
                    }
                    /* paranoia / a bit assumptive. */
                    else if (   pCpu
                             && (off & 3)
                             && (off & 3) + cbWrite > 4)
                    {
                        const unsigned iShw2 = iShw + 2;
                        if (    iShw2 < RT_ELEMENTS(uShw.pPDPae->a) /** @todo was completely wrong, it's better now after #1865 but still wrong from cross PD. */
                            &&  (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                        {
                            Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                            VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
                        }
                    }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                    if (    uShw.pPDPae->a[iShw].n.u1Present
                        &&  !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: iShwPdpt=%#x iShw=%#x: %RX64 -> freeing it!\n", iShwPdpt, iShw, uShw.pPDPae->a[iShw].u));
# ifdef IN_RC       /* TLB load - we're pushing things a bit... */
                        ASMProbeReadByte(pvAddress);
# endif
                        pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + iShwPdpt * X86_PG_PAE_ENTRIES);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
#endif
                }
                break;
            }
# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */

            case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
            {
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                const unsigned iShw = off / sizeof(X86PDEPAE);
                if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                }
#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
                /*
                 * Causes trouble when the guest uses a PDE to refer to the whole page table level
                 * structure. (Invalidate here; faults later on when it tries to change the page
                 * table entries -> recheck; probably only applies to the RC case.)
                 */
                else
                {
                    if (uShw.pPDPae->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
                                    /* Note: hardcoded PAE implementation dependency */
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
                }
#endif
                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + cbWrite > sizeof(X86PDEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
                    AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));

                    if (    iShw2 != iShw
                        &&  uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
                    else if (uShw.pPDPae->a[iShw2].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
                                    /* Note: hardcoded PAE implementation dependency */
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
                        uShw.pPDPae->a[iShw2].u = 0;
                    }
#endif
                }
                break;
            }

# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
            case PGMPOOLKIND_PAE_PDPT:
# else
            case PGMPOOLKIND_ROOT_PDPT:
# endif
            {
                /*
                 * Hopefully this doesn't happen very often:
                 * - touching unused parts of the page
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                const unsigned iShw = off / sizeof(X86PDPE);
                if (iShw < X86_PG_PAE_PDPE_ENTRIES)          /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
                {
                    if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                    }
                    /* paranoia / a bit assumptive. */
                    else if (   pCpu
                             && (off & 7)
                             && (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
                        if (    iShw2 != iShw
                            &&  iShw2 < X86_PG_PAE_PDPE_ENTRIES
                            &&  uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
                        {
                            Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                            STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
                            VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                        }
                    }
                }
                break;
            }

#ifndef IN_RC
            case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
            {
                Assert(pPage->enmKind == PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD);

                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                const unsigned iShw = off / sizeof(X86PDEPAE);
                if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                }
                else
                {
                    if (uShw.pPDPae->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
                                    pPage->idx,
                                    iShw);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
                }
                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + cbWrite > sizeof(X86PDEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
                    AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));

                    if (    iShw2 != iShw
                        &&  uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
                    else if (uShw.pPDPae->a[iShw2].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
                                    pPage->idx,
                                    iShw2);
                        uShw.pPDPae->a[iShw2].u = 0;
                    }
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
            {
                /*
                 * Hopefully this doesn't happen very often:
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                    const unsigned iShw = off / sizeof(X86PDPE);
                    if (uShw.pPDPT->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
                        uShw.pPDPT->a[iShw].u = 0;
                    }
                    /* paranoia / a bit assumptive. */
                    if (    pCpu
                        &&  (off & 7)
                        &&  (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
                        if (uShw.pPDPT->a[iShw2].n.u1Present)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
                            pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
                            uShw.pPDPT->a[iShw2].u = 0;
                        }
                    }
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PML4:
            {
                /*
                 * Hopefully this doesn't happen very often:
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                    const unsigned iShw = off / sizeof(X86PDPE);
                    if (uShw.pPML4->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
                        uShw.pPML4->a[iShw].u = 0;
                    }
                    /* paranoia / a bit assumptive. */
                    if (    pCpu
                        &&  (off & 7)
                        &&  (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
                        if (uShw.pPML4->a[iShw2].n.u1Present)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
                            pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
                            uShw.pPML4->a[iShw2].u = 0;
                        }
                    }
                }
                break;
            }
#endif /* !IN_RC */

            default:
                AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
        }

        /* next */
        if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
            return;
        pPage = &pPool->aPages[pPage->iMonitoredNext];
    }
}
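
/* A worked example of the cross-entry ("paranoia / a bit assumptive") checks
 * above, assuming 8-byte PAE entries: a misaligned 4-byte write at off = 0xa6
 * has (off & 7) == 6 and 6 + 4 > 8, so it straddles entries
 * iShw = 0xa6 / 8 = 20 and iShw2 = (0xa6 + 4 - 1) / 8 = 21, and both get
 * processed; a 4-byte write at off = 0xffe would yield iShw2 = 512, which the
 * AssertReturnVoid range check rejects. */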


# ifndef IN_RING3
/**
 * Checks if an access could be a fork operation in progress.
 *
 * Meaning that the guest is setting up the parent process for copy-on-write.
 *
 * @returns true if it's likely that we're forking, otherwise false.
 * @param   pPool       The pool.
 * @param   pCpu        The disassembled instruction.
 * @param   offFault    The access offset.
 */
DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
{
    /*
     * i386 linux is using btr to clear X86_PTE_RW.
     * The functions involved are (2.6.16 source inspection):
     *     clear_bit
     *     ptep_set_wrprotect
     *     copy_one_pte
     *     copy_pte_range
     *     copy_pmd_range
     *     copy_pud_range
     *     copy_page_range
     *     dup_mmap
     *     dup_mm
     *     copy_mm
     *     copy_process
     *     do_fork
     */
    if (    pCpu->pCurInstr->opcode == OP_BTR
        &&  !(offFault & 4)
        /** @todo Validate that the bit index is X86_PTE_RW. */
        )
    {
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
        return true;
    }
    return false;
}
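
/* For illustration: when forking, the guest kernel write-protects each PTE
 * with something like "btr [pte], 1" (bit 1 being X86_PTE_RW); the
 * !(offFault & 4) test presumably keys on the fault landing in the dword
 * that actually holds that bit. */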


/**
 * Determine whether the page is likely to have been reused.
 *
 * @returns true if we consider the page as being reused for a different purpose.
 * @returns false if we consider it to still be a paging page.
 * @param   pVM         VM Handle.
 * @param   pPage       The page in question.
 * @param   pRegFrame   Trap register frame.
 * @param   pCpu        The disassembly info for the faulting instruction.
 * @param   pvFault     The fault address.
 *
 * @remark  The REP prefix check is left to the caller because of STOSD/W.
 */
DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
{
#ifndef IN_RC
    /** @todo could make this general, faulting close to rsp should be safe reuse heuristic. */
    if (    HWACCMHasPendingIrq(pVM)
        &&  (pRegFrame->rsp - pvFault) < 32)
    {
        /* Fault caused by stack writes while trying to inject an interrupt event. */
        Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
        return true;
    }
#else
    NOREF(pVM); NOREF(pvFault);
#endif

    switch (pCpu->pCurInstr->opcode)
    {
        /* call implies the actual push of the return address faulted */
        case OP_CALL:
            Log4(("pgmPoolMonitorIsReused: CALL\n"));
            return true;
        case OP_PUSH:
            Log4(("pgmPoolMonitorIsReused: PUSH\n"));
            return true;
        case OP_PUSHF:
            Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
            return true;
        case OP_PUSHA:
            Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
            return true;
        case OP_FXSAVE:
            Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
            return true;
        case OP_MOVNTI:         /* solaris - block_zero_no_xmm */
            Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
            return true;
        case OP_MOVNTDQ:        /* solaris - hwblkclr & hwblkpagecopy */
            Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
            return true;
        case OP_MOVSWD:
        case OP_STOSWD:
            if (    pCpu->prefix == (PREFIX_REP|PREFIX_REX)
                &&  pRegFrame->rcx >= 0x40
               )
            {
                Assert(pCpu->mode == CPUMODE_64BIT);

                Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
                return true;
            }
            return false;
    }
    if (    (pCpu->param1.flags & USE_REG_GEN32)
        &&  (pCpu->param1.base.reg_gen == USE_REG_ESP))
    {
        Log4(("pgmPoolMonitorIsReused: ESP\n"));
        return true;
    }

    //if (pPage->fCR3Mix)
    //    return false;
    return false;
}


/**
 * Flushes the page being accessed.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                     PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * First, do the flushing.
     */
    int rc = pgmPoolMonitorChainFlush(pPool, pPage);

    /*
     * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
     */
    uint32_t cbWritten;
    int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
    if (RT_SUCCESS(rc2))
        pRegFrame->rip += pCpu->opsize;
    else if (rc2 == VERR_EM_INTERPRETER)
    {
#ifdef IN_RC
        if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
        {
            LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
                     pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
            rc = VINF_SUCCESS;
            STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
        }
        else
#endif
        {
            rc = VINF_EM_RAW_EMULATE_INSTR;
            STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
        }
    }
    else
        rc = rc2;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
    return rc;
}


/**
 * Handles the STOSD write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                          PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    Assert(pCpu->mode == CPUMODE_32BIT);

    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Execute REP STOSD.
     *
     * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
     * write situation, meaning that it's safe to write here.
     */
#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
    PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
#endif
    RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
    while (pRegFrame->ecx)
    {
#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
        uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
        pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
        PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
#else
        pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
#endif
#ifdef IN_RC
        *(uint32_t *)pu32 = pRegFrame->eax;
#else
        PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
#endif
        pu32           += 4;
        GCPhysFault    += 4;
        pRegFrame->edi += 4;
        pRegFrame->ecx--;
    }
    pRegFrame->rip += pCpu->opsize;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
    return VINF_SUCCESS;
}
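
/* For reference, each iteration of the loop above mirrors what one step of
 * REP STOSD does architecturally (with DF clear, which the caller verifies):
 *     [edi] = eax;  edi += 4;  ecx--;
 * here applied via the guest physical address instead of the flat one, with
 * the monitor chain notified before every 4-byte store. */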


/**
 * Handles the simple write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                           PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Clear all the pages. ASSUMES that pvFault is readable.
     */
#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
    PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
    uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
    pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
    PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
#else
    pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
#endif

    /*
     * Interpret the instruction.
     */
    uint32_t cb;
    int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
    if (RT_SUCCESS(rc))
        pRegFrame->rip += pCpu->opsize;
    else if (rc == VERR_EM_INTERPRETER)
    {
        LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
                 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
        rc = VINF_EM_RAW_EMULATE_INSTR;
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
    }

    /*
     * Quick hack, with logging enabled we're getting stale
     * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
     * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
     * have to be fixed to support this. But that'll have to wait till next week.
     *
     * An alternative is to keep track of the changed PTEs together with the
     * GCPhys from the guest PT. This may prove expensive though.
     *
     * At the moment, it's VITAL that it's done AFTER the instruction interpreting
     * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
     */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
    return rc;
}


/**
 * \#PF Handler callback for PT write accesses.
 *
 * @returns VBox status code (appropriate for GC return).
 * @param   pVM         VM Handle.
 * @param   uErrorCode  CPU Error code.
 * @param   pRegFrame   Trap register frame.
 *                      NULL on DMA and other non CPU access.
 * @param   pvFault     The fault address (cr2).
 * @param   GCPhysFault The GC physical address corresponding to pvFault.
 * @param   pvUser      User argument.
 */
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
{
    STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
    PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
    PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
    LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));

    /*
     * We should ALWAYS have the list head as user parameter. This
     * is because we use that page to record the changes.
     */
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);

    /*
     * Disassemble the faulting instruction.
     */
    DISCPUSTATE Cpu;
    int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
    AssertRCReturn(rc, rc);

    /*
     * Check if it's worth dealing with.
     */
    bool fReused = false;
    if (    (   pPage->cModifications < 48   /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
             || pPage->fCR3Mix)
        &&  !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
        &&  !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
    {
        /*
         * Simple instructions, no REP prefix.
         */
        if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
        {
            rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
            return rc;
        }

        /*
         * Windows is frequently doing small memset() operations (netio test 4k+).
         * We have to deal with these or we'll kill the cache and performance.
         */
        if (    Cpu.pCurInstr->opcode == OP_STOSWD
            &&  CPUMGetGuestCPL(pVM, pRegFrame) == 0
            &&  pRegFrame->ecx <= 0x20
            &&  pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
            &&  !((uintptr_t)pvFault & 3)
            &&  (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
            &&  Cpu.mode == CPUMODE_32BIT
            &&  Cpu.opmode == CPUMODE_32BIT
            &&  Cpu.addrmode == CPUMODE_32BIT
            &&  Cpu.prefix == PREFIX_REP
            &&  !pRegFrame->eflags.Bits.u1DF
            )
        {
            rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
            return rc;
        }

        /* REP prefix, don't bother. */
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
        Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
              pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
    }

    /*
     * Not worth it, so flush it.
     *
     * If we considered it to be reused, don't go back to ring-3
     * to emulate failed instructions since we usually cannot
     * interpret them. This may be a bit risky, in which case
     * the reuse detection must be fixed.
     */
    rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
    if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
        rc = VINF_SUCCESS;
    STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
    return rc;
}

# endif /* !IN_RING3 */
#endif /* PGMPOOL_WITH_MONITORING */

#ifdef PGMPOOL_WITH_CACHE

/**
 * Inserts a page into the GCPhys hash table.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
    Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
    uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
    pPage->iNext = pPool->aiHash[iHash];
    pPool->aiHash[iHash] = pPage->idx;
}
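
/* A minimal lookup sketch over this hash (disabled): aiHash[] holds the head
 * page index per bucket and iNext chains the pages within a bucket,
 * terminated by NIL_PGMPOOL_IDX. The cache lookup in pgmPoolCacheAlloc()
 * below follows the same pattern. */
#if 0
for (unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)]; i != NIL_PGMPOOL_IDX; i = pPool->aPages[i].iNext)
    if (pPool->aPages[i].GCPhys == GCPhys)
        break; /* found */
#endif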


/**
 * Removes a page from the GCPhys hash table.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
    uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
    if (pPool->aiHash[iHash] == pPage->idx)
        pPool->aiHash[iHash] = pPage->iNext;
    else
    {
        uint16_t iPrev = pPool->aiHash[iHash];
        for (;;)
        {
            const int16_t i = pPool->aPages[iPrev].iNext;
            if (i == pPage->idx)
            {
                pPool->aPages[iPrev].iNext = pPage->iNext;
                break;
            }
            if (i == NIL_PGMPOOL_IDX)
            {
                AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
                break;
            }
            iPrev = i;
        }
    }
    pPage->iNext = NIL_PGMPOOL_IDX;
}


/**
 * Frees up one cache page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
 * @param   pPool       The pool.
 * @param   iUser       The user index.
 */
static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
{
#ifndef IN_RC
    const PVM pVM = pPool->CTX_SUFF(pVM);
#endif
    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
    STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);

    /*
     * Select one page from the tail of the age list.
     */
    uint16_t iToFree = pPool->iAgeTail;
    if (iToFree == iUser)
        iToFree = pPool->aPages[iToFree].iAgePrev;
/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
    if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
    {
        uint16_t i = pPool->aPages[iToFree].iAgePrev;
        for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
        {
            if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
                continue;
            iToFree = i;
            break;
        }
    }
*/

    Assert(iToFree != iUser);
    AssertRelease(iToFree != NIL_PGMPOOL_IDX);

    PPGMPOOLPAGE pPage = &pPool->aPages[iToFree];

    /*
     * Reject any attempts at flushing the currently active shadow CR3 mapping
     */
    if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
    {
        /* Refresh the cr3 mapping by putting it at the head of the age list. */
        pgmPoolCacheUsed(pPool, pPage);
        return pgmPoolCacheFreeOne(pPool, iUser);
    }

    int rc = pgmPoolFlushPage(pPool, pPage);
    if (rc == VINF_SUCCESS)
        PGM_INVL_GUEST_TLBS(); /* see PT handler. */
    return rc;
}
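
/* The age list above implements a simple LRU: iAgeHead is the most recently
 * used page and iAgeTail the least recently used, so eviction always picks
 * from the tail (skipping iUser itself and the active shadow CR3). See
 * pgmPoolCacheInsert() below for the matching head insertion. */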


/**
 * Checks if a kind mismatch is really a page being reused
 * or if it's just normal remappings.
 *
 * @returns true if reused and the cached page (enmKind1) should be flushed
 * @returns false if not reused.
 * @param   enmKind1    The kind of the cached page.
 * @param   enmKind2    The kind of the requested page.
 */
static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
{
    switch (enmKind1)
    {
        /*
         * Never reuse them. There is no remapping in non-paging mode.
         */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_32BIT_PD_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PD_PHYS:
        case PGMPOOLKIND_PAE_PDPT_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
        case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
            return false;
#else
            return true;
#endif

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         */
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
                case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
                case PGMPOOLKIND_64BIT_PML4:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         */
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * These cannot be flushed, and it's common to reuse the PDs as PTs.
         */
#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
        case PGMPOOLKIND_ROOT_PDPT:
#endif
        case PGMPOOLKIND_ROOT_NESTED:
            return false;

        default:
            AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
    }
}


/**
 * Attempts to satisfy a pgmPoolAlloc request from the cache.
 *
 * @returns VBox status code.
 * @retval  VINF_PGM_CACHED_PAGE on success.
 * @retval  VERR_FILE_NOT_FOUND if not found.
 * @param   pPool       The pool.
 * @param   GCPhys      The GC physical address of the page we're gonna shadow.
 * @param   enmKind     The kind of mapping.
 * @param   iUser       The shadow page pool index of the user table.
 * @param   iUserTable  The index into the user table (shadowed).
 * @param   ppPage      Where to store the pointer to the page.
 */
static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
{
#ifndef IN_RC
    const PVM pVM = pPool->CTX_SUFF(pVM);
#endif
    /*
     * Look up the GCPhys in the hash.
     */
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    Log3(("pgmPoolCacheAlloc: %RGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
    if (i != NIL_PGMPOOL_IDX)
    {
        do
        {
            PPGMPOOLPAGE pPage = &pPool->aPages[i];
            Log3(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
            if (pPage->GCPhys == GCPhys)
            {
                if ((PGMPOOLKIND)pPage->enmKind == enmKind)
                {
                    int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
                    if (RT_SUCCESS(rc))
                    {
                        *ppPage = pPage;
                        STAM_COUNTER_INC(&pPool->StatCacheHits);
                        return VINF_PGM_CACHED_PAGE;
                    }
                    return rc;
                }

                /*
                 * The kind is different. In some cases we should now flush the page
                 * as it has been reused, but in most cases this is normal remapping
                 * of PDs as PT or big pages using the GCPhys field in a slightly
                 * different way than the other kinds.
                 */
                if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
                {
                    STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
                    pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
                    PGM_INVL_GUEST_TLBS(); /* see PT handler. */
                    break;
                }
            }

            /* next */
            i = pPage->iNext;
        } while (i != NIL_PGMPOOL_IDX);
    }

    Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
    STAM_COUNTER_INC(&pPool->StatCacheMisses);
    return VERR_FILE_NOT_FOUND;
}
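
/* A caller-side sketch (disabled, simplified from what a pgmPoolAlloc-style
 * caller would do): try the cache first, fall back to allocating a fresh
 * pool page on VERR_FILE_NOT_FOUND. */
#if 0
PPGMPOOLPAGE pPage;
int rc = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, &pPage);
if (rc == VINF_PGM_CACHED_PAGE)
    return rc;                      /* reuse the cached shadow page */
if (rc == VERR_FILE_NOT_FOUND)
    /* ... allocate and initialize a new pool page ... */;
#endif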


/**
 * Inserts a page into the cache.
 *
 * @param   pPool           The pool.
 * @param   pPage           The cached page.
 * @param   fCanBeCached    Set if the page is fit for caching from the caller's point of view.
 */
static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
{
    /*
     * Insert into the GCPhys hash if the page is fit for that.
     */
    Assert(!pPage->fCached);
    if (fCanBeCached)
    {
        pPage->fCached = true;
        pgmPoolHashInsert(pPool, pPage);
        Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
              pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
        STAM_COUNTER_INC(&pPool->StatCacheCacheable);
    }
    else
    {
        Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
              pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
        STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
    }

    /*
     * Insert at the head of the age list.
     */
    pPage->iAgePrev = NIL_PGMPOOL_IDX;
    pPage->iAgeNext = pPool->iAgeHead;
    if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
        pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
    else
        pPool->iAgeTail = pPage->idx;
    pPool->iAgeHead = pPage->idx;
}


/**
 * Flushes a cached page.
 *
 * @param   pPool       The pool.
 * @param   pPage       The cached page.
 */
static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));

    /*
     * Remove the page from the hash.
     */
    if (pPage->fCached)
    {
        pPage->fCached = false;
        pgmPoolHashRemove(pPool, pPage);
    }
    else
        Assert(pPage->iNext == NIL_PGMPOOL_IDX);

    /*
     * Remove it from the age list.
     */
    if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
    else
        pPool->iAgeTail = pPage->iAgePrev;
    if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
    else
        pPool->iAgeHead = pPage->iAgeNext;
    pPage->iAgeNext = NIL_PGMPOOL_IDX;
    pPage->iAgePrev = NIL_PGMPOOL_IDX;
}

#endif /* PGMPOOL_WITH_CACHE */
1570#ifdef PGMPOOL_WITH_MONITORING
1571
1572/**
1573 * Looks for pages sharing the monitor.
1574 *
1575 * @returns Pointer to the head page.
1576 * @returns NULL if not found.
1577 * @param pPool The Pool
1578 * @param pNewPage The page which is going to be monitored.
1579 */
1580static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1581{
1582#ifdef PGMPOOL_WITH_CACHE
1583 /*
1584 * Look up the GCPhys in the hash.
1585 */
1586 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1587 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1588 if (i == NIL_PGMPOOL_IDX)
1589 return NULL;
1590 do
1591 {
1592 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1593 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1594 && pPage != pNewPage)
1595 {
1596 switch (pPage->enmKind)
1597 {
1598 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1599 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1600 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1601 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1602 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1603 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1604 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1605 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1606 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1607 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1608 case PGMPOOLKIND_64BIT_PML4:
1609#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1610 case PGMPOOLKIND_32BIT_PD:
1611 case PGMPOOLKIND_PAE_PDPT:
1612#else
1613 case PGMPOOLKIND_ROOT_32BIT_PD:
1614 case PGMPOOLKIND_ROOT_PAE_PD:
1615 case PGMPOOLKIND_ROOT_PDPT:
1616#endif
1617 {
1618 /* find the head */
1619 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1620 {
1621 Assert(pPage->iMonitoredPrev != pPage->idx);
1622 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1623 }
1624 return pPage;
1625 }
1626
1627 /* ignore, no monitoring. */
1628 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1629 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1630 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1631 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1632 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1633 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1634 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1635 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1636 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1637 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1638 case PGMPOOLKIND_ROOT_NESTED:
1639 case PGMPOOLKIND_PAE_PD_PHYS:
1640 case PGMPOOLKIND_PAE_PDPT_PHYS:
1641 case PGMPOOLKIND_32BIT_PD_PHYS:
1642#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1643 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1644#endif
1645 break;
1646 default:
1647 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1648 }
1649 }
1650
1651 /* next */
1652 i = pPage->iNext;
1653 } while (i != NIL_PGMPOOL_IDX);
1654#endif
1655 return NULL;
1656}
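
/*
 * A note on the hash walk above: the `pPage->GCPhys - GCPhys < PAGE_SIZE`
 * test matches any pool page whose GCPhys falls within the same guest page
 * as the page-aligned key. Because the operands are unsigned, an address
 * below the key wraps around to a huge delta and fails the compare. A
 * minimal sketch of the idiom, assuming plain C99 (the Example names are
 * illustrative inventions, not part of the VirtualBox API):
 */
#if 0 /* sketch, not compiled */
#include <stdbool.h>
#include <stdint.h>

#define EXAMPLE_PAGE_SIZE 0x1000u

static bool exampleIsInPage(uint64_t GCPhysEntry, uint64_t GCPhysPage /* page aligned */)
{
    /* below the page  -> wraps to a huge value -> false;
       in a higher page -> delta >= page size   -> false;
       inside the page  -> delta in [0, size)   -> true. */
    return GCPhysEntry - GCPhysPage < EXAMPLE_PAGE_SIZE;
}
#endif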
1657
1658
1659/**
1660 * Enables write monitoring of a guest page.
1661 *
1662 * @returns VBox status code.
1663 * @retval VINF_SUCCESS on success.
1664 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a lightweight pool flush.
1665 * @param pPool The pool.
1666 * @param pPage The cached page.
1667 */
1668static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1669{
1670 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1671
1672 /*
1673 * Filter out the relevant kinds.
1674 */
1675 switch (pPage->enmKind)
1676 {
1677 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1678 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1679 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1680 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1681 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1682 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1683 case PGMPOOLKIND_64BIT_PML4:
1684#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1685 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1686 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1687 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1688 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1689 case PGMPOOLKIND_32BIT_PD:
1690 case PGMPOOLKIND_PAE_PDPT:
1691#else
1692 case PGMPOOLKIND_ROOT_PDPT:
1693#endif
1694 break;
1695
1696 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1697 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1698 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1699 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1700 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1701 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1702 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1703 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1704 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1705 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1706 case PGMPOOLKIND_ROOT_NESTED:
1707 /* Nothing to monitor here. */
1708 return VINF_SUCCESS;
1709
1710#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1711 case PGMPOOLKIND_32BIT_PD_PHYS:
1712 case PGMPOOLKIND_PAE_PDPT_PHYS:
1713 case PGMPOOLKIND_PAE_PD_PHYS:
1714 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1715 /* Nothing to monitor here. */
1716 return VINF_SUCCESS;
1717#else
1718 case PGMPOOLKIND_ROOT_32BIT_PD:
1719 case PGMPOOLKIND_ROOT_PAE_PD:
1720#endif
1721#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1722 break;
1723#else
1724 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1725#endif
1726 default:
1727 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1728 }
1729
1730 /*
1731 * Install handler.
1732 */
1733 int rc;
1734 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1735 if (pPageHead)
1736 {
1737 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1738 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1739 pPage->iMonitoredPrev = pPageHead->idx;
1740 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1741 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1742 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1743 pPageHead->iMonitoredNext = pPage->idx;
1744 rc = VINF_SUCCESS;
1745 }
1746 else
1747 {
1748 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1749 PVM pVM = pPool->CTX_SUFF(pVM);
1750 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1751 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1752 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1753 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1754 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1755 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1756 pPool->pszAccessHandler);
1757 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1758 * the heap size should suffice. */
1759 AssertFatalRC(rc);
1760 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1761 rc = VERR_PGM_POOL_CLEARED;
1762 }
1763 pPage->fMonitored = true;
1764 return rc;
1765}
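
/*
 * The chain handling above always splices a new page in right after the
 * existing head, so the head page - the one the physical handler was
 * registered with - never changes. A self-contained sketch of that
 * index-linked insertion, assuming plain C99 (ExampleNode stands in for the
 * iMonitoredNext/iMonitoredPrev members of PGMPOOLPAGE):
 */
#if 0 /* sketch, not compiled */
#include <stdint.h>

#define EXAMPLE_NIL UINT16_MAX  /* plays the role of NIL_PGMPOOL_IDX */

typedef struct ExampleNode
{
    uint16_t iNext;
    uint16_t iPrev;
} ExampleNode;

static void exampleInsertAfterHead(ExampleNode *paNodes, uint16_t iHead, uint16_t iNew)
{
    paNodes[iNew].iPrev = iHead;
    paNodes[iNew].iNext = paNodes[iHead].iNext;
    if (paNodes[iHead].iNext != EXAMPLE_NIL)
        paNodes[paNodes[iHead].iNext].iPrev = iNew;
    paNodes[iHead].iNext = iNew;
}
#endif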
1766
1767
1768/**
1769 * Disables write monitoring of a guest page.
1770 *
1771 * @returns VBox status code.
1772 * @retval VINF_SUCCESS on success.
1773 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
1774 * @param pPool The pool.
1775 * @param pPage The cached page.
1776 */
1777static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1778{
1779 /*
1780 * Filter out the relevant kinds.
1781 */
1782 switch (pPage->enmKind)
1783 {
1784 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1785 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1786 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1787 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1788 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1789 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1790 case PGMPOOLKIND_64BIT_PML4:
1791#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1792 case PGMPOOLKIND_32BIT_PD:
1793 case PGMPOOLKIND_PAE_PDPT:
1794 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1795 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1796 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1797 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1798#else
1799 case PGMPOOLKIND_ROOT_PDPT:
1800#endif
1801 break;
1802
1803 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1804 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1805 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1806 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1807 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1808 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1809 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1810 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1811 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1812 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1813 case PGMPOOLKIND_ROOT_NESTED:
1814 case PGMPOOLKIND_PAE_PD_PHYS:
1815 case PGMPOOLKIND_PAE_PDPT_PHYS:
1816 case PGMPOOLKIND_32BIT_PD_PHYS:
1817 /* Nothing to monitor here. */
1818 return VINF_SUCCESS;
1819
1820#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1821 case PGMPOOLKIND_ROOT_32BIT_PD:
1822 case PGMPOOLKIND_ROOT_PAE_PD:
1823#endif
1824#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1825 break;
1826#endif
1827#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1828 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1829#endif
1830 default:
1831 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1832 }
1833
1834 /*
1835 * Remove the page from the monitored list or uninstall it if last.
1836 */
1837 const PVM pVM = pPool->CTX_SUFF(pVM);
1838 int rc;
1839 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1840 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1841 {
1842 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1843 {
1844 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1845 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1846 pNewHead->fCR3Mix = pPage->fCR3Mix;
1847 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1848 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1849 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1850 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
1851 pPool->pszAccessHandler);
1852 AssertFatalRCSuccess(rc);
1853 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1854 }
1855 else
1856 {
1857 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1858 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1859 {
1860 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1861 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1862 }
1863 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1864 rc = VINF_SUCCESS;
1865 }
1866 }
1867 else
1868 {
1869 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1870 AssertFatalRC(rc);
1871 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1872 rc = VERR_PGM_POOL_CLEARED;
1873 }
1874 pPage->fMonitored = false;
1875
1876 /*
1877 * Remove it from the list of modified pages (if in it).
1878 */
1879 pgmPoolMonitorModifiedRemove(pPool, pPage);
1880
1881 return rc;
1882}
1883
1884# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1885
1886/**
1887 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1888 *
1889 * @param pPool The pool.
1890 * @param pPage A page in the chain.
1891 * @param fCR3Mix The new fCR3Mix value.
1892 */
1893static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1894{
1895 /* current */
1896 pPage->fCR3Mix = fCR3Mix;
1897
1898 /* before */
1899 int16_t idx = pPage->iMonitoredPrev;
1900 while (idx != NIL_PGMPOOL_IDX)
1901 {
1902 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1903 idx = pPool->aPages[idx].iMonitoredPrev;
1904 }
1905
1906 /* after */
1907 idx = pPage->iMonitoredNext;
1908 while (idx != NIL_PGMPOOL_IDX)
1909 {
1910 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1911 idx = pPool->aPages[idx].iMonitoredNext;
1912 }
1913}
1914
1915
1916/**
1917 * Installs or modifies monitoring of a CR3 page (special).
1918 *
1919 * We're pretending the CR3 page is shadowed by the pool so we can use the
1920 * generic mechanisms for detecting chained monitoring. (This also gives us a
1921 * taste of what code changes are required to really pool CR3 shadow pages.)
1922 *
1923 * @returns VBox status code.
1924 * @param pPool The pool.
1925 * @param idxRoot The CR3 (root) page index.
1926 * @param GCPhysCR3 The (new) CR3 value.
1927 */
1928int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1929{
1930 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1931 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1932 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d} GCPhysCR3=%RGp\n",
1933 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1934
1935 /*
1936 * The unlikely case where it already matches.
1937 */
1938 if (pPage->GCPhys == GCPhysCR3)
1939 {
1940 Assert(pPage->fMonitored);
1941 return VINF_SUCCESS;
1942 }
1943
1944 /*
1945 * Flush the current monitoring and remove it from the hash.
1946 */
1947 int rc = VINF_SUCCESS;
1948 if (pPage->fMonitored)
1949 {
1950 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1951 rc = pgmPoolMonitorFlush(pPool, pPage);
1952 if (rc == VERR_PGM_POOL_CLEARED)
1953 rc = VINF_SUCCESS;
1954 else
1955 AssertFatalRC(rc);
1956 pgmPoolHashRemove(pPool, pPage);
1957 }
1958
1959 /*
1960 * Monitor the page at the new location and insert it into the hash.
1961 */
1962 pPage->GCPhys = GCPhysCR3;
1963 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1964 if (rc2 != VERR_PGM_POOL_CLEARED)
1965 {
1966 AssertFatalRC(rc2);
1967 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1968 rc = rc2;
1969 }
1970 pgmPoolHashInsert(pPool, pPage);
1971 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1972 return rc;
1973}
1974
1975
1976/**
1977 * Removes the monitoring of a CR3 page (special).
1978 *
1979 * @returns VBox status code.
1980 * @param pPool The pool.
1981 * @param idxRoot The CR3 (root) page index.
1982 */
1983int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1984{
1985 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1986 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1987 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d}\n",
1988 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1989
1990 if (!pPage->fMonitored)
1991 return VINF_SUCCESS;
1992
1993 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1994 int rc = pgmPoolMonitorFlush(pPool, pPage);
1995 if (rc != VERR_PGM_POOL_CLEARED)
1996 AssertFatalRC(rc);
1997 else
1998 rc = VINF_SUCCESS;
1999 pgmPoolHashRemove(pPool, pPage);
2000 Assert(!pPage->fMonitored);
2001 pPage->GCPhys = NIL_RTGCPHYS;
2002 return rc;
2003}
2004
2005# endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
2006
2007/**
2008 * Inserts the page into the list of modified pages.
2009 *
2010 * @param pPool The pool.
2011 * @param pPage The page.
2012 */
2013void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2014{
2015 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2016 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2017 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2018 && pPool->iModifiedHead != pPage->idx,
2019 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2020 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2021 pPool->iModifiedHead, pPool->cModifiedPages));
2022
2023 pPage->iModifiedNext = pPool->iModifiedHead;
2024 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2025 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2026 pPool->iModifiedHead = pPage->idx;
2027 pPool->cModifiedPages++;
2028#ifdef VBOX_WITH_STATISTICS
2029 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2030 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2031#endif
2032}
2033
2034
2035/**
2036 * Removes the page from the list of modified pages and resets the
2037 * modification counter.
2038 *
2039 * @param pPool The pool.
2040 * @param pPage The page which is believed to be in the list of modified pages.
2041 */
2042static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2043{
2044 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2045 if (pPool->iModifiedHead == pPage->idx)
2046 {
2047 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2048 pPool->iModifiedHead = pPage->iModifiedNext;
2049 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2050 {
2051 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2052 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2053 }
2054 pPool->cModifiedPages--;
2055 }
2056 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2057 {
2058 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2059 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2060 {
2061 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2062 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2063 }
2064 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2065 pPool->cModifiedPages--;
2066 }
2067 else
2068 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2069 pPage->cModifications = 0;
2070}
2071
2072
2073/**
2074 * Zaps the list of modified pages, resetting their modification counters in the process.
2075 *
2076 * @param pVM The VM handle.
2077 */
2078void pgmPoolMonitorModifiedClearAll(PVM pVM)
2079{
2080 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2081 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2082
2083 unsigned cPages = 0; NOREF(cPages);
2084 uint16_t idx = pPool->iModifiedHead;
2085 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2086 while (idx != NIL_PGMPOOL_IDX)
2087 {
2088 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2089 idx = pPage->iModifiedNext;
2090 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2091 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2092 pPage->cModifications = 0;
2093 Assert(++cPages);
2094 }
2095 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2096 pPool->cModifiedPages = 0;
2097}
2098
2099
2100#ifdef IN_RING3
2101/**
2102 * Clear all shadow pages and clear all modification counters.
2103 *
2104 * @param pVM The VM handle.
2105 * @remark Should only be used when monitoring is available, thus placed in
2106 * the PGMPOOL_WITH_MONITORING #ifdef.
2107 */
2108void pgmPoolClearAll(PVM pVM)
2109{
2110 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2111 STAM_PROFILE_START(&pPool->StatClearAll, c);
2112 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2113
2114 /*
2115 * Iterate all the pages until we've encountered all those that are in use.
2116 * This is a simple but not quite optimal solution.
2117 */
2118 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2119 unsigned cLeft = pPool->cUsedPages;
2120 unsigned iPage = pPool->cCurPages;
2121 while (--iPage >= PGMPOOL_IDX_FIRST)
2122 {
2123 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2124 if (pPage->GCPhys != NIL_RTGCPHYS)
2125 {
2126 switch (pPage->enmKind)
2127 {
2128 /*
2129 * We only care about shadow page tables.
2130 */
2131 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2132 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2133 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2134 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2135 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2136 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2137 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2138 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2139 {
2140#ifdef PGMPOOL_WITH_USER_TRACKING
2141 if (pPage->cPresent)
2142#endif
2143 {
2144 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2145 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2146 ASMMemZeroPage(pvShw);
2147 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2148#ifdef PGMPOOL_WITH_USER_TRACKING
2149 pPage->cPresent = 0;
2150 pPage->iFirstPresent = ~0;
2151#endif
2152 }
2153 }
2154 /* fall thru */
2155
2156 default:
2157 Assert(!pPage->cModifications || ++cModifiedPages);
2158 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2159 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2160 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2161 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2162 pPage->cModifications = 0;
2163 break;
2164
2165 }
2166 if (!--cLeft)
2167 break;
2168 }
2169 }
2170
2171 /* sweep the special pages too. */
2172 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2173 {
2174 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2175 if (pPage->GCPhys != NIL_RTGCPHYS)
2176 {
2177 Assert(!pPage->cModifications || ++cModifiedPages);
2178 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2179 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2180 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2181 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2182 pPage->cModifications = 0;
2183 }
2184 }
2185
2186#ifndef DEBUG_michael
2187 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2188#endif
2189 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2190 pPool->cModifiedPages = 0;
2191
2192#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2193 /*
2194 * Clear all the GCPhys links and rebuild the phys ext free list.
2195 */
2196 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2197 pRam;
2198 pRam = pRam->CTX_SUFF(pNext))
2199 {
2200 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2201 while (iPage-- > 0)
2202 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2203 }
2204
2205 pPool->iPhysExtFreeHead = 0;
2206 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2207 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2208 for (unsigned i = 0; i < cMaxPhysExts; i++)
2209 {
2210 paPhysExts[i].iNext = i + 1;
2211 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2212 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2213 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2214 }
2215 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2216#endif
2217
2218
2219 pPool->cPresent = 0;
2220 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2221}
2222#endif /* IN_RING3 */
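
/*
 * Both pgmPoolClearAll above and pgmPoolTrackFlushGCPhysPTsSlow further down
 * use the same early-out scan: walk the page array backwards while counting
 * down the number of used pages, so the loop can stop as soon as the last
 * page in use has been visited. The pattern in isolation (a sketch, assuming
 * a pool pointer is in scope):
 */
#if 0 /* sketch, not compiled */
    unsigned cLeft = pPool->cUsedPages;
    unsigned iPage = pPool->cCurPages;
    while (--iPage >= PGMPOOL_IDX_FIRST)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
        if (pPage->GCPhys != NIL_RTGCPHYS)  /* page is in use */
        {
            /* ... process pPage ... */
            if (!--cLeft)
                break;                      /* all used pages seen. */
        }
    }
#endif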
2223
2224
2225/**
2226 * Handles the SyncCR3 pool tasks.
2227 *
2228 * @returns VBox status code.
2229 * @retval VINF_SUCCESS on success.
2230 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2231 * @param pVM The VM handle.
2232 * @remark Should only be used when monitoring is available, thus placed in
2233 * the PGMPOOL_WITH_MONITORING #ifdef.
2234 */
2235int pgmPoolSyncCR3(PVM pVM)
2236{
2237 /*
2238 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2239 * Occasionally we will have to clear all the shadow page tables because we wanted
2240 * to monitor a page which was mapped by too many shadowed page tables. This operation is
2241 * sometimes referred to as a 'lightweight flush'.
2242 */
2243 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2244 pgmPoolMonitorModifiedClearAll(pVM);
2245 else
2246 {
2247# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2248 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2249 pgmPoolClearAll(pVM);
2250# else /* !IN_RING3 */
2251 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2252 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2253 return VINF_PGM_SYNC_CR3;
2254# endif /* !IN_RING3 */
2255 }
2256 return VINF_SUCCESS;
2257}
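
/*
 * Outside ring-3 the function above does not clear the pool itself; it sets
 * the VM_FF_PGM_SYNC_CR3 force-action flag and returns VINF_PGM_SYNC_CR3 so
 * that execution unwinds to ring-3, where pgmPoolClearAll() is affordable.
 * A sketch of how a hypothetical ring-0/RC call site would propagate it:
 */
#if 0 /* sketch, not compiled */
    int rc = pgmPoolSyncCR3(pVM);
    if (rc == VINF_PGM_SYNC_CR3)
        return rc;   /* take the exit to ring-3; the force-action flag is already set. */
    AssertRC(rc);
#endif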
2258
2259#endif /* PGMPOOL_WITH_MONITORING */
2260#ifdef PGMPOOL_WITH_USER_TRACKING
2261
2262/**
2263 * Frees up at least one user entry.
2264 *
2265 * @returns VBox status code.
2266 * @retval VINF_SUCCESS if successfully added.
2267 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2268 * @param pPool The pool.
2269 * @param iUser The user index.
2270 */
2271static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2272{
2273 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2274#ifdef PGMPOOL_WITH_CACHE
2275 /*
2276 * Just free cached pages in a braindead fashion.
2277 */
2278 /** @todo walk the age list backwards and free the first with usage. */
2279 int rc = VINF_SUCCESS;
2280 do
2281 {
2282 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2283 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2284 rc = rc2;
2285 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2286 return rc;
2287#else
2288 /*
2289 * Lazy approach.
2290 */
2291 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
2292 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
2293 pgmPoolFlushAllInt(pPool);
2294 return VERR_PGM_POOL_FLUSHED;
2295#endif
2296}
2297
2298
2299/**
2300 * Inserts a page into the cache.
2301 *
2302 * This will create a user node for the page, insert it into the GCPhys
2303 * hash, and insert it into the age list.
2304 *
2305 * @returns VBox status code.
2306 * @retval VINF_SUCCESS if successfully added.
2307 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2308 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a lightweight pool flush.
2309 * @param pPool The pool.
2310 * @param pPage The cached page.
2311 * @param GCPhys The GC physical address of the page we're going to shadow.
2312 * @param iUser The user index.
2313 * @param iUserTable The user table index.
2314 */
2315DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2316{
2317 int rc = VINF_SUCCESS;
2318 PPGMPOOLUSER pUser = pPool->CTX_SUFF(paUsers);
2319
2320 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
2321
2322 /*
2323 * Find a free user node.
2324 */
2325 uint16_t i = pPool->iUserFreeHead;
2326 if (i == NIL_PGMPOOL_USER_INDEX)
2327 {
2328 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2329 if (RT_FAILURE(rc))
2330 return rc;
2331 i = pPool->iUserFreeHead;
2332 }
2333
2334 /*
2335 * Unlink the user node from the free list,
2336 * initialize and insert it into the user list.
2337 */
2338 pPool->iUserFreeHead = pUser[i].iNext;
2339 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
2340 pUser[i].iUser = iUser;
2341 pUser[i].iUserTable = iUserTable;
2342 pPage->iUserHead = i;
2343
2344 /*
2345 * Insert into cache and enable monitoring of the guest page if enabled.
2346 *
2347 * Until we implement caching of all levels, including the CR3 one, we'll
2348 * have to make sure we don't try to monitor & cache any recursive reuse of
2349 * a monitored CR3 page. Because all Windows versions do this, we'll
2350 * have to be able to do combined access monitoring, CR3 + PT and
2351 * PD + PT (guest PAE).
2352 *
2353 * Update:
2354 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2355 */
2356#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2357# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2358 const bool fCanBeMonitored = true;
2359# else
2360 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2361 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2362 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2363# endif
2364# ifdef PGMPOOL_WITH_CACHE
2365 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2366# endif
2367 if (fCanBeMonitored)
2368 {
2369# ifdef PGMPOOL_WITH_MONITORING
2370 rc = pgmPoolMonitorInsert(pPool, pPage);
2371 if (rc == VERR_PGM_POOL_CLEARED)
2372 {
2373 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2374# ifndef PGMPOOL_WITH_CACHE
2375 pgmPoolMonitorFlush(pPool, pPage);
2376 rc = VERR_PGM_POOL_FLUSHED;
2377# endif
2378 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2379 pUser[i].iNext = pPool->iUserFreeHead;
2380 pUser[i].iUser = NIL_PGMPOOL_IDX;
2381 pPool->iUserFreeHead = i;
2382 }
2383 }
2384# endif
2385#endif /* PGMPOOL_WITH_MONITORING */
2386 return rc;
2387}
2388
2389
2390# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2391/**
2392 * Adds a user reference to a page.
2393 *
2394 * This will also tell the cache to update its replacement statistics for the
2395 * page, moving it to the head of the age list.
2396 *
2397 * @returns VBox status code.
2398 * @retval VINF_SUCCESS if successfully added.
2399 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2400 * @param pPool The pool.
2401 * @param pPage The cached page.
2402 * @param iUser The user index.
2403 * @param iUserTable The user table.
2404 */
2405static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2406{
2407 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2408
2409 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
2410# ifdef VBOX_STRICT
2411 /*
2412 * Check that the entry doesn't already exist.
2413 */
2414 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2415 {
2416 uint16_t i = pPage->iUserHead;
2417 do
2418 {
2419 Assert(i < pPool->cMaxUsers);
2420 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2421 i = paUsers[i].iNext;
2422 } while (i != NIL_PGMPOOL_USER_INDEX);
2423 }
2424# endif
2425
2426 /*
2427 * Allocate a user node.
2428 */
2429 uint16_t i = pPool->iUserFreeHead;
2430 if (i == NIL_PGMPOOL_USER_INDEX)
2431 {
2432 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2433 if (RT_FAILURE(rc))
2434 return rc;
2435 i = pPool->iUserFreeHead;
2436 }
2437 pPool->iUserFreeHead = paUsers[i].iNext;
2438
2439 /*
2440 * Initialize the user node and insert it.
2441 */
2442 paUsers[i].iNext = pPage->iUserHead;
2443 paUsers[i].iUser = iUser;
2444 paUsers[i].iUserTable = iUserTable;
2445 pPage->iUserHead = i;
2446
2447# ifdef PGMPOOL_WITH_CACHE
2448 /*
2449 * Tell the cache to update its replacement stats for this page.
2450 */
2451 pgmPoolCacheUsed(pPool, pPage);
2452# endif
2453 return VINF_SUCCESS;
2454}
2455# endif /* PGMPOOL_WITH_CACHE */
2456
2457
2458/**
2459 * Frees a user record associated with a page.
2460 *
2461 * This does not clear the entry in the user table, it simply returns the
2462 * user record to the chain of free records.
2463 *
2464 * @param pPool The pool.
2465 * @param pPage The shadow page.
2466 * @param iUser The shadow page pool index of the user table.
2467 * @param iUserTable The index into the user table (shadowed).
2468 */
2469static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2470{
2471 /*
2472 * Unlink and free the specified user entry.
2473 */
2474 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2475
2476 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2477 uint16_t i = pPage->iUserHead;
2478 if ( i != NIL_PGMPOOL_USER_INDEX
2479 && paUsers[i].iUser == iUser
2480 && paUsers[i].iUserTable == iUserTable)
2481 {
2482 pPage->iUserHead = paUsers[i].iNext;
2483
2484 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2485 paUsers[i].iNext = pPool->iUserFreeHead;
2486 pPool->iUserFreeHead = i;
2487 return;
2488 }
2489
2490 /* General: Linear search. */
2491 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2492 while (i != NIL_PGMPOOL_USER_INDEX)
2493 {
2494 if ( paUsers[i].iUser == iUser
2495 && paUsers[i].iUserTable == iUserTable)
2496 {
2497 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2498 paUsers[iPrev].iNext = paUsers[i].iNext;
2499 else
2500 pPage->iUserHead = paUsers[i].iNext;
2501
2502 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2503 paUsers[i].iNext = pPool->iUserFreeHead;
2504 pPool->iUserFreeHead = i;
2505 return;
2506 }
2507 iPrev = i;
2508 i = paUsers[i].iNext;
2509 }
2510
2511 /* Fatal: didn't find it */
2512 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2513 iUser, iUserTable, pPage->GCPhys));
2514}
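
/*
 * User records live in a single array (pPool->CTX_SUFF(paUsers)) chained by
 * 16-bit indices rather than pointers, which keeps the structures valid in
 * the ring-3, ring-0 and RC mappings alike; freeing a record is just a push
 * onto pPool->iUserFreeHead. The free-list operations in isolation, assuming
 * plain C99 (the Example names are illustrative inventions):
 */
#if 0 /* sketch, not compiled */
#include <stdint.h>

#define EXAMPLE_NIL UINT16_MAX  /* plays the role of NIL_PGMPOOL_USER_INDEX */

typedef struct ExampleUser
{
    uint16_t iNext;  /* next in the per-page user chain or in the free list */
} ExampleUser;

static void exampleUserFree(ExampleUser *paUsers, uint16_t *piFreeHead, uint16_t i)
{
    paUsers[i].iNext = *piFreeHead;
    *piFreeHead      = i;
}

static uint16_t exampleUserAlloc(ExampleUser *paUsers, uint16_t *piFreeHead)
{
    uint16_t i = *piFreeHead;
    if (i != EXAMPLE_NIL)
        *piFreeHead = paUsers[i].iNext;
    return i;                    /* EXAMPLE_NIL if the list was empty. */
}
#endif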
2515
2516
2517/**
2518 * Gets the entry size of a shadow table.
2519 *
2520 * @param enmKind The kind of page.
2521 *
2522 * @returns The size of the entry in bytes. That is, 4 or 8.
2523 * @returns If the kind is not for a table, an assertion is raised and 0 is
2524 * returned.
2525 */
2526DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2527{
2528 switch (enmKind)
2529 {
2530 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2531 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2532 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2533#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2534 case PGMPOOLKIND_32BIT_PD:
2535 case PGMPOOLKIND_32BIT_PD_PHYS:
2536#else
2537 case PGMPOOLKIND_ROOT_32BIT_PD:
2538#endif
2539 return 4;
2540
2541 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2542 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2543 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2544 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2545 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2546 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2547 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2548 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2549 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2550 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2551 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2552 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2553 case PGMPOOLKIND_64BIT_PML4:
2554#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2555 case PGMPOOLKIND_ROOT_PAE_PD:
2556 case PGMPOOLKIND_ROOT_PDPT:
2557#endif
2558 case PGMPOOLKIND_PAE_PDPT:
2559 case PGMPOOLKIND_ROOT_NESTED:
2560 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2561 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2562 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2563 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2564 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2565 case PGMPOOLKIND_PAE_PD_PHYS:
2566 case PGMPOOLKIND_PAE_PDPT_PHYS:
2567 return 8;
2568
2569 default:
2570 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2571 }
2572}
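
/*
 * The entry size implies how many entries a 4 KB shadow table holds: 1024
 * four-byte entries for legacy 32-bit tables, 512 eight-byte entries for
 * PAE/AMD64/EPT tables. A sketch of the arithmetic at a hypothetical call
 * site:
 */
#if 0 /* sketch, not compiled */
    unsigned cbEntry  = pgmPoolTrackGetShadowEntrySize(enmKind);  /* 4 or 8 */
    unsigned cEntries = PAGE_SIZE / cbEntry;                      /* 1024 or 512 */
#endif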
2573
2574
2575/**
2576 * Gets the entry size of a guest table.
2577 *
2578 * @param enmKind The kind of page.
2579 *
2580 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2581 * @returns If the kind is not for a table, an assertion is raised and 0 is
2582 * returned.
2583 */
2584DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2585{
2586 switch (enmKind)
2587 {
2588 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2589 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2590#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2591 case PGMPOOLKIND_32BIT_PD:
2592#else
2593 case PGMPOOLKIND_ROOT_32BIT_PD:
2594#endif
2595 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2596 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2597 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2598 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2599 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2600 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2601 return 4;
2602
2603 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2604 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2605 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2606 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2607 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2608 case PGMPOOLKIND_64BIT_PML4:
2609#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2610 case PGMPOOLKIND_PAE_PDPT:
2611#else
2612 case PGMPOOLKIND_ROOT_PAE_PD:
2613 case PGMPOOLKIND_ROOT_PDPT:
2614#endif
2615 return 8;
2616
2617 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2618 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2619 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2620 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2621 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2622 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2623 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2624 case PGMPOOLKIND_ROOT_NESTED:
2625 case PGMPOOLKIND_PAE_PD_PHYS:
2626 case PGMPOOLKIND_PAE_PDPT_PHYS:
2627 case PGMPOOLKIND_32BIT_PD_PHYS:
2628 /** @todo can we return 0? (nobody is calling this...) */
2629 AssertFailed();
2630 return 0;
2631
2632 default:
2633 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2634 }
2635}
2636
2637#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2638
2639/**
2640 * Scans one shadow page table for mappings of a physical page.
2641 *
2642 * @param pVM The VM handle.
2643 * @param pPhysPage The guest page in question.
2644 * @param iShw The shadow page table.
2645 * @param cRefs The number of references made in that PT.
2646 */
2647static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2648{
2649 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2650 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2651
2652 /*
2653 * Assert sanity.
2654 */
2655 Assert(cRefs == 1);
2656 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2657 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2658
2659 /*
2660 * Then, clear the actual mappings to the page in the shadow PT.
2661 */
2662 switch (pPage->enmKind)
2663 {
2664 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2665 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2666 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2667 {
2668 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2669 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2670 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2671 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2672 {
2673 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2674 pPT->a[i].u = 0;
2675 cRefs--;
2676 if (!cRefs)
2677 return;
2678 }
2679#ifdef LOG_ENABLED
2680 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2681 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2682 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2683 {
2684 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2685 pPT->a[i].u = 0;
2686 }
2687#endif
2688 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2689 break;
2690 }
2691
2692 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2693 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2694 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2695 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2696 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2697 {
2698 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2699 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2700 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2701 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2702 {
2703 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2704 pPT->a[i].u = 0;
2705 cRefs--;
2706 if (!cRefs)
2707 return;
2708 }
2709#ifdef LOG_ENABLED
2710 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2711 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2712 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2713 {
2714 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2715 pPT->a[i].u = 0;
2716 }
2717#endif
2718 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
2719 break;
2720 }
2721
2722 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2723 {
2724 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2725 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2726 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2727 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2728 {
2729 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2730 pPT->a[i].u = 0;
2731 cRefs--;
2732 if (!cRefs)
2733 return;
2734 }
2735#ifdef LOG_ENABLED
2736 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2737 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2738 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2739 {
2740 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2741 pPT->a[i].u = 0;
2742 }
2743#endif
2744 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2745 break;
2746 }
2747
2748 default:
2749 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2750 }
2751}
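
/*
 * All three scan loops above share one matching idiom: build a search key of
 * the host physical address with the present bit set, then compare each PTE
 * under a mask of the physical-address bits plus X86_PTE_P, so the attribute
 * bits (RW, US, A, D, ...) never influence the match. The PAE variant of the
 * idiom in isolation (a sketch, assuming the local variables used above):
 */
#if 0 /* sketch, not compiled */
    const uint64_t u64Key = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
    if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64Key)
        pPT->a[i].u = 0;   /* present and mapping our page -> clear it. */
#endif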
2752
2753
2754/**
2755 * Scans one shadow page table for mappings of a physical page.
2756 *
2757 * @param pVM The VM handle.
2758 * @param pPhysPage The guest page in question.
2759 * @param iShw The shadow page table.
2760 * @param cRefs The number of references made in that PT.
2761 */
2762void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2763{
2764 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2765 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2766 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2767 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2768 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2769 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2770}
2771
2772
2773/**
2774 * Flushes a list of shadow page tables mapping the same physical page.
2775 *
2776 * @param pVM The VM handle.
2777 * @param pPhysPage The guest page in question.
2778 * @param iPhysExt The physical cross reference extent list to flush.
2779 */
2780void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2781{
2782 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2783 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2784 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%d\n", pPhysPage->HCPhys, iPhysExt));
2785
2786 const uint16_t iPhysExtStart = iPhysExt;
2787 PPGMPOOLPHYSEXT pPhysExt;
2788 do
2789 {
2790 Assert(iPhysExt < pPool->cMaxPhysExts);
2791 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2792 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2793 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2794 {
2795 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2796 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2797 }
2798
2799 /* next */
2800 iPhysExt = pPhysExt->iNext;
2801 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2802
2803 /* insert the list into the free list and clear the ram range entry. */
2804 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2805 pPool->iPhysExtFreeHead = iPhysExtStart;
2806 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2807
2808 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2809}
2810
2811#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2812
2813/**
2814 * Scans all shadow page tables for mappings of a physical page.
2815 *
2816 * This may be slow, but it's most likely more efficient than cleaning
2817 * out the entire page pool / cache.
2818 *
2819 * @returns VBox status code.
2820 * @retval VINF_SUCCESS if all references have been successfully cleared.
2821 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2822 * a page pool cleaning.
2823 *
2824 * @param pVM The VM handle.
2825 * @param pPhysPage The guest page in question.
2826 */
2827int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2828{
2829 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2830 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2831 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2832 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2833
2834#if 1
2835 /*
2836 * There is a limit to what makes sense.
2837 */
2838 if (pPool->cPresent > 1024)
2839 {
2840 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2841 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2842 return VINF_PGM_GCPHYS_ALIASED;
2843 }
2844#endif
2845
2846 /*
2847 * Iterate all the pages until we've encountered all those that are in use.
2848 * This is a simple but not quite optimal solution.
2849 */
2850 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2851 const uint32_t u32 = u64;
2852 unsigned cLeft = pPool->cUsedPages;
2853 unsigned iPage = pPool->cCurPages;
2854 while (--iPage >= PGMPOOL_IDX_FIRST)
2855 {
2856 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2857 if (pPage->GCPhys != NIL_RTGCPHYS)
2858 {
2859 switch (pPage->enmKind)
2860 {
2861 /*
2862 * We only care about shadow page tables.
2863 */
2864 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2865 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2866 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2867 {
2868 unsigned cPresent = pPage->cPresent;
2869 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2870 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2871 if (pPT->a[i].n.u1Present)
2872 {
2873 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2874 {
2875 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2876 pPT->a[i].u = 0;
2877 }
2878 if (!--cPresent)
2879 break;
2880 }
2881 break;
2882 }
2883
2884 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2885 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2886 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2887 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2888 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2889 {
2890 unsigned cPresent = pPage->cPresent;
2891 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2892 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2893 if (pPT->a[i].n.u1Present)
2894 {
2895 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2896 {
2897 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2898 pPT->a[i].u = 0;
2899 }
2900 if (!--cPresent)
2901 break;
2902 }
2903 break;
2904 }
2905 }
2906 if (!--cLeft)
2907 break;
2908 }
2909 }
2910
2911 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2912 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2913 return VINF_SUCCESS;
2914}
2915
2916
2917/**
2918 * Clears the user entry in a user table.
2919 *
2920 * This is used to remove all references to a page when flushing it.
2921 */
2922static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2923{
2924 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2925 Assert(pUser->iUser < pPool->cCurPages);
2926 uint32_t iUserTable = pUser->iUserTable;
2927
2928 /*
2929 * Map the user page.
2930 */
2931 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2932#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2933 if (pUserPage->enmKind == PGMPOOLKIND_ROOT_PAE_PD)
2934 {
2935 /* Must translate the fake 2048-entry PD into one of the four 512-entry PDs, since the R0 mapping is not linear. */
2936 Assert(pUser->iUser == PGMPOOL_IDX_PAE_PD);
2937 uint32_t iPdpt = iUserTable / X86_PG_PAE_ENTRIES;
2938 iUserTable %= X86_PG_PAE_ENTRIES;
2939 pUserPage = &pPool->aPages[PGMPOOL_IDX_PAE_PD_0 + iPdpt];
2940 Assert(pUserPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD);
2941 }
2942#endif
2943 union
2944 {
2945 uint64_t *pau64;
2946 uint32_t *pau32;
2947 } u;
2948 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
2949
2950 /* Safety precaution in case we change the paging for other modes too in the future. */
2951 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
2952
2953#ifdef VBOX_STRICT
2954 /*
2955 * Some sanity checks.
2956 */
2957 switch (pUserPage->enmKind)
2958 {
2959# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2960 case PGMPOOLKIND_32BIT_PD:
2961 case PGMPOOLKIND_32BIT_PD_PHYS:
2962 Assert(iUserTable < X86_PG_ENTRIES);
2963 break;
2964# else
2965 case PGMPOOLKIND_ROOT_32BIT_PD:
2966 Assert(iUserTable < X86_PG_ENTRIES);
2967 Assert(!(u.pau32[iUserTable] & PGM_PDFLAGS_MAPPING));
2968 break;
2969# endif
2970# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2971 case PGMPOOLKIND_ROOT_PAE_PD:
2972 Assert(iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2973 AssertMsg(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING), ("%llx %d\n", u.pau64[iUserTable], iUserTable));
2974 break;
2975# endif
2976# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2977 case PGMPOOLKIND_PAE_PDPT:
2978 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2979 case PGMPOOLKIND_PAE_PDPT_PHYS:
2980# else
2981 case PGMPOOLKIND_ROOT_PDPT:
2982# endif
2983 Assert(iUserTable < 4);
2984 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2985 break;
2986 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2987 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2988 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2989 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2990 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2991 case PGMPOOLKIND_PAE_PD_PHYS:
2992 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2993 break;
2994 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2995 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2996 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
2997 break;
2998 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2999 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3000 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3001 break;
3002 case PGMPOOLKIND_64BIT_PML4:
3003 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3004 /* GCPhys >> PAGE_SHIFT is the index here */
3005 break;
3006 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3007 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3008 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3009 break;
3010
3011 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3012 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3013 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3014 break;
3015
3016 case PGMPOOLKIND_ROOT_NESTED:
3017 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3018 break;
3019
3020 default:
3021 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3022 break;
3023 }
3024#endif /* VBOX_STRICT */
3025
3026 /*
3027 * Clear the entry in the user page.
3028 */
3029 switch (pUserPage->enmKind)
3030 {
3031 /* 32-bit entries */
3032#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3033 case PGMPOOLKIND_32BIT_PD:
3034 case PGMPOOLKIND_32BIT_PD_PHYS:
3035#else
3036 case PGMPOOLKIND_ROOT_32BIT_PD:
3037#endif
3038 u.pau32[iUserTable] = 0;
3039 break;
3040
3041 /* 64-bit entries */
3042 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3043 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3044 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3045 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3046 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3047 case PGMPOOLKIND_PAE_PD_PHYS:
3048 case PGMPOOLKIND_PAE_PDPT_PHYS:
3049 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3050 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3051 case PGMPOOLKIND_64BIT_PML4:
3052 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3053 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3054# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3055 case PGMPOOLKIND_ROOT_PAE_PD:
3056#endif
3057#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3058 case PGMPOOLKIND_PAE_PDPT:
3059 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3060#else
3061 case PGMPOOLKIND_ROOT_PDPT:
3062#endif
3063 case PGMPOOLKIND_ROOT_NESTED:
3064 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3065 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3066 u.pau64[iUserTable] = 0;
3067 break;
3068
3069 default:
3070 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3071 }
3072}
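
/*
 * One mapping of the user page serves both entry widths via the pau32/pau64
 * union above; the page kind alone decides whether a 4-byte or an 8-byte
 * store clears the slot. The same aliasing pattern in isolation, assuming
 * plain C99:
 */
#if 0 /* sketch, not compiled */
#include <stdint.h>

static void exampleClearTableEntry(void *pvTable, unsigned iEntry, int fEntryIs64Bit)
{
    union
    {
        uint64_t *pau64;
        uint32_t *pau32;
    } u;
    u.pau64 = (uint64_t *)pvTable;
    if (fEntryIs64Bit)
        u.pau64[iEntry] = 0;   /* PAE/AMD64/EPT tables: 8-byte entries. */
    else
        u.pau32[iEntry] = 0;   /* legacy 32-bit tables: 4-byte entries. */
}
#endif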
3073
3074
3075/**
3076 * Clears all users of a page.
3077 */
3078static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3079{
3080 /*
3081 * Free all the user records.
3082 */
3083 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3084 uint16_t i = pPage->iUserHead;
3085 while (i != NIL_PGMPOOL_USER_INDEX)
3086 {
3087 /* Clear the entry in the user table. */
3088 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3089
3090 /* Free it. */
3091 const uint16_t iNext = paUsers[i].iNext;
3092 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3093 paUsers[i].iNext = pPool->iUserFreeHead;
3094 pPool->iUserFreeHead = i;
3095
3096 /* Next. */
3097 i = iNext;
3098 }
3099 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3100}
3101
3102#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3103
3104/**
3105 * Allocates a new physical cross reference extent.
3106 *
3107 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3108 * @param pVM The VM handle.
3109 * @param piPhysExt Where to store the phys ext index.
3110 */
3111PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3112{
3113 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3114 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3115 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3116 {
3117 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3118 return NULL;
3119 }
3120 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3121 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3122 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3123 *piPhysExt = iPhysExt;
3124 return pPhysExt;
3125}
3126
3127
3128/**
3129 * Frees a physical cross reference extent.
3130 *
3131 * @param pVM The VM handle.
3132 * @param iPhysExt The extent to free.
3133 */
3134void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3135{
3136 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3137 Assert(iPhysExt < pPool->cMaxPhysExts);
3138 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3139 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3140 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3141 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3142 pPool->iPhysExtFreeHead = iPhysExt;
3143}
3144
3145
3146/**
3147 * Frees a list of physical cross reference extents.
3148 *
3149 * @param pVM The VM handle.
3150 * @param iPhysExt The index of the head of the extent list to free.
3151 */
3152void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3153{
3154 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3155
3156 const uint16_t iPhysExtStart = iPhysExt;
3157 PPGMPOOLPHYSEXT pPhysExt;
3158 do
3159 {
3160 Assert(iPhysExt < pPool->cMaxPhysExts);
3161 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3162 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3163 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3164
3165 /* next */
3166 iPhysExt = pPhysExt->iNext;
3167 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3168
3169 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3170 pPool->iPhysExtFreeHead = iPhysExtStart;
3171}
3172
3173
3174/**
3175 * Insert a reference into a list of physical cross reference extents.
3176 *
3177 * @returns The new ram range flags (top 16-bits).
3178 *
3179 * @param pVM The VM handle.
3180 * @param iPhysExt The physical extent index of the list head.
3181 * @param iShwPT The shadow page table index.
3182 *
3183 */
3184static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3185{
3186 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3187 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3188
3189 /* special common case. */
3190 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3191 {
3192 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3193 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3194 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
3195 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3196 }
3197
3198 /* general treatment. */
3199 const uint16_t iPhysExtStart = iPhysExt;
3200 unsigned cMax = 15;
3201 for (;;)
3202 {
3203 Assert(iPhysExt < pPool->cMaxPhysExts);
3204 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3205 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3206 {
3207 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3208 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3209 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3210 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3211 }
3212 if (!--cMax)
3213 {
3214 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3215 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3216 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
3217 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3218 }
        /* advance to the next extent in the chain; if there is none left,
           drop out of the loop and link in a new extent below. */
        iPhysExt = paPhysExts[iPhysExt].iNext;
        if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
            break;
3219 }
3220
3221 /* add another extent to the list. */
3222 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3223 if (!pNew)
3224 {
3225 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3226 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3227 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3228 }
3229 pNew->iNext = iPhysExtStart;
3230 pNew->aidx[0] = iShwPT;
3231 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3232 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3233}
3234
3235
3236/**
3237 * Adds a reference to a guest physical page where extents are in use.
3238 *
3239 * @returns The new ram range flags (top 16-bits).
3240 *
3241 * @param pVM The VM handle.
3242 * @param u16 The ram range flags (top 16-bits).
3243 * @param iShwPT The shadow page table index.
3244 */
3245uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3246{
3247 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
3248 {
3249 /*
3250 * Convert to extent list.
3251 */
3252 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
3253 uint16_t iPhysExt;
3254 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3255 if (pPhysExt)
3256 {
3257 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
3258 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3259 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
3260 pPhysExt->aidx[1] = iShwPT;
3261 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3262 }
3263 else
3264 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3265 }
3266 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
3267 {
3268 /*
3269 * Insert into the extent list.
3270 */
3271 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
3272 }
3273 else
3274 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3275 return u16;
3276}
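
/*
 * The u16 juggled above is the top half of the ram range page flags kept in
 * the HCPhys field (see the PAGE FLAGS todos elsewhere in this file): an
 * index in the MM_RAM_FLAGS_IDX_MASK bits and a cross-reference count above
 * it. A count value of MM_RAM_FLAGS_CREFS_PHYSEXT re-types the index as the
 * head of a phys ext chain rather than a single shadow page table index.
 * An encode/decode sketch using only the macros already used in this file:
 */
#if 0 /* sketch, not compiled */
    /* encode: tag iPhysExt as a phys ext chain head. */
    uint16_t u16    = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));

    /* decode: split the two fields again. */
    uint16_t uCRefs = u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT);
    uint16_t iIdx   = u16 & MM_RAM_FLAGS_IDX_MASK;
    bool     fChain = uCRefs == MM_RAM_FLAGS_CREFS_PHYSEXT;
#endif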
3277
3278
3279/**
3280 * Clear references to guest physical memory.
3281 *
3282 * @param pPool The pool.
3283 * @param pPage The page.
3284 * @param pPhysPage Pointer to the aPages entry in the ram range.
3285 */
3286void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3287{
3288 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
3289 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3290
3291 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
3292 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
3293 {
3294 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3295 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3296 do
3297 {
3298 Assert(iPhysExt < pPool->cMaxPhysExts);
3299
3300 /*
3301 * Look for the shadow page and check if it's all freed.
3302 */
3303 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3304 {
3305 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3306 {
3307 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3308
3309 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3310 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3311 {
3312 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3313 return;
3314 }
3315
3316 /* we can free the node. */
3317 PVM pVM = pPool->CTX_SUFF(pVM);
3318 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3319 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3320 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3321 {
3322 /* lonely node */
3323 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3324 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
3325 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3326 }
3327 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3328 {
3329 /* head */
3330 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
3331 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
3332 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
3333 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
3334 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3335 }
3336 else
3337 {
3338 /* in list */
3339 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3340 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3341 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3342 }
3343 iPhysExt = iPhysExtNext;
3344 return;
3345 }
3346 }
3347
3348 /* next */
3349 iPhysExtPrev = iPhysExt;
3350 iPhysExt = paPhysExts[iPhysExt].iNext;
3351 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3352
3353 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3354 }
3355 else /* nothing to do */
3356 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
3357}
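/*
 * Minimal sketch (illustrative only, hypothetical names) of the three unlink
 * cases the loop above distinguishes for the singly linked phys-ext chain:
 * lonely node, head node and interior node.
 */
typedef struct SKETCHNODE { uint16_t iNext; } SKETCHNODE;
static void pgmPoolSketchUnlink(SKETCHNODE *paNodes, uint16_t *piHead, uint16_t iPrev, uint16_t iCur, uint16_t iNil)
{
    const uint16_t iNext = paNodes[iCur].iNext;
    if (iPrev == iNil && iNext == iNil)
        *piHead = iNil;                 /* lonely node: the chain becomes empty. */
    else if (iPrev == iNil)
        *piHead = iNext;                /* head node: the successor becomes the new head. */
    else
        paNodes[iPrev].iNext = iNext;   /* interior node: bypass it. */
}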
3358
3359
3360/**
3361 * Clear references to guest physical memory.
3362 *
3363 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3364 * is assumed to be correct, so the linear search can be skipped and we can assert
3365 * at an earlier point.
3366 *
3367 * @param pPool The pool.
3368 * @param pPage The page.
3369 * @param HCPhys The host physical address corresponding to the guest page.
3370 * @param GCPhys The guest physical address corresponding to HCPhys.
3371 */
3372static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3373{
3374 /*
3375 * Walk range list.
3376 */
3377 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3378 while (pRam)
3379 {
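        /* Unsigned wrap-around: one compare of (GCPhys - base) against the range size checks both bounds. */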
3380 RTGCPHYS off = GCPhys - pRam->GCPhys;
3381 if (off < pRam->cb)
3382 {
3383 /* does it match? */
3384 const unsigned iPage = off >> PAGE_SHIFT;
3385 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3386#ifdef LOG_ENABLED
3387            RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3388            Log(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3389#endif
3390 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3391 {
3392 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3393 return;
3394 }
3395 break;
3396 }
3397 pRam = pRam->CTX_SUFF(pNext);
3398 }
3399 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3400}
3401
3402
3403/**
3404 * Clear references to guest physical memory.
3405 *
3406 * @param pPool The pool.
3407 * @param pPage The page.
3408 * @param HCPhys The host physical address corresponding to the guest page.
3409 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3410 */
3411static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3412{
3413 /*
3414 * Walk range list.
3415 */
3416 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3417 while (pRam)
3418 {
3419 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3420 if (off < pRam->cb)
3421 {
3422 /* does it match? */
3423 const unsigned iPage = off >> PAGE_SHIFT;
3424 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3425 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3426 {
3427 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3428 return;
3429 }
3430 break;
3431 }
3432 pRam = pRam->CTX_SUFF(pNext);
3433 }
3434
3435 /*
3436 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3437 */
3438 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3439 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3440 while (pRam)
3441 {
3442 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3443 while (iPage-- > 0)
3444 {
3445 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3446 {
3447 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3448 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3449 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3450 return;
3451 }
3452 }
3453 pRam = pRam->CTX_SUFF(pNext);
3454 }
3455
3456 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3457}
3458
3459
3460/**
3461 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3462 *
3463 * @param pPool The pool.
3464 * @param pPage The page.
3465 * @param pShwPT The shadow page table (mapping of the page).
3466 * @param pGstPT The guest page table.
3467 */
3468DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3469{
3470 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3471 if (pShwPT->a[i].n.u1Present)
3472 {
3473 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3474 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3475 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
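            /* cPresent counts the page's present PTEs; once they are all accounted for we can stop scanning. */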
3476 if (!--pPage->cPresent)
3477 break;
3478 }
3479}
3480
3481
3482/**
3483 * Clear references to guest physical memory in a PAE / 32-bit page table.
3484 *
3485 * @param pPool The pool.
3486 * @param pPage The page.
3487 * @param pShwPT The shadow page table (mapping of the page).
3488 * @param pGstPT The guest page table (just a half one).
3489 */
3490DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3491{
3492 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3493 if (pShwPT->a[i].n.u1Present)
3494 {
3495 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
3496 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3497 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3498 }
3499}
3500
3501
3502/**
3503 * Clear references to guest physical memory in a PAE / PAE page table.
3504 *
3505 * @param pPool The pool.
3506 * @param pPage The page.
3507 * @param pShwPT The shadow page table (mapping of the page).
3508 * @param pGstPT The guest page table.
3509 */
3510DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3511{
3512 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3513 if (pShwPT->a[i].n.u1Present)
3514 {
3515 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
3516 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3517 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3518 }
3519}
3520
3521
3522/**
3523 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3524 *
3525 * @param pPool The pool.
3526 * @param pPage The page.
3527 * @param pShwPT The shadow page table (mapping of the page).
3528 */
3529DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3530{
3531 RTGCPHYS GCPhys = pPage->GCPhys;
3532 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3533 if (pShwPT->a[i].n.u1Present)
3534 {
3535 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3536 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3537 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3538 }
3539}
3540
3541
3542/**
3543 * Clear references to guest physical memory in a PAE / 2 MB or 4 MB page table.
3544 *
3545 * @param pPool The pool.
3546 * @param pPage The page.
3547 * @param pShwPT The shadow page table (mapping of the page).
3548 */
3549DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3550{
3551 RTGCPHYS GCPhys = pPage->GCPhys;
3552 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3553 if (pShwPT->a[i].n.u1Present)
3554 {
3555 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3556 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3557 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3558 }
3559}
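/*
 * Note (added for clarity): in the two big-page variants above the guest
 * address of PTE i is simply pPage->GCPhys + i * PAGE_SIZE, because a
 * 2 MB / 4 MB guest mapping is shadowed as a linear run of 4 KB pages.
 */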
3560
3561#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3562
3563
3564#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3565/**
3566 * Clear references to shadowed pages in a 32-bit page directory.
3567 *
3568 * @param pPool The pool.
3569 * @param pPage The page.
3570 * @param pShwPD The shadow page directory (mapping of the page).
3571 */
3572DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
3573{
3574 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3575 {
3576 if ( pShwPD->a[i].n.u1Present
3577 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3578 )
3579 {
3580 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
3581 if (pSubPage)
3582 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3583 else
3584 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
3585 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3586 }
3587 }
3588}
3589#endif
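/*
 * Illustrative sketch (hypothetical helper, not part of the original file):
 * the deref routines above and below all follow one pattern - resolve the
 * shadow entry back to its pool page via the HCPhys-keyed AVL tree, then
 * drop this page's user reference. Callers additionally skip entries
 * carrying the hypervisor mapping flags.
 */
DECLINLINE(void) pgmPoolSketchDerefEntry(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint64_t u64Entry, uint64_t fPgMask, unsigned iEntry)
{
    PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, u64Entry & fPgMask);
    if (pSubPage)
        pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, iEntry); /* unlink (pPage, iEntry) from pSubPage's user list. */
}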
3590
3591/**
3592 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
3593 *
3594 * @param pPool The pool.
3595 * @param pPage The page.
3596 * @param pShwPD The shadow page directory (mapping of the page).
3597 */
3598DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3599{
3600 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3601 {
3602 if ( pShwPD->a[i].n.u1Present
3603#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3604 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3605#endif
3606 )
3607 {
3608 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3609 if (pSubPage)
3610 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3611 else
3612 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3613 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3614 }
3615 }
3616}
3617
3618
3619/**
3620 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3621 *
3622 * @param pPool The pool.
3623 * @param pPage The page.
3624 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3625 */
3626DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3627{
3628 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3629 {
3630 if ( pShwPDPT->a[i].n.u1Present
3631#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3632 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
3633#endif
3634 )
3635 {
3636 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3637 if (pSubPage)
3638 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3639 else
3640 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3641 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3642 }
3643 }
3644}
3645
3646
3647/**
3648 * Clear references to shadowed pages in a 64-bit level 4 page table.
3649 *
3650 * @param pPool The pool.
3651 * @param pPage The page.
3652 * @param pShwPML4 The shadow PML4 (mapping of the page).
3653 */
3654DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3655{
3656 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3657 {
3658 if (pShwPML4->a[i].n.u1Present)
3659 {
3660 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3661 if (pSubPage)
3662 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3663 else
3664 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3665 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3666 }
3667 }
3668}
3669
3670
3671/**
3672 * Clear references to shadowed pages in an EPT page table.
3673 *
3674 * @param pPool The pool.
3675 * @param pPage The page.
3676 * @param pShwPT The shadow EPT page table (mapping of the page).
3677 */
3678DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3679{
3680 RTGCPHYS GCPhys = pPage->GCPhys;
3681 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3682 if (pShwPT->a[i].n.u1Present)
3683 {
3684 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
3685                   i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3686 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3687 }
3688}
3689
3690
3691/**
3692 * Clear references to shadowed pages in an EPT page directory.
3693 *
3694 * @param pPool The pool.
3695 * @param pPage The page.
3696 * @param pShwPD The shadow page directory (mapping of the page).
3697 */
3698DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3699{
3700 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3701 {
3702 if (pShwPD->a[i].n.u1Present)
3703 {
3704 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3705 if (pSubPage)
3706 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3707 else
3708 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3709 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3710 }
3711 }
3712}
3713
3714
3715/**
3716 * Clear references to shadowed pages in an EPT page directory pointer table.
3717 *
3718 * @param pPool The pool.
3719 * @param pPage The page.
3720 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3721 */
3722DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3723{
3724 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3725 {
3726 if (pShwPDPT->a[i].n.u1Present)
3727 {
3728 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3729 if (pSubPage)
3730 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3731 else
3732 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3733 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3734 }
3735 }
3736}
3737
3738
3739/**
3740 * Clears all references made by this page.
3741 *
3742 * This includes other shadow pages and GC physical addresses.
3743 *
3744 * @param pPool The pool.
3745 * @param pPage The page.
3746 */
3747static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3748{
3749 /*
3750 * Map the shadow page and take action according to the page kind.
3751 */
3752 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3753 switch (pPage->enmKind)
3754 {
3755#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3756 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3757 {
3758 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3759 void *pvGst;
3760 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3761 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3762 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3763 break;
3764 }
3765
3766 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3767 {
3768 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3769 void *pvGst;
3770 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3771 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3772 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3773 break;
3774 }
3775
3776 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3777 {
3778 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3779 void *pvGst;
3780 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3781 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3782 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3783 break;
3784 }
3785
3786 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3787 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3788 {
3789 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3790 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3791 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3792 break;
3793 }
3794
3795 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3796 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3797 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3798 {
3799 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3800 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3801 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3802 break;
3803 }
3804
3805#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3806 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3807 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3808 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3809 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3810 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3811 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3812 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3813 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3814 break;
3815#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3816
3817 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3818 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3819 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3820 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3821 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3822 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3823 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3824 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3825 break;
3826
3827#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3828 case PGMPOOLKIND_32BIT_PD:
3829 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
3830 break;
3831
3832 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3833 case PGMPOOLKIND_PAE_PDPT:
3834#endif
3835 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3836 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3837 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3838 break;
3839
3840 case PGMPOOLKIND_64BIT_PML4:
3841 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3842 break;
3843
3844 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3845 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
3846 break;
3847
3848 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3849 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
3850 break;
3851
3852 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3853 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
3854 break;
3855
3856 default:
3857 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3858 }
3859
3860    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3861 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3862 ASMMemZeroPage(pvShw);
3863 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3864 pPage->fZeroed = true;
3865}
3866
3867#endif /* PGMPOOL_WITH_USER_TRACKING */
3868
3869/**
3870 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3871 *
3872 * @param pPool The pool.
3873 */
3874static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3875{
3876#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3877 /* Start a subset so we won't run out of mapping space. */
3878 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
3879 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3880#endif
3881
3882 /*
3883     * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST-1.
3884 */
3885 Assert(NIL_PGMPOOL_IDX == 0);
3886 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3887 {
3888 /*
3889 * Get the page address.
3890 */
3891 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3892 union
3893 {
3894 uint64_t *pau64;
3895 uint32_t *pau32;
3896 } u;
3897
3898 /*
3899 * Mark stuff not present.
3900 */
3901 switch (pPage->enmKind)
3902 {
3903#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
3904 case PGMPOOLKIND_ROOT_32BIT_PD:
3905 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3906 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3907 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3908 u.pau32[iPage] = 0;
3909 break;
3910
3911 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3912 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3913 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
3914 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3915 u.pau64[iPage] = 0;
3916 break;
3917
3918 case PGMPOOLKIND_ROOT_PDPT:
3919 /* Not root of shadowed pages currently, ignore it. */
3920 break;
3921#endif
3922
3923 case PGMPOOLKIND_ROOT_NESTED:
3924 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3925 ASMMemZero32(u.pau64, PAGE_SIZE);
3926 break;
3927 }
3928 }
3929
3930 /*
3931 * Paranoia (to be removed), flag a global CR3 sync.
3932 */
3933 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
3934
3935#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3936 /* Pop the subset. */
3937 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3938#endif
3939}
3940
3941
3942/**
3943 * Flushes the entire cache.
3944 *
3945 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3946 * and will execute this CR3 flush.
3947 *
3948 * @param pPool The pool.
3949 */
3950static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3951{
3952 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3953 LogFlow(("pgmPoolFlushAllInt:\n"));
3954
3955 /*
3956 * If there are no pages in the pool, there is nothing to do.
3957 */
3958 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3959 {
3960 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3961 return;
3962 }
3963
3964 /*
3965 * Nuke the free list and reinsert all pages into it.
3966 */
3967 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3968 {
3969 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3970
3971#ifdef IN_RING3
3972 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMR3, pPage->pvPageR3));
3973#endif
3974#ifdef PGMPOOL_WITH_MONITORING
3975 if (pPage->fMonitored)
3976 pgmPoolMonitorFlush(pPool, pPage);
3977 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3978 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3979 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3980 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3981 pPage->cModifications = 0;
3982#endif
3983 pPage->GCPhys = NIL_RTGCPHYS;
3984 pPage->enmKind = PGMPOOLKIND_FREE;
3985 Assert(pPage->idx == i);
3986 pPage->iNext = i + 1;
3987 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3988 pPage->fSeenNonGlobal = false;
3989        pPage->fMonitored = false;
3990 pPage->fCached = false;
3991 pPage->fReusedFlushPending = false;
3992 pPage->fCR3Mix = false;
3993#ifdef PGMPOOL_WITH_USER_TRACKING
3994 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3995#endif
3996#ifdef PGMPOOL_WITH_CACHE
3997 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3998 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3999#endif
4000 }
4001 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4002 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4003 pPool->cUsedPages = 0;
4004
4005#ifdef PGMPOOL_WITH_USER_TRACKING
4006 /*
4007 * Zap and reinitialize the user records.
4008 */
4009 pPool->cPresent = 0;
4010 pPool->iUserFreeHead = 0;
4011 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4012 const unsigned cMaxUsers = pPool->cMaxUsers;
4013 for (unsigned i = 0; i < cMaxUsers; i++)
4014 {
4015 paUsers[i].iNext = i + 1;
4016 paUsers[i].iUser = NIL_PGMPOOL_IDX;
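        /* 0xfffffffe: a deliberately invalid table index, handy for spotting stale user records. */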
4017 paUsers[i].iUserTable = 0xfffffffe;
4018 }
4019 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4020#endif
4021
4022#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4023 /*
4024 * Clear all the GCPhys links and rebuild the phys ext free list.
4025 */
4026 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4027 pRam;
4028 pRam = pRam->CTX_SUFF(pNext))
4029 {
4030 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4031 while (iPage-- > 0)
4032 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
4033 }
4034
4035 pPool->iPhysExtFreeHead = 0;
4036 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4037 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4038 for (unsigned i = 0; i < cMaxPhysExts; i++)
4039 {
4040 paPhysExts[i].iNext = i + 1;
4041 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4042 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4043 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4044 }
4045 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4046#endif
4047
4048#ifdef PGMPOOL_WITH_MONITORING
4049 /*
4050 * Just zap the modified list.
4051 */
4052 pPool->cModifiedPages = 0;
4053 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4054#endif
4055
4056#ifdef PGMPOOL_WITH_CACHE
4057 /*
4058 * Clear the GCPhys hash and the age list.
4059 */
4060 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4061 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4062 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4063 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4064#endif
4065
4066 /*
4067 * Flush all the special root pages.
4068 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4069 */
4070 pgmPoolFlushAllSpecialRoots(pPool);
4071 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4072 {
4073 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4074 pPage->iNext = NIL_PGMPOOL_IDX;
4075#ifdef PGMPOOL_WITH_MONITORING
4076 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4077 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4078 pPage->cModifications = 0;
4079 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4080 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4081 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4082 if (pPage->fMonitored)
4083 {
4084 PVM pVM = pPool->CTX_SUFF(pVM);
4085 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4086 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4087 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4088 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4089 pPool->pszAccessHandler);
4090 AssertFatalRCSuccess(rc);
4091# ifdef PGMPOOL_WITH_CACHE
4092 pgmPoolHashInsert(pPool, pPage);
4093# endif
4094 }
4095#endif
4096#ifdef PGMPOOL_WITH_USER_TRACKING
4097 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4098#endif
4099#ifdef PGMPOOL_WITH_CACHE
4100 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4101 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4102#endif
4103 }
4104
4105 /*
4106 * Finally, assert the FF.
4107 */
4108 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
4109
4110 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4111}
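/*
 * Minimal sketch (illustrative) of the index-chained free-list pattern that
 * pgmPoolFlushAllInt rebuilds three times above (pages, user records and
 * phys exts): entry i links to i + 1 and the last entry gets the NIL index.
 * Assumes cEntries >= 1.
 */
static void pgmPoolSketchInitFreeChain(uint16_t *paiNext, unsigned cEntries, uint16_t iNil)
{
    for (unsigned i = 0; i < cEntries - 1; i++)
        paiNext[i] = (uint16_t)(i + 1);
    paiNext[cEntries - 1] = iNil;   /* terminate the chain. */
}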
4112
4113
4114/**
4115 * Flushes a pool page.
4116 *
4117 * This moves the page to the free list after removing all user references to it.
4118 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
4119 *
4120 * @returns VBox status code.
4121 * @retval VINF_SUCCESS on success.
4122 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
4123 * @param pPool The pool.
4124 * @param pPage The shadow page.
4125 */
4126int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4127{
4128 int rc = VINF_SUCCESS;
4129 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4130 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%d, .GCPhys=%RGp}\n",
4131 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
4132
4133 /*
4134 * Quietly reject any attempts at flushing any of the special root pages.
4135 */
4136 if (pPage->idx < PGMPOOL_IDX_FIRST)
4137 {
4138 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4139 return VINF_SUCCESS;
4140 }
4141
4142 /*
4143     * Quietly reject any attempts at flushing the currently active shadow CR3 mapping.
4144 */
4145 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
4146 {
4147#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4148 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4,
4149 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4150#endif
4151 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4152 return VINF_SUCCESS;
4153 }
4154
4155#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4156 /* Start a subset so we won't run out of mapping space. */
4157 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4158 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4159#endif
4160
4161 /*
4162     * Mark the page as being in need of an ASMMemZeroPage().
4163 */
4164 pPage->fZeroed = false;
4165
4166#ifdef PGMPOOL_WITH_USER_TRACKING
4167 /*
4168 * Clear the page.
4169 */
4170 pgmPoolTrackClearPageUsers(pPool, pPage);
4171    STAM_PROFILE_START(&pPool->StatTrackDeref, a);
4172    pgmPoolTrackDeref(pPool, pPage);
4173    STAM_PROFILE_STOP(&pPool->StatTrackDeref, a);
4174#endif
4175
4176#ifdef PGMPOOL_WITH_CACHE
4177 /*
4178 * Flush it from the cache.
4179 */
4180 pgmPoolCacheFlushPage(pPool, pPage);
4181#endif /* PGMPOOL_WITH_CACHE */
4182
4183#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4184 /* Heavy stuff done. */
4185 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4186#endif
4187
4188#ifdef PGMPOOL_WITH_MONITORING
4189 /*
4190     * Deregister the monitoring.
4191 */
4192 if (pPage->fMonitored)
4193 rc = pgmPoolMonitorFlush(pPool, pPage);
4194#endif
4195
4196 /*
4197 * Free the page.
4198 */
4199 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4200 pPage->iNext = pPool->iFreeHead;
4201 pPool->iFreeHead = pPage->idx;
4202 pPage->enmKind = PGMPOOLKIND_FREE;
4203 pPage->GCPhys = NIL_RTGCPHYS;
4204 pPage->fReusedFlushPending = false;
4205
4206 pPool->cUsedPages--;
4207 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4208 return rc;
4209}
4210
4211
4212/**
4213 * Frees a usage of a pool page.
4214 *
4215 * The caller is responsible for updating the user table so that it no longer
4216 * references the shadow page.
4217 *
4218 * @param pPool The pool.
4219 * @param pPage The shadow page.
4220 * @param iUser The shadow page pool index of the user table.
4221 * @param iUserTable The index into the user table (shadowed).
4222 */
4223void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4224{
4225 STAM_PROFILE_START(&pPool->StatFree, a);
4226 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
4227 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
4228 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4229#ifdef PGMPOOL_WITH_USER_TRACKING
4230 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4231#endif
4232#ifdef PGMPOOL_WITH_CACHE
4233 if (!pPage->fCached)
4234#endif
4235 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
4236 STAM_PROFILE_STOP(&pPool->StatFree, a);
4237}
4238
4239
4240/**
4241 * Makes one or more pages free, growing the pool or evicting a cached page as needed.
4242 *
4243 * @returns VBox status code.
4244 * @retval VINF_SUCCESS on success.
4245 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4246 *
4247 * @param pPool The pool.
4248 * @param iUser The user of the page.
4249 */
4250static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
4251{
4252 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4253
4254 /*
4255     * If the pool isn't fully grown yet, expand it.
4256 */
4257 if (pPool->cCurPages < pPool->cMaxPages)
4258 {
4259 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4260#ifdef IN_RING3
4261 int rc = PGMR3PoolGrow(pPool->pVMR3);
4262#else
4263 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
4264#endif
4265 if (RT_FAILURE(rc))
4266 return rc;
4267 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4268 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4269 return VINF_SUCCESS;
4270 }
4271
4272#ifdef PGMPOOL_WITH_CACHE
4273 /*
4274 * Free one cached page.
4275 */
4276 return pgmPoolCacheFreeOne(pPool, iUser);
4277#else
4278 /*
4279 * Flush the pool.
4280 *
4281 * If we have tracking enabled, it should be possible to come up with
4282 * a cheap replacement strategy...
4283 */
4284    /** @todo incompatible with long mode paging (cr3 root will be flushed) */
4285    Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
4286 pgmPoolFlushAllInt(pPool);
4287 return VERR_PGM_POOL_FLUSHED;
4288#endif
4289}
4290
4291
4292/**
4293 * Allocates a page from the pool.
4294 *
4295 * This page may actually be a cached page and not in need of any processing
4296 * on the caller's part.
4297 *
4298 * @returns VBox status code.
4299 * @retval VINF_SUCCESS if a NEW page was allocated.
4300 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4301 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4302 * @param pVM The VM handle.
4303 * @param GCPhys The GC physical address of the page we're gonna shadow.
4304 * For 4MB and 2MB PD entries, it's the first address the
4305 * shadow PT is covering.
4306 * @param enmKind The kind of mapping.
4307 * @param iUser The shadow page pool index of the user table.
4308 * @param iUserTable The index into the user table (shadowed).
4309 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4310 */
4311int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
4312{
4313 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4314 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4315 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
4316 *ppPage = NULL;
4317 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4318 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4319 * Assert(!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4320
4321#ifdef PGMPOOL_WITH_CACHE
4322 if (pPool->fCacheEnabled)
4323 {
4324 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
4325 if (RT_SUCCESS(rc2))
4326 {
4327 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4328 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4329 return rc2;
4330 }
4331 }
4332#endif
4333
4334 /*
4335 * Allocate a new one.
4336 */
4337 int rc = VINF_SUCCESS;
4338 uint16_t iNew = pPool->iFreeHead;
4339 if (iNew == NIL_PGMPOOL_IDX)
4340 {
4341 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
4342 if (RT_FAILURE(rc))
4343 {
4344 if (rc != VERR_PGM_POOL_CLEARED)
4345 {
4346 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4347 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4348 return rc;
4349 }
4350 Log(("pgmPoolMakeMoreFreePages failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
4351 rc = VERR_PGM_POOL_FLUSHED;
4352 }
4353 iNew = pPool->iFreeHead;
4354 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4355 }
4356
4357 /* unlink the free head */
4358 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4359 pPool->iFreeHead = pPage->iNext;
4360 pPage->iNext = NIL_PGMPOOL_IDX;
4361
4362 /*
4363 * Initialize it.
4364 */
4365 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4366 pPage->enmKind = enmKind;
4367 pPage->GCPhys = GCPhys;
4368 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4369 pPage->fMonitored = false;
4370 pPage->fCached = false;
4371 pPage->fReusedFlushPending = false;
4372 pPage->fCR3Mix = false;
4373#ifdef PGMPOOL_WITH_MONITORING
4374 pPage->cModifications = 0;
4375 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4376 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4377#endif
4378#ifdef PGMPOOL_WITH_USER_TRACKING
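    /* Present-entry tracking: cPresent counts present PTEs; ~0 in iFirstPresent means none recorded yet. */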
4379 pPage->cPresent = 0;
4380 pPage->iFirstPresent = ~0;
4381
4382 /*
4383 * Insert into the tracking and cache. If this fails, free the page.
4384 */
4385 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4386 if (RT_FAILURE(rc3))
4387 {
4388 if (rc3 != VERR_PGM_POOL_CLEARED)
4389 {
4390 pPool->cUsedPages--;
4391 pPage->enmKind = PGMPOOLKIND_FREE;
4392 pPage->GCPhys = NIL_RTGCPHYS;
4393 pPage->iNext = pPool->iFreeHead;
4394 pPool->iFreeHead = pPage->idx;
4395 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4396 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4397 return rc3;
4398 }
4399 Log(("pgmPoolTrackInsert failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4400 rc = VERR_PGM_POOL_FLUSHED;
4401 }
4402#endif /* PGMPOOL_WITH_USER_TRACKING */
4403
4404 /*
4405 * Commit the allocation, clear the page and return.
4406 */
4407#ifdef VBOX_WITH_STATISTICS
4408 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4409 pPool->cUsedPagesHigh = pPool->cUsedPages;
4410#endif
4411
4412 if (!pPage->fZeroed)
4413 {
4414 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4415 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4416 ASMMemZeroPage(pv);
4417 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4418 }
4419
4420 *ppPage = pPage;
4421 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4422 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4423 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4424 return rc;
4425}
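/*
 * Usage sketch (illustrative, not taken from the original source): how a
 * caller might shadow a guest PAE page table with pgmPoolAlloc and release
 * the usage again with pgmPoolFree. GCPhysGst, iUser and iUserTable are
 * placeholders for values the real callers derive from the paging structures.
 */
static int pgmPoolSketchShadowPT(PVM pVM, RTGCPHYS GCPhysGst, uint16_t iUser, uint32_t iUserTable)
{
    PPGMPOOLPAGE pShwPage;
    int rc = pgmPoolAlloc(pVM, GCPhysGst, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, iUser, iUserTable, &pShwPage);
    if (RT_SUCCESS(rc))
    {
        /* VINF_SUCCESS: fresh zeroed page; VINF_PGM_CACHED_PAGE: already populated. */
        /* ... sync the shadow PTEs here ... */
        pgmPoolFree(pVM, pShwPage->Core.Key, iUser, iUserTable); /* Core.Key is the page's HCPhys. */
    }
    return rc;
}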
4426
4427
4428/**
4429 * Frees a usage of a pool page.
4430 *
4431 * @param pVM The VM handle.
4432 * @param HCPhys The HC physical address of the shadow page.
4433 * @param iUser The shadow page pool index of the user table.
4434 * @param iUserTable The index into the user table (shadowed).
4435 */
4436void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4437{
4438 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4439 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4440 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4441}
4442
4443
4444/**
4445 * Gets an in-use page in the pool by its physical address.
4446 *
4447 * @returns Pointer to the page.
4448 * @param pVM The VM handle.
4449 * @param HCPhys The HC physical address of the shadow page.
4450 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4451 */
4452PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4453{
4454 /** @todo profile this! */
4455 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4456 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4457 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%RHp -> %p:{.idx=%d .GCPhys=%RGp .enmKind=%d}\n",
4458 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
4459 return pPage;
4460}
4461
4462
4463/**
4464 * Flushes the entire cache.
4465 *
4466 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4467 * and will execute this CR3 flush.
4468 *
4469 * @param pVM The VM handle.
4470 */
4471void pgmPoolFlushAll(PVM pVM)
4472{
4473 LogFlow(("pgmPoolFlushAll:\n"));
4474 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4475}
4476