VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp @ r15410

Last change on this file was r15410 (2008-12-13), checked in by vboxsync:

PGM: Deal with pgmPoolAlloc failure in MapCR3 without relying on having to clear the pool. The MapCR3 action will be postponed to SyncCR3.

1/* $Id: PGMAllPool.cpp 15410 2008-12-13 01:04:17Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42
43
44/*******************************************************************************
45* Internal Functions *
46*******************************************************************************/
47__BEGIN_DECLS
48static void pgmPoolFlushAllInt(PPGMPOOL pPool);
49#ifdef PGMPOOL_WITH_USER_TRACKING
50DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
51DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#endif
54#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
55static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
56#endif
57#ifdef PGMPOOL_WITH_CACHE
58static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
59#endif
60#ifdef PGMPOOL_WITH_MONITORING
61static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
62#endif
63#ifndef IN_RING3
64DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
65#endif
66__END_DECLS
67
68
69/**
70 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
71 *
72 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
73 * @param enmKind The page kind.
74 */
75DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
76{
77 switch (enmKind)
78 {
79 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
82 return true;
83 default:
84 return false;
85 }
86}
87
88
89#if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
90/**
91 * Maps a pool page into the current context.
92 *
93 * @returns Pointer to the mapping.
94 * @param pPGM Pointer to the PGM instance data.
95 * @param pPage The page to map.
96 */
97void *pgmPoolMapPageFallback(PPGM pPGM, PPGMPOOLPAGE pPage)
98{
99 /* general pages are taken care of by the inlined part, it
100 only ends up here in case of failure. */
101 AssertReleaseReturn(pPage->idx < PGMPOOL_IDX_FIRST, NULL);
102
103/** @todo make sure HCPhys is valid for *all* indexes. */
104 /* special pages. */
105# ifdef IN_RC
106 switch (pPage->idx)
107 {
108 case PGMPOOL_IDX_PD:
109 return pPGM->pShw32BitPdRC;
110 case PGMPOOL_IDX_PAE_PD:
111 case PGMPOOL_IDX_PAE_PD_0:
112 return pPGM->apShwPaePDsRC[0];
113 case PGMPOOL_IDX_PAE_PD_1:
114 return pPGM->apShwPaePDsRC[1];
115 case PGMPOOL_IDX_PAE_PD_2:
116 return pPGM->apShwPaePDsRC[2];
117 case PGMPOOL_IDX_PAE_PD_3:
118 return pPGM->apShwPaePDsRC[3];
119 case PGMPOOL_IDX_PDPT:
120 return pPGM->pShwPaePdptRC;
121 default:
122 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
123 return NULL;
124 }
125
126# else /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
127 RTHCPHYS HCPhys;
128 switch (pPage->idx)
129 {
130 case PGMPOOL_IDX_PD:
131 HCPhys = pPGM->HCPhysShw32BitPD;
132 break;
133 case PGMPOOL_IDX_PAE_PD_0:
134 HCPhys = pPGM->aHCPhysPaePDs[0];
135 break;
136 case PGMPOOL_IDX_PAE_PD_1:
137 HCPhys = pPGM->aHCPhysPaePDs[1];
138 break;
139 case PGMPOOL_IDX_PAE_PD_2:
140 HCPhys = pPGM->aHCPhysPaePDs[2];
141 break;
142 case PGMPOOL_IDX_PAE_PD_3:
143 HCPhys = pPGM->aHCPhysPaePDs[3];
144 break;
145 case PGMPOOL_IDX_PDPT:
146 HCPhys = pPGM->HCPhysShwPaePdpt;
147 break;
148 case PGMPOOL_IDX_NESTED_ROOT:
149 HCPhys = pPGM->HCPhysShwNestedRoot;
150 break;
151 case PGMPOOL_IDX_PAE_PD:
152 AssertReleaseMsgFailed(("PGMPOOL_IDX_PAE_PD is not usable in VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 context\n"));
153 return NULL;
154 default:
155 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
156 return NULL;
157 }
158 AssertMsg(HCPhys && HCPhys != NIL_RTHCPHYS && !(PAGE_OFFSET_MASK & HCPhys), ("%RHp\n", HCPhys));
159
160 void *pv;
161 int rc = pgmR0DynMapHCPageInlined(pPGM, HCPhys, &pv);
162 AssertReleaseRCReturn(rc, NULL);
163 return pv;
164# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
165}
166#endif /* IN_RC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
167
168
169#ifdef PGMPOOL_WITH_MONITORING
170/**
171 * Determine the size of a write instruction.
172 * @returns number of bytes written.
173 * @param pDis The disassembler state.
174 */
175static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
176{
177 /*
178 * This is very crude and possibly wrong for some opcodes,
179 * but since it's not really supposed to be called we can
180 * probably live with that.
181 */
182 return DISGetParamSize(pDis, &pDis->param1);
183}
184
185
186/**
187 * Flushes a chain of pages sharing the same access monitor.
188 *
189 * @returns VBox status code suitable for scheduling.
190 * @param pPool The pool.
191 * @param pPage A page in the chain.
192 */
193int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
194{
195 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
196
197 /*
198 * Find the list head.
199 */
200 uint16_t idx = pPage->idx;
201 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
202 {
203 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
204 {
205 idx = pPage->iMonitoredPrev;
206 Assert(idx != pPage->idx);
207 pPage = &pPool->aPages[idx];
208 }
209 }
210
211 /*
212 * Iterate the list flushing each shadow page.
213 */
214 int rc = VINF_SUCCESS;
215 for (;;)
216 {
217 idx = pPage->iMonitoredNext;
218 Assert(idx != pPage->idx);
219 if (pPage->idx >= PGMPOOL_IDX_FIRST)
220 {
221 int rc2 = pgmPoolFlushPage(pPool, pPage);
222 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
223 rc = VINF_PGM_SYNC_CR3;
224 }
225 /* next */
226 if (idx == NIL_PGMPOOL_IDX)
227 break;
228 pPage = &pPool->aPages[idx];
229 }
230 return rc;
231}
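
/*
 * A minimal standalone sketch of the index-linked chain pattern used above:
 * the monitor chain is an intrusive doubly linked list kept as uint16_t
 * indices into pPool->aPages and terminated by NIL_PGMPOOL_IDX.  SKETCHNODE,
 * SKETCH_NIL and sketchVisitChain are made-up names for illustration only;
 * the block is guarded out and is not part of this file's build.
 */
#if 0 /* illustrative sketch, plain C, independent of the VBox types above */
# include <stdint.h>

# define SKETCH_NIL UINT16_MAX

typedef struct SKETCHNODE
{
    uint16_t iPrev;   /* index of the previous node or SKETCH_NIL */
    uint16_t iNext;   /* index of the next node or SKETCH_NIL */
} SKETCHNODE;

/* Walk back to the list head, then visit every node front to back,
 * mirroring the head-seek + forward-flush loop in pgmPoolMonitorChainFlush. */
static void sketchVisitChain(SKETCHNODE *paNodes, uint16_t idx, void (*pfnVisit)(uint16_t))
{
    while (paNodes[idx].iPrev != SKETCH_NIL)
        idx = paNodes[idx].iPrev;
    while (idx != SKETCH_NIL)
    {
        uint16_t idxNext = paNodes[idx].iNext; /* fetch before the visitor may unlink the node */
        pfnVisit(idx);
        idx = idxNext;
    }
}
#endif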
232
233
234/**
235 * Wrapper for getting the current context pointer to the entry being modified.
236 *
237 * @returns Pointer to the current context mapping of the entry.
238 * @param pPool The pool.
239 * @param pvFault The fault virtual address.
240 * @param GCPhysFault The fault physical address.
241 * @param cbEntry The entry size.
242 */
243#ifdef IN_RING3
244DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
245#else
246DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
247#endif
248{
249#ifdef IN_RC
250 return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
251
252#elif defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
253 void *pvRet;
254 int rc = PGMDynMapGCPageOff(pPool->pVMR0, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
255 AssertFatalRCSuccess(rc);
256 return pvRet;
257
258#elif defined(IN_RING0)
259 void *pvRet;
260 int rc = pgmRamGCPhys2HCPtr(&pPool->pVMR0->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
261 AssertFatalRCSuccess(rc);
262 return pvRet;
263
264#elif defined(IN_RING3)
265 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
266#else
267# error "huh?"
268#endif
269}
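
/*
 * The wrapper above maps the faulting address down to the start of the entry
 * being written by masking off the low bits.  A tiny standalone sketch of
 * that alignment step, assuming cbEntry is a power of two (sketchAlignToEntry
 * is a made-up name; guarded out of the build).
 */
#if 0 /* illustrative sketch */
# include <stdint.h>

static uintptr_t sketchAlignToEntry(uintptr_t uAddr, unsigned cbEntry)
{
    /* e.g. uAddr=0x1003, cbEntry=8 (a PAE PTE) -> 0x1000 */
    return uAddr & ~(uintptr_t)(cbEntry - 1);
}
#endif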
270
271
272/**
273 * Process shadow entries before they are changed by the guest.
274 *
275 * For PT entries we will clear them. For PD entries, we'll simply check
276 * for mapping conflicts and set the SyncCR3 FF if found.
277 *
278 * @param pPool The pool.
279 * @param pPage The head page.
280 * @param GCPhysFault The guest physical fault address.
281 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
282 * In R3 this is the host context 'fault' address.
283 * @param pCpu The disassembler state for figuring out the write size.
284 * This need not be specified if the caller knows we won't do cross entry accesses.
285 */
286#ifdef IN_RING3
287void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
288#else
289void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
290#endif
291{
292 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
293 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
294 const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
295
296 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));
297
298 for (;;)
299 {
300 union
301 {
302 void *pv;
303 PX86PT pPT;
304 PX86PTPAE pPTPae;
305 PX86PD pPD;
306 PX86PDPAE pPDPae;
307 PX86PDPT pPDPT;
308 PX86PML4 pPML4;
309 } uShw;
310
311 switch (pPage->enmKind)
312 {
313 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
314 {
315 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
316 const unsigned iShw = off / sizeof(X86PTE);
317 if (uShw.pPT->a[iShw].n.u1Present)
318 {
319# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
320 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
321 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
322 pgmPoolTracDerefGCPhysHint(pPool, pPage,
323 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
324 pGstPte->u & X86_PTE_PG_MASK);
325# endif
326 uShw.pPT->a[iShw].u = 0;
327 }
328 break;
329 }
330
331 /* page/2 sized */
332 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
333 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
334 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
335 {
336 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
337 if (uShw.pPTPae->a[iShw].n.u1Present)
338 {
339# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
340 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
341 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
342 pgmPoolTracDerefGCPhysHint(pPool, pPage,
343 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
344 pGstPte->u & X86_PTE_PG_MASK);
345# endif
346 uShw.pPTPae->a[iShw].u = 0;
347 }
348 }
349 break;
350
351 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
352 {
353 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
354 const unsigned iShw = off / sizeof(X86PTEPAE);
355 if (uShw.pPTPae->a[iShw].n.u1Present)
356 {
357# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
358 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
359 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
360 pgmPoolTracDerefGCPhysHint(pPool, pPage,
361 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
362 pGstPte->u & X86_PTE_PAE_PG_MASK);
363# endif
364 uShw.pPTPae->a[iShw].u = 0;
365 }
366
367 /* paranoia / a bit assumptive. */
368 if ( pCpu
369 && (off & 7)
370 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
371 {
372 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
373 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
374
375 if (uShw.pPTPae->a[iShw2].n.u1Present)
376 {
377# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
378 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
379 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
380 pgmPoolTracDerefGCPhysHint(pPool, pPage,
381 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
382 pGstPte->u & X86_PTE_PAE_PG_MASK);
383# endif
384 uShw.pPTPae->a[iShw2].u = 0;
385 }
386 }
387
388 break;
389 }
390
391 case PGMPOOLKIND_ROOT_32BIT_PD:
392 {
393 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
394 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
395 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
396 {
397 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
398 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
399 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
400 }
401 /* paranoia / a bit assumptive. */
402 else if ( pCpu
403 && (off & 3)
404 && (off & 3) + cbWrite > sizeof(X86PTE))
405 {
406 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
407 if ( iShw2 != iShw
408 && iShw2 < RT_ELEMENTS(uShw.pPD->a)
409 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
410 {
411 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
412 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
413 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
414 }
415 }
416#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
417 if ( uShw.pPD->a[iShw].n.u1Present
418 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
419 {
420 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
421# ifdef IN_RC /* TLB load - we're pushing things a bit... */
422 ASMProbeReadByte(pvAddress);
423# endif
424 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
425 uShw.pPD->a[iShw].u = 0;
426 }
427#endif
428 break;
429 }
430
431 case PGMPOOLKIND_ROOT_PAE_PD:
432 {
433 unsigned iGst = off / sizeof(X86PDE); // ASSUMING 32-bit guest paging!
434 unsigned iShwPdpt = iGst / 256;
435 unsigned iShw = (iGst % 256) * 2;
436 Assert(pPage->idx == PGMPOOL_IDX_PAE_PD);
437 PPGMPOOLPAGE pPage2 = pPage + 1 + iShwPdpt;
438 Assert(pPage2->idx == PGMPOOL_IDX_PAE_PD_0 + iShwPdpt);
439 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage2);
440 for (unsigned i = 0; i < 2; i++, iShw++)
441 {
442 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
443 {
444 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
445 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
446 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw));
447 }
448 /* paranoia / a bit assumptive. */
449 else if ( pCpu
450 && (off & 3)
451 && (off & 3) + cbWrite > 4)
452 {
453 const unsigned iShw2 = iShw + 2;
454 if ( iShw2 < RT_ELEMENTS(uShw.pPDPae->a) /** @todo was completely wrong, it's better now after #1865 but still wrong from cross PD. */
455 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
456 {
457 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
458 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
459 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
460 }
461 }
462#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
463 if ( uShw.pPDPae->a[iShw].n.u1Present
464 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
465 {
466 LogFlow(("pgmPoolMonitorChainChanging: iShwPdpt=%#x iShw=%#x: %RX64 -> freeing it!\n", iShwPdpt, iShw, uShw.pPDPae->a[iShw].u));
467# ifdef IN_RC /* TLB load - we're pushing things a bit... */
468 ASMProbeReadByte(pvAddress);
469# endif
470 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + iShwPdpt * X86_PG_PAE_ENTRIES);
471 uShw.pPDPae->a[iShw].u = 0;
472 }
473#endif
474 }
475 break;
476 }
477
478 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
479 {
480 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
481 const unsigned iShw = off / sizeof(X86PDEPAE);
482 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
483 {
484 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
485 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
486 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
487 }
488#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
489 /*
490 * Causes trouble when the guest uses a PDE to refer to the whole page table level
491 * structure. (Invalidate here; faults later on when it tries to change the page
492 * table entries -> recheck; probably only applies to the RC case.)
493 */
494 else
495 {
496 if (uShw.pPDPae->a[iShw].n.u1Present)
497 {
498 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
499 pgmPoolFree(pPool->CTX_SUFF(pVM),
500 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
501 /* Note: hardcoded PAE implementation dependency */
502 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
503 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
504 uShw.pPDPae->a[iShw].u = 0;
505 }
506 }
507#endif
508 /* paranoia / a bit assumptive. */
509 if ( pCpu
510 && (off & 7)
511 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
512 {
513 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
514 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
515
516 if ( iShw2 != iShw
517 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
518 {
519 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
520 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
521 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
522 }
523#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
524 else if (uShw.pPDPae->a[iShw2].n.u1Present)
525 {
526 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
527 pgmPoolFree(pPool->CTX_SUFF(pVM),
528 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
529 /* Note: hardcoded PAE implementation dependency */
530 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
531 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
532 uShw.pPDPae->a[iShw2].u = 0;
533 }
534#endif
535 }
536 break;
537 }
538
539 case PGMPOOLKIND_ROOT_PDPT:
540 {
541 /*
542 * Hopefully this doesn't happen very often:
543 * - touching unused parts of the page
544 * - messing with the bits of pd pointers without changing the physical address
545 */
546 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
547 const unsigned iShw = off / sizeof(X86PDPE);
548 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
549 {
550 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
551 {
552 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
553 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
554 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
555 }
556 /* paranoia / a bit assumptive. */
557 else if ( pCpu
558 && (off & 7)
559 && (off & 7) + cbWrite > sizeof(X86PDPE))
560 {
561 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
562 if ( iShw2 != iShw
563 && iShw2 < X86_PG_PAE_PDPE_ENTRIES
564 && uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
565 {
566 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
567 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
568 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
569 }
570 }
571 }
572 break;
573 }
574
575#ifndef IN_RC
576 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
577 {
578 Assert(pPage->enmKind == PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD);
579
580 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
581 const unsigned iShw = off / sizeof(X86PDEPAE);
582 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
583 {
584 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
585 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
586 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
587 }
588 else
589 {
590 if (uShw.pPDPae->a[iShw].n.u1Present)
591 {
592 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
593 pgmPoolFree(pPool->CTX_SUFF(pVM),
594 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
595 pPage->idx,
596 iShw);
597 uShw.pPDPae->a[iShw].u = 0;
598 }
599 }
600 /* paranoia / a bit assumptive. */
601 if ( pCpu
602 && (off & 7)
603 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
604 {
605 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
606 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
607
608 if ( iShw2 != iShw
609 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
610 {
611 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
612 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
613 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
614 }
615 else
616 if (uShw.pPDPae->a[iShw2].n.u1Present)
617 {
618 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
619 pgmPoolFree(pPool->CTX_SUFF(pVM),
620 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
621 pPage->idx,
622 iShw2);
623 uShw.pPDPae->a[iShw2].u = 0;
624 }
625 }
626 break;
627 }
628
629 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
630 {
631 /*
632 * Hopefully this doesn't happen very often:
633 * - messing with the bits of pd pointers without changing the physical address
634 */
635 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
636 {
637 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
638 const unsigned iShw = off / sizeof(X86PDPE);
639 if (uShw.pPDPT->a[iShw].n.u1Present)
640 {
641 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
642 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
643 uShw.pPDPT->a[iShw].u = 0;
644 }
645 /* paranoia / a bit assumptive. */
646 if ( pCpu
647 && (off & 7)
648 && (off & 7) + cbWrite > sizeof(X86PDPE))
649 {
650 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
651 if (uShw.pPDPT->a[iShw2].n.u1Present)
652 {
653 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
654 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
655 uShw.pPDPT->a[iShw2].u = 0;
656 }
657 }
658 }
659 break;
660 }
661
662 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
663 {
664 /*
665 * Hopefully this doesn't happen very often:
666 * - messing with the bits of pd pointers without changing the physical address
667 */
668 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
669 {
670 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
671 const unsigned iShw = off / sizeof(X86PDPE);
672 if (uShw.pPML4->a[iShw].n.u1Present)
673 {
674 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
675 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
676 uShw.pPML4->a[iShw].u = 0;
677 }
678 /* paranoia / a bit assumptive. */
679 if ( pCpu
680 && (off & 7)
681 && (off & 7) + cbWrite > sizeof(X86PDPE))
682 {
683 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
684 if (uShw.pPML4->a[iShw2].n.u1Present)
685 {
686 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
687 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
688 uShw.pPML4->a[iShw2].u = 0;
689 }
690 }
691 }
692 break;
693 }
694#endif /* !IN_RC */
695
696 default:
697 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
698 }
699
700 /* next */
701 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
702 return;
703 pPage = &pPool->aPages[pPage->iMonitoredNext];
704 }
705}
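
/*
 * Each case above derives the index of the shadow entry under the write from
 * the page offset, and the "paranoia" checks compute a second index when a
 * misaligned write spills into the next entry.  A standalone sketch of that
 * span calculation (sketchWriteSpan is a made-up name; entry size assumed to
 * be a power of two; guarded out of the build).
 */
#if 0 /* illustrative sketch */
# include <stdio.h>

static void sketchWriteSpan(unsigned offPage, unsigned cbWrite, unsigned cbEntry)
{
    unsigned iFirst = offPage / cbEntry;
    unsigned iLast  = (offPage + cbWrite - 1) / cbEntry;
    if (   (offPage & (cbEntry - 1))                       /* misaligned start... */
        && (offPage & (cbEntry - 1)) + cbWrite > cbEntry)  /* ...and it spills over */
        printf("write hits entries %u and %u\n", iFirst, iLast);
    else
        printf("write stays within entry %u\n", iFirst);
}

/* sketchWriteSpan(0x7fc, 8, 8) -> entries 255 and 256 (the cross-entry "paranoia" case). */
#endif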
706
707
708# ifndef IN_RING3
709/**
710 * Checks if an access could be a fork operation in progress.
711 *
712 * Meaning that the guest is setting up the parent process for Copy-On-Write.
713 *
714 * @returns true if it's likely that we're forking, otherwise false.
715 * @param pPool The pool.
716 * @param pCpu The disassembled instruction.
717 * @param offFault The access offset.
718 */
719DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
720{
721 /*
722 * i386 Linux uses btr to clear X86_PTE_RW.
723 * The functions involved are (2.6.16 source inspection):
724 * clear_bit
725 * ptep_set_wrprotect
726 * copy_one_pte
727 * copy_pte_range
728 * copy_pmd_range
729 * copy_pud_range
730 * copy_page_range
731 * dup_mmap
732 * dup_mm
733 * copy_mm
734 * copy_process
735 * do_fork
736 */
737 if ( pCpu->pCurInstr->opcode == OP_BTR
738 && !(offFault & 4)
739 /** @todo Validate that the bit index is X86_PTE_RW. */
740 )
741 {
742 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
743 return true;
744 }
745 return false;
746}
747
748
749/**
750 * Determine whether the page is likely to have been reused.
751 *
752 * @returns true if we consider the page as being reused for a different purpose.
753 * @returns false if we consider it to still be a paging page.
754 * @param pVM VM Handle.
755 * @param pPage The page in question.
756 * @param pRegFrame Trap register frame.
757 * @param pCpu The disassembly info for the faulting instruction.
758 * @param pvFault The fault address.
759 *
760 * @remark The REP prefix check is left to the caller because of STOSD/W.
761 */
762DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
763{
764#ifndef IN_RC
765 /** @todo could make this general, faulting close to rsp should be safe reuse heuristic. */
766 if ( HWACCMHasPendingIrq(pVM)
767 && (pRegFrame->rsp - pvFault) < 32)
768 {
769 /* Fault caused by stack writes while trying to inject an interrupt event. */
770 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
771 return true;
772 }
773#else
774 NOREF(pVM); NOREF(pvFault);
775#endif
776
777 switch (pCpu->pCurInstr->opcode)
778 {
779 /* call implies the actual push of the return address faulted */
780 case OP_CALL:
781 Log4(("pgmPoolMonitorIsReused: CALL\n"));
782 return true;
783 case OP_PUSH:
784 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
785 return true;
786 case OP_PUSHF:
787 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
788 return true;
789 case OP_PUSHA:
790 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
791 return true;
792 case OP_FXSAVE:
793 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
794 return true;
795 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
796 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
797 return true;
798 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
799 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
800 return true;
801 case OP_MOVSWD:
802 case OP_STOSWD:
803 if ( pCpu->prefix == (PREFIX_REP|PREFIX_REX)
804 && pRegFrame->rcx >= 0x40
805 )
806 {
807 Assert(pCpu->mode == CPUMODE_64BIT);
808
809 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
810 return true;
811 }
812 return false;
813 }
814 if ( (pCpu->param1.flags & USE_REG_GEN32)
815 && (pCpu->param1.base.reg_gen == USE_REG_ESP))
816 {
817 Log4(("pgmPoolMonitorIsReused: ESP\n"));
818 return true;
819 }
820
821 //if (pPage->fCR3Mix)
822 // return false;
823 return false;
824}
825
826
827/**
828 * Flushes the page being accessed.
829 *
830 * @returns VBox status code suitable for scheduling.
831 * @param pVM The VM handle.
832 * @param pPool The pool.
833 * @param pPage The pool page (head).
834 * @param pCpu The disassembly of the write instruction.
835 * @param pRegFrame The trap register frame.
836 * @param GCPhysFault The fault address as guest physical address.
837 * @param pvFault The fault address.
838 */
839static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
840 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
841{
842 /*
843 * First, do the flushing.
844 */
845 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
846
847 /*
848 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
849 */
850 uint32_t cbWritten;
851 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
852 if (RT_SUCCESS(rc2))
853 pRegFrame->rip += pCpu->opsize;
854 else if (rc2 == VERR_EM_INTERPRETER)
855 {
856#ifdef IN_RC
857 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
858 {
859 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
860 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
861 rc = VINF_SUCCESS;
862 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
863 }
864 else
865#endif
866 {
867 rc = VINF_EM_RAW_EMULATE_INSTR;
868 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
869 }
870 }
871 else
872 rc = rc2;
873
874 /* See use in pgmPoolAccessHandlerSimple(). */
875 PGM_INVL_GUEST_TLBS();
876
877 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
878 return rc;
879
880}
881
882
883/**
884 * Handles the STOSD write accesses.
885 *
886 * @returns VBox status code suitable for scheduling.
887 * @param pVM The VM handle.
888 * @param pPool The pool.
889 * @param pPage The pool page (head).
890 * @param pCpu The disassembly of the write instruction.
891 * @param pRegFrame The trap register frame.
892 * @param GCPhysFault The fault address as guest physical address.
893 * @param pvFault The fault address.
894 */
895DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
896 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
897{
898 Assert(pCpu->mode == CPUMODE_32BIT);
899
900 /*
901 * Increment the modification counter and insert it into the list
902 * of modified pages the first time.
903 */
904 if (!pPage->cModifications++)
905 pgmPoolMonitorModifiedInsert(pPool, pPage);
906
907 /*
908 * Execute REP STOSD.
909 *
910 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
911 * write situation, meaning that it's safe to write here.
912 */
913 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
914 while (pRegFrame->ecx)
915 {
916 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
917#ifdef IN_RC
918 *(uint32_t *)pu32 = pRegFrame->eax;
919#else
920 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
921#endif
922 pu32 += 4;
923 GCPhysFault += 4;
924 pRegFrame->edi += 4;
925 pRegFrame->ecx--;
926 }
927 pRegFrame->rip += pCpu->opsize;
928
929 /* See use in pgmPoolAccessHandlerSimple(). */
930 PGM_INVL_GUEST_TLBS();
931
932 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
933 return VINF_SUCCESS;
934}
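
/*
 * The loop above hand-emulates "rep stosd" with DF clear: store EAX at [EDI],
 * advance EDI by 4 and decrement ECX until it reaches zero.  A minimal
 * standalone sketch of those semantics on an ordinary buffer (sketchRepStosd
 * is a made-up name; this is not the guest-physical write path used above and
 * is guarded out of the build).
 */
#if 0 /* illustrative sketch */
# include <stdint.h>

static void sketchRepStosd(uint32_t *pu32Dst, uint32_t u32Eax, uint32_t cEcx)
{
    while (cEcx)
    {
        *pu32Dst++ = u32Eax;   /* store EAX, EDI += 4 */
        cEcx--;                /* ECX-- */
    }
}
#endif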
935
936
937/**
938 * Handles the simple write accesses.
939 *
940 * @returns VBox status code suitable for scheduling.
941 * @param pVM The VM handle.
942 * @param pPool The pool.
943 * @param pPage The pool page (head).
944 * @param pCpu The disassembly of the write instruction.
945 * @param pRegFrame The trap register frame.
946 * @param GCPhysFault The fault address as guest physical address.
947 * @param pvFault The fault address.
948 */
949DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
950 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
951{
952 /*
953 * Increment the modification counter and insert it into the list
954 * of modified pages the first time.
955 */
956 if (!pPage->cModifications++)
957 pgmPoolMonitorModifiedInsert(pPool, pPage);
958
959 /*
960 * Clear all the pages. ASSUMES that pvFault is readable.
961 */
962 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
963
964 /*
965 * Interpret the instruction.
966 */
967 uint32_t cb;
968 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
969 if (RT_SUCCESS(rc))
970 pRegFrame->rip += pCpu->opsize;
971 else if (rc == VERR_EM_INTERPRETER)
972 {
973 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
974 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
975 rc = VINF_EM_RAW_EMULATE_INSTR;
976 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
977 }
978
979 /*
980 * Quick hack, with logging enabled we're getting stale
981 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
982 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
983 * have to be fixed to support this. But that'll have to wait till next week.
984 *
985 * An alternative is to keep track of the changed PTEs together with the
986 * GCPhys from the guest PT. This may prove expensive though.
987 *
988 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
989 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
990 */
991 PGM_INVL_GUEST_TLBS();
992
993 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
994 return rc;
995}
996
997
998/**
999 * \#PF Handler callback for PT write accesses.
1000 *
1001 * @returns VBox status code (appropriate for GC return).
1002 * @param pVM VM Handle.
1003 * @param uErrorCode CPU Error code.
1004 * @param pRegFrame Trap register frame.
1005 * NULL on DMA and other non CPU access.
1006 * @param pvFault The fault address (cr2).
1007 * @param GCPhysFault The GC physical address corresponding to pvFault.
1008 * @param pvUser User argument.
1009 */
1010DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1011{
1012 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1013 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1014 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1015 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1016
1017 /*
1018 * We should ALWAYS have the list head as user parameter. This
1019 * is because we use that page to record the changes.
1020 */
1021 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1022
1023 /*
1024 * Disassemble the faulting instruction.
1025 */
1026 DISCPUSTATE Cpu;
1027 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
1028 AssertRCReturn(rc, rc);
1029
1030 /*
1031 * Check if it's worth dealing with.
1032 */
1033 bool fReused = false;
1034 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1035 || pPage->fCR3Mix)
1036 && !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
1037 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
1038 {
1039 /*
1040 * Simple instructions, no REP prefix.
1041 */
1042 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
1043 {
1044 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1045 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1046 return rc;
1047 }
1048
1049 /*
1050 * Windows is frequently doing small memset() operations (netio test 4k+).
1051 * We have to deal with these or we'll kill the cache and performance.
1052 */
1053 if ( Cpu.pCurInstr->opcode == OP_STOSWD
1054 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
1055 && pRegFrame->ecx <= 0x20
1056 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1057 && !((uintptr_t)pvFault & 3)
1058 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1059 && Cpu.mode == CPUMODE_32BIT
1060 && Cpu.opmode == CPUMODE_32BIT
1061 && Cpu.addrmode == CPUMODE_32BIT
1062 && Cpu.prefix == PREFIX_REP
1063 && !pRegFrame->eflags.Bits.u1DF
1064 )
1065 {
1066 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1067 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1068 return rc;
1069 }
1070
1071 /* REP prefix, don't bother. */
1072 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1073 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1074 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
1075 }
1076
1077 /*
1078 * Not worth it, so flush it.
1079 *
1080 * If we considered it to be reused, don't go back to ring-3
1081 * to emulate failed instructions since we usually cannot
1082 * interpret them. This may be a bit risky, in which case
1083 * the reuse detection must be fixed.
1084 */
1085 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1086 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1087 rc = VINF_SUCCESS;
1088 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1089 return rc;
1090}
1091
1092# endif /* !IN_RING3 */
1093#endif /* PGMPOOL_WITH_MONITORING */
1094
1095#ifdef PGMPOOL_WITH_CACHE
1096
1097/**
1098 * Inserts a page into the GCPhys hash table.
1099 *
1100 * @param pPool The pool.
1101 * @param pPage The page.
1102 */
1103DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1104{
1105 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1106 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1107 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1108 pPage->iNext = pPool->aiHash[iHash];
1109 pPool->aiHash[iHash] = pPage->idx;
1110}
1111
1112
1113/**
1114 * Removes a page from the GCPhys hash table.
1115 *
1116 * @param pPool The pool.
1117 * @param pPage The page.
1118 */
1119DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1120{
1121 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1122 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1123 if (pPool->aiHash[iHash] == pPage->idx)
1124 pPool->aiHash[iHash] = pPage->iNext;
1125 else
1126 {
1127 uint16_t iPrev = pPool->aiHash[iHash];
1128 for (;;)
1129 {
1130 const int16_t i = pPool->aPages[iPrev].iNext;
1131 if (i == pPage->idx)
1132 {
1133 pPool->aPages[iPrev].iNext = pPage->iNext;
1134 break;
1135 }
1136 if (i == NIL_PGMPOOL_IDX)
1137 {
1138 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1139 break;
1140 }
1141 iPrev = i;
1142 }
1143 }
1144 pPage->iNext = NIL_PGMPOOL_IDX;
1145}
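
/*
 * The GCPhys hash above is an open-hashing table whose buckets are uint16_t
 * page indices chained through pPage->iNext and terminated by
 * NIL_PGMPOOL_IDX.  Below is a minimal standalone sketch of the same
 * insert-at-head / unlink-by-walking pattern; SKETCHENTRY, SKETCH_NIL and the
 * trivial sketchHash are made-up names (not PGMPOOL_HASH) and the block is
 * guarded out of the build.
 */
#if 0 /* illustrative sketch, plain C */
# include <stdint.h>

# define SKETCH_NIL      UINT16_MAX
# define SKETCH_BUCKETS  64u

typedef struct SKETCHENTRY
{
    uint64_t Key;     /* stands in for GCPhys */
    uint16_t iNext;   /* next entry in the same bucket or SKETCH_NIL */
} SKETCHENTRY;

static unsigned sketchHash(uint64_t Key)
{
    return (unsigned)(Key >> 12) & (SKETCH_BUCKETS - 1);  /* trivial page-number hash */
}

/* Insert at the head of the bucket chain, like pgmPoolHashInsert. */
static void sketchHashInsert(uint16_t *paiBuckets, SKETCHENTRY *paEntries, uint16_t idx)
{
    unsigned iHash = sketchHash(paEntries[idx].Key);
    paEntries[idx].iNext = paiBuckets[iHash];
    paiBuckets[iHash] = idx;
}

/* Unlink by walking the singly linked chain, like pgmPoolHashRemove. */
static void sketchHashRemove(uint16_t *paiBuckets, SKETCHENTRY *paEntries, uint16_t idx)
{
    unsigned iHash = sketchHash(paEntries[idx].Key);
    if (paiBuckets[iHash] == idx)
        paiBuckets[iHash] = paEntries[idx].iNext;
    else
    {
        uint16_t iPrev = paiBuckets[iHash];
        while (iPrev != SKETCH_NIL)
        {
            if (paEntries[iPrev].iNext == idx)
            {
                paEntries[iPrev].iNext = paEntries[idx].iNext;
                break;
            }
            iPrev = paEntries[iPrev].iNext;
        }
    }
    paEntries[idx].iNext = SKETCH_NIL;
}
#endif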
1146
1147
1148/**
1149 * Frees up one cache page.
1150 *
1151 * @returns VBox status code.
1152 * @retval VINF_SUCCESS on success.
1153 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
1154 * @param pPool The pool.
1155 * @param iUser The user index.
1156 */
1157static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1158{
1159#ifndef IN_RC
1160 const PVM pVM = pPool->CTX_SUFF(pVM);
1161#endif
1162 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
1163 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1164
1165 /*
1166 * Select one page from the tail of the age list.
1167 */
1168 uint16_t iToFree = pPool->iAgeTail;
1169 if (iToFree == iUser)
1170 iToFree = pPool->aPages[iToFree].iAgePrev;
1171/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1172 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1173 {
1174 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1175 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1176 {
1177 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1178 continue;
1179 iToFree = i;
1180 break;
1181 }
1182 }
1183*/
1184
1185 Assert(iToFree != iUser);
1186 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1187
1188 PPGMPOOLPAGE pPage = &pPool->aPages[iToFree];
1189
1190 /*
1191 * Reject any attempts at flushing the currently active shadow CR3 mapping
1192 */
1193 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
1194 {
1195 /* Refresh the cr3 mapping by putting it at the head of the age list. */
1196 pgmPoolCacheUsed(pPool, pPage);
1197 return pgmPoolCacheFreeOne(pPool, iUser);
1198 }
1199
1200 int rc = pgmPoolFlushPage(pPool, pPage);
1201 if (rc == VINF_SUCCESS)
1202 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1203 return rc;
1204}
1205
1206
1207/**
1208 * Checks if a kind mismatch is really a page being reused
1209 * or if it's just normal remappings.
1210 *
1211 * @returns true if reused and the cached page (enmKind1) should be flushed
1212 * @returns false if not reused.
1213 * @param enmKind1 The kind of the cached page.
1214 * @param enmKind2 The kind of the requested page.
1215 */
1216static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1217{
1218 switch (enmKind1)
1219 {
1220 /*
1221 * Never reuse them. There is no remapping in non-paging mode.
1222 */
1223 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1224 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1225 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1226 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1227 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1228 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1229 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1230 return true;
1231
1232 /*
1233 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1234 */
1235 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1236 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1237 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1238 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1239 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1240 switch (enmKind2)
1241 {
1242 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1243 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1244 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1245 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1246 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1247 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1248 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1249 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1250 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1251 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1252 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1253 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1254 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1255 return true;
1256 default:
1257 return false;
1258 }
1259
1260 /*
1261 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1262 */
1263 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1264 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1265 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1266 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1267 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1268 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1269 switch (enmKind2)
1270 {
1271 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1272 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1273 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1274 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1275 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1276 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1277 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1278 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1279 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1280 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1281 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1282 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1283 return true;
1284 default:
1285 return false;
1286 }
1287
1288 /*
1289 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1290 */
1291 case PGMPOOLKIND_ROOT_32BIT_PD:
1292 case PGMPOOLKIND_ROOT_PAE_PD:
1293 case PGMPOOLKIND_ROOT_PDPT:
1294 case PGMPOOLKIND_ROOT_NESTED:
1295 return false;
1296
1297 default:
1298 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1299 }
1300}
1301
1302
1303/**
1304 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1305 *
1306 * @returns VBox status code.
1307 * @retval VINF_PGM_CACHED_PAGE on success.
1308 * @retval VERR_FILE_NOT_FOUND if not found.
1309 * @param pPool The pool.
1310 * @param GCPhys The GC physical address of the page we're gonna shadow.
1311 * @param enmKind The kind of mapping.
1312 * @param iUser The shadow page pool index of the user table.
1313 * @param iUserTable The index into the user table (shadowed).
1314 * @param ppPage Where to store the pointer to the page.
1315 */
1316static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1317{
1318#ifndef IN_RC
1319 const PVM pVM = pPool->CTX_SUFF(pVM);
1320#endif
1321 /*
1322 * Look up the GCPhys in the hash.
1323 */
1324 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1325 Log3(("pgmPoolCacheAlloc: %RGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
1326 if (i != NIL_PGMPOOL_IDX)
1327 {
1328 do
1329 {
1330 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1331 Log3(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1332 if (pPage->GCPhys == GCPhys)
1333 {
1334 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
1335 {
1336 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1337 if (RT_SUCCESS(rc))
1338 {
1339 *ppPage = pPage;
1340 STAM_COUNTER_INC(&pPool->StatCacheHits);
1341 return VINF_PGM_CACHED_PAGE;
1342 }
1343 return rc;
1344 }
1345
1346 /*
1347 * The kind is different. In some cases we should now flush the page
1348 * as it has been reused, but in most cases this is normal remapping
1349 * of PDs as PT or big pages using the GCPhys field in a slightly
1350 * different way than the other kinds.
1351 */
1352 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1353 {
1354 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1355 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
1356 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1357 break;
1358 }
1359 }
1360
1361 /* next */
1362 i = pPage->iNext;
1363 } while (i != NIL_PGMPOOL_IDX);
1364 }
1365
1366 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
1367 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1368 return VERR_FILE_NOT_FOUND;
1369}
1370
1371
1372/**
1373 * Inserts a page into the cache.
1374 *
1375 * @param pPool The pool.
1376 * @param pPage The cached page.
1377 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1378 */
1379static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1380{
1381 /*
1382 * Insert into the GCPhys hash if the page is fit for that.
1383 */
1384 Assert(!pPage->fCached);
1385 if (fCanBeCached)
1386 {
1387 pPage->fCached = true;
1388 pgmPoolHashInsert(pPool, pPage);
1389 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1390 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1391 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1392 }
1393 else
1394 {
1395 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1396 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1397 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1398 }
1399
1400 /*
1401 * Insert at the head of the age list.
1402 */
1403 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1404 pPage->iAgeNext = pPool->iAgeHead;
1405 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1406 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1407 else
1408 pPool->iAgeTail = pPage->idx;
1409 pPool->iAgeHead = pPage->idx;
1410}
1411
1412
1413/**
1414 * Flushes a cached page.
1415 *
1416 * @param pPool The pool.
1417 * @param pPage The cached page.
1418 */
1419static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1420{
1421 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1422
1423 /*
1424 * Remove the page from the hash.
1425 */
1426 if (pPage->fCached)
1427 {
1428 pPage->fCached = false;
1429 pgmPoolHashRemove(pPool, pPage);
1430 }
1431 else
1432 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1433
1434 /*
1435 * Remove it from the age list.
1436 */
1437 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1438 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1439 else
1440 pPool->iAgeTail = pPage->iAgePrev;
1441 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1442 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1443 else
1444 pPool->iAgeHead = pPage->iAgeNext;
1445 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1446 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1447}
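
/*
 * The cache age list is an intrusive doubly linked LRU: pages are inserted at
 * the head on (re)use, pgmPoolCacheFreeOne evicts from the tail, and
 * pgmPoolCacheFlushPage unlinks from anywhere.  A standalone sketch of that
 * head-insert / tail-evict / unlink pattern with index links; SKETCHLRU and
 * friends are made-up names, not the pool structures, and the block is
 * guarded out of the build.
 */
#if 0 /* illustrative sketch, plain C */
# include <stdint.h>

# define SKETCH_NIL UINT16_MAX

typedef struct SKETCHLRUNODE
{
    uint16_t iAgePrev;
    uint16_t iAgeNext;
} SKETCHLRUNODE;

typedef struct SKETCHLRU
{
    uint16_t      iAgeHead;   /* most recently used */
    uint16_t      iAgeTail;   /* least recently used, eviction candidate */
    SKETCHLRUNODE aNodes[256];
} SKETCHLRU;

/* Unlink a node, fixing up head/tail, like pgmPoolCacheFlushPage. */
static void sketchLruUnlink(SKETCHLRU *pLru, uint16_t idx)
{
    SKETCHLRUNODE *pNode = &pLru->aNodes[idx];
    if (pNode->iAgeNext != SKETCH_NIL)
        pLru->aNodes[pNode->iAgeNext].iAgePrev = pNode->iAgePrev;
    else
        pLru->iAgeTail = pNode->iAgePrev;
    if (pNode->iAgePrev != SKETCH_NIL)
        pLru->aNodes[pNode->iAgePrev].iAgeNext = pNode->iAgeNext;
    else
        pLru->iAgeHead = pNode->iAgeNext;
    pNode->iAgeNext = pNode->iAgePrev = SKETCH_NIL;
}

/* Insert at the head (most recently used), like pgmPoolCacheInsert. */
static void sketchLruInsertHead(SKETCHLRU *pLru, uint16_t idx)
{
    SKETCHLRUNODE *pNode = &pLru->aNodes[idx];
    pNode->iAgePrev = SKETCH_NIL;
    pNode->iAgeNext = pLru->iAgeHead;
    if (pLru->iAgeHead != SKETCH_NIL)
        pLru->aNodes[pLru->iAgeHead].iAgePrev = idx;
    else
        pLru->iAgeTail = idx;
    pLru->iAgeHead = idx;
}

/* Evict from the tail (least recently used), like pgmPoolCacheFreeOne. */
static uint16_t sketchLruEvict(SKETCHLRU *pLru)
{
    uint16_t idx = pLru->iAgeTail;
    if (idx != SKETCH_NIL)
        sketchLruUnlink(pLru, idx);
    return idx;
}
#endif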
1448
1449#endif /* PGMPOOL_WITH_CACHE */
1450#ifdef PGMPOOL_WITH_MONITORING
1451
1452/**
1453 * Looks for pages sharing the monitor.
1454 *
1455 * @returns Pointer to the head page.
1456 * @returns NULL if not found.
1457 * @param pPool The Pool
1458 * @param pNewPage The page which is going to be monitored.
1459 */
1460static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1461{
1462#ifdef PGMPOOL_WITH_CACHE
1463 /*
1464 * Look up the GCPhys in the hash.
1465 */
1466 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1467 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1468 if (i == NIL_PGMPOOL_IDX)
1469 return NULL;
1470 do
1471 {
1472 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1473 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1474 && pPage != pNewPage)
1475 {
1476 switch (pPage->enmKind)
1477 {
1478 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1479 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1480 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1481 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1482 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1483 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1484 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1485 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1486 case PGMPOOLKIND_ROOT_32BIT_PD:
1487 case PGMPOOLKIND_ROOT_PAE_PD:
1488 case PGMPOOLKIND_ROOT_PDPT:
1489 {
1490 /* find the head */
1491 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1492 {
1493 Assert(pPage->iMonitoredPrev != pPage->idx);
1494 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1495 }
1496 return pPage;
1497 }
1498
1499 /* ignore, no monitoring. */
1500 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1501 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1502 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1503 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1504 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1505 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1506 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1507 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1508 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1509 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1510 case PGMPOOLKIND_ROOT_NESTED:
1511 break;
1512 default:
1513 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1514 }
1515 }
1516
1517 /* next */
1518 i = pPage->iNext;
1519 } while (i != NIL_PGMPOOL_IDX);
1520#endif
1521 return NULL;
1522}
1523
1524
1525/**
1526 * Enables write monitoring of a guest page.
1527 *
1528 * @returns VBox status code.
1529 * @retval VINF_SUCCESS on success.
1530 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1531 * @param pPool The pool.
1532 * @param pPage The cached page.
1533 */
1534static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1535{
1536 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1537
1538 /*
1539 * Filter out the relevant kinds.
1540 */
1541 switch (pPage->enmKind)
1542 {
1543 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1544 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1545 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1546 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1547 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1548 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1549 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1550 case PGMPOOLKIND_ROOT_PDPT:
1551 break;
1552
1553 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1554 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1555 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1556 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1557 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1558 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1559 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1560 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1561 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1562 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1563 case PGMPOOLKIND_ROOT_NESTED:
1564 /* Nothing to monitor here. */
1565 return VINF_SUCCESS;
1566
1567 case PGMPOOLKIND_ROOT_32BIT_PD:
1568 case PGMPOOLKIND_ROOT_PAE_PD:
1569#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1570 break;
1571#endif
1572 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1573 default:
1574 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1575 }
1576
1577 /*
1578 * Install handler.
1579 */
1580 int rc;
1581 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1582 if (pPageHead)
1583 {
1584 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1585 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1586 pPage->iMonitoredPrev = pPageHead->idx;
1587 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1588 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1589 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1590 pPageHead->iMonitoredNext = pPage->idx;
1591 rc = VINF_SUCCESS;
1592 }
1593 else
1594 {
1595 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1596 PVM pVM = pPool->CTX_SUFF(pVM);
1597 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1598 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1599 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1600 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1601 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1602 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1603 pPool->pszAccessHandler);
1604 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1605 * the heap size should suffice. */
1606 AssertFatalRC(rc);
1607 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1608 rc = VERR_PGM_POOL_CLEARED;
1609 }
1610 pPage->fMonitored = true;
1611 return rc;
1612}
1613
1614
1615/**
1616 * Disables write monitoring of a guest page.
1617 *
1618 * @returns VBox status code.
1619 * @retval VINF_SUCCESS on success.
1620 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1621 * @param pPool The pool.
1622 * @param pPage The cached page.
1623 */
1624static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1625{
1626 /*
1627 * Filter out the relevant kinds.
1628 */
1629 switch (pPage->enmKind)
1630 {
1631 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1632 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1633 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1634 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1635 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1636 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1637 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1638 case PGMPOOLKIND_ROOT_PDPT:
1639 break;
1640
1641 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1642 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1643 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1644 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1645 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1646 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1647 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1648 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1649 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1650 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1651 case PGMPOOLKIND_ROOT_NESTED:
1652 /* Nothing to monitor here. */
1653 return VINF_SUCCESS;
1654
1655 case PGMPOOLKIND_ROOT_32BIT_PD:
1656 case PGMPOOLKIND_ROOT_PAE_PD:
1657#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1658 break;
1659#endif
1660 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1661 default:
1662 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1663 }
1664
1665 /*
1666 * Remove the page from the monitored list or uninstall it if last.
1667 */
1668 const PVM pVM = pPool->CTX_SUFF(pVM);
1669 int rc;
1670 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1671 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1672 {
1673 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1674 {
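 /* The page is the head of the monitor chain; promote the next page to head and re-point the handler callbacks at it. */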
1675 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1676 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1677 pNewHead->fCR3Mix = pPage->fCR3Mix;
1678 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1679 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1680 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1681 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
1682 pPool->pszAccessHandler);
1683 AssertFatalRCSuccess(rc);
1684 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1685 }
1686 else
1687 {
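 /* The page is in the middle or at the tail of the chain; just unlink it, the handler stays with the head. */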
1688 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1689 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1690 {
1691 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1692 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1693 }
1694 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1695 rc = VINF_SUCCESS;
1696 }
1697 }
1698 else
1699 {
1700 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1701 AssertFatalRC(rc);
1702 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1703 rc = VERR_PGM_POOL_CLEARED;
1704 }
1705 pPage->fMonitored = false;
1706
1707 /*
1708 * Remove it from the list of modified pages (if in it).
1709 */
1710 pgmPoolMonitorModifiedRemove(pPool, pPage);
1711
1712 return rc;
1713}
1714
1715# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1716
1717/**
1718 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1719 *
1720 * @param pPool The Pool.
1721 * @param pPage A page in the chain.
1722 * @param fCR3Mix The new fCR3Mix value.
1723 */
1724static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1725{
1726 /* current */
1727 pPage->fCR3Mix = fCR3Mix;
1728
1729 /* before */
1730 int16_t idx = pPage->iMonitoredPrev;
1731 while (idx != NIL_PGMPOOL_IDX)
1732 {
1733 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1734 idx = pPool->aPages[idx].iMonitoredPrev;
1735 }
1736
1737 /* after */
1738 idx = pPage->iMonitoredNext;
1739 while (idx != NIL_PGMPOOL_IDX)
1740 {
1741 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1742 idx = pPool->aPages[idx].iMonitoredNext;
1743 }
1744}
1745
1746
1747/**
1748 * Installs or modifies monitoring of a CR3 page (special).
1749 *
1750 * We're pretending the CR3 page is shadowed by the pool so we can use the
1751 * generic mechanisms in detecting chained monitoring. (This also gives us a
1752 * taste of what code changes are required to really pool CR3 shadow pages.)
1753 *
1754 * @returns VBox status code.
1755 * @param pPool The pool.
1756 * @param idxRoot The CR3 (root) page index.
1757 * @param GCPhysCR3 The (new) CR3 value.
1758 */
1759int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1760{
1761 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1762 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1763 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d} GCPhysCR3=%RGp\n",
1764 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1765
1766 /*
1767 * The unlikely case where it already matches.
1768 */
1769 if (pPage->GCPhys == GCPhysCR3)
1770 {
1771 Assert(pPage->fMonitored);
1772 return VINF_SUCCESS;
1773 }
1774
1775 /*
1776 * Flush the current monitoring and remove it from the hash.
1777 */
1778 int rc = VINF_SUCCESS;
1779 if (pPage->fMonitored)
1780 {
1781 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1782 rc = pgmPoolMonitorFlush(pPool, pPage);
1783 if (rc == VERR_PGM_POOL_CLEARED)
1784 rc = VINF_SUCCESS;
1785 else
1786 AssertFatalRC(rc);
1787 pgmPoolHashRemove(pPool, pPage);
1788 }
1789
1790 /*
1791 * Monitor the page at the new location and insert it into the hash.
1792 */
1793 pPage->GCPhys = GCPhysCR3;
1794 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1795 if (rc2 != VERR_PGM_POOL_CLEARED)
1796 {
1797 AssertFatalRC(rc2);
1798 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1799 rc = rc2;
1800 }
1801 pgmPoolHashInsert(pPool, pPage);
1802 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1803 return rc;
1804}
1805
1806
1807/**
1808 * Removes the monitoring of a CR3 page (special).
1809 *
1810 * @returns VBox status code.
1811 * @param pPool The pool.
1812 * @param idxRoot The CR3 (root) page index.
1813 */
1814int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1815{
1816 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1817 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1818 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d}\n",
1819 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1820
1821 if (!pPage->fMonitored)
1822 return VINF_SUCCESS;
1823
1824 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1825 int rc = pgmPoolMonitorFlush(pPool, pPage);
1826 if (rc != VERR_PGM_POOL_CLEARED)
1827 AssertFatalRC(rc);
1828 else
1829 rc = VINF_SUCCESS;
1830 pgmPoolHashRemove(pPool, pPage);
1831 Assert(!pPage->fMonitored);
1832 pPage->GCPhys = NIL_RTGCPHYS;
1833 return rc;
1834}
1835
1836# endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1837
1838/**
1839 * Inserts the page into the list of modified pages.
1840 *
1841 * @param pPool The pool.
1842 * @param pPage The page.
1843 */
1844void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1845{
1846 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1847 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1848 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1849 && pPool->iModifiedHead != pPage->idx,
1850 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1851 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1852 pPool->iModifiedHead, pPool->cModifiedPages));
1853
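 /* Insert the page at the head of the modified-page list. */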
1854 pPage->iModifiedNext = pPool->iModifiedHead;
1855 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1856 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1857 pPool->iModifiedHead = pPage->idx;
1858 pPool->cModifiedPages++;
1859#ifdef VBOX_WITH_STATISTICS
1860 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1861 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1862#endif
1863}
1864
1865
1866/**
1867 * Removes the page from the list of modified pages and resets the
1868 * modification counter.
1869 *
1870 * @param pPool The pool.
1871 * @param pPage The page which is believed to be in the list of modified pages.
1872 */
1873static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1874{
1875 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1876 if (pPool->iModifiedHead == pPage->idx)
1877 {
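 /* The page is at the head of the list; advance the head. */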
1878 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1879 pPool->iModifiedHead = pPage->iModifiedNext;
1880 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1881 {
1882 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1883 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1884 }
1885 pPool->cModifiedPages--;
1886 }
1887 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1888 {
1889 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1890 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1891 {
1892 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1893 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1894 }
1895 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1896 pPool->cModifiedPages--;
1897 }
1898 else
1899 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1900 pPage->cModifications = 0;
1901}
1902
1903
1904/**
1905 * Zaps the list of modified pages, resetting their modification counters in the process.
1906 *
1907 * @param pVM The VM handle.
1908 */
1909void pgmPoolMonitorModifiedClearAll(PVM pVM)
1910{
1911 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1912 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1913
1914 unsigned cPages = 0; NOREF(cPages);
1915 uint16_t idx = pPool->iModifiedHead;
1916 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
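 /* Walk the old list, unlinking each page and resetting its modification counter. */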
1917 while (idx != NIL_PGMPOOL_IDX)
1918 {
1919 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1920 idx = pPage->iModifiedNext;
1921 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1922 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1923 pPage->cModifications = 0;
1924 Assert(++cPages);
1925 }
1926 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1927 pPool->cModifiedPages = 0;
1928}
1929
1930
1931#ifdef IN_RING3
1932/**
1933 * Clear all shadow pages and clear all modification counters.
1934 *
1935 * @param pVM The VM handle.
1936 * @remark Should only be used when monitoring is available, thus placed in
1937 * the PGMPOOL_WITH_MONITORING #ifdef.
1938 */
1939void pgmPoolClearAll(PVM pVM)
1940{
1941 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1942 STAM_PROFILE_START(&pPool->StatClearAll, c);
1943 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1944
1945 /*
1946 * Iterate all the pages until we've encountered all that are in use.
1947 * This is a simple but not quite optimal solution.
1948 */
1949 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1950 unsigned cLeft = pPool->cUsedPages;
1951 unsigned iPage = pPool->cCurPages;
1952 while (--iPage >= PGMPOOL_IDX_FIRST)
1953 {
1954 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1955 if (pPage->GCPhys != NIL_RTGCPHYS)
1956 {
1957 switch (pPage->enmKind)
1958 {
1959 /*
1960 * We only care about shadow page tables.
1961 */
1962 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1963 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1964 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1965 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1966 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1967 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1968 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1969 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1970 {
1971#ifdef PGMPOOL_WITH_USER_TRACKING
1972 if (pPage->cPresent)
1973#endif
1974 {
1975 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1976 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1977 ASMMemZeroPage(pvShw);
1978 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1979#ifdef PGMPOOL_WITH_USER_TRACKING
1980 pPage->cPresent = 0;
1981 pPage->iFirstPresent = ~0;
1982#endif
1983 }
1984 }
1985 /* fall thru */
1986
1987 default:
1988 Assert(!pPage->cModifications || ++cModifiedPages);
1989 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1990 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1991 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1992 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1993 pPage->cModifications = 0;
1994 break;
1995
1996 }
1997 if (!--cLeft)
1998 break;
1999 }
2000 }
2001
2002 /* sweep the special pages too. */
2003 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2004 {
2005 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2006 if (pPage->GCPhys != NIL_RTGCPHYS)
2007 {
2008 Assert(!pPage->cModifications || ++cModifiedPages);
2009 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2010 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2011 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2012 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2013 pPage->cModifications = 0;
2014 }
2015 }
2016
2017#ifndef DEBUG_michael
2018 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2019#endif
2020 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2021 pPool->cModifiedPages = 0;
2022
2023#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2024 /*
2025 * Clear all the GCPhys links and rebuild the phys ext free list.
2026 */
2027 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2028 pRam;
2029 pRam = pRam->CTX_SUFF(pNext))
2030 {
2031 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2032 while (iPage-- > 0)
2033 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2034 }
2035
2036 pPool->iPhysExtFreeHead = 0;
2037 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2038 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2039 for (unsigned i = 0; i < cMaxPhysExts; i++)
2040 {
2041 paPhysExts[i].iNext = i + 1;
2042 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2043 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2044 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2045 }
2046 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2047#endif
2048
2049
2050 pPool->cPresent = 0;
2051 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2052}
2053#endif /* IN_RING3 */
2054
2055
2056/**
2057 * Handle SyncCR3 pool tasks
2058 *
2059 * @returns VBox status code.
2060 * @retval VINF_SUCCESS on success.
2061 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2062 * @param pVM The VM handle.
2063 * @remark Should only be used when monitoring is available, thus placed in
2064 * the PGMPOOL_WITH_MONITORING #ifdef.
2065 */
2066int pgmPoolSyncCR3(PVM pVM)
2067{
2068 /*
2069 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2070 * Occasionally we will have to clear all the shadow page tables because we wanted
2071 * to monitor a page which was mapped by too many shadowed page tables. This operation
2072 * sometimes refered to as a 'lightweight flush'.
2073 */
2074 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2075 pgmPoolMonitorModifiedClearAll(pVM);
2076 else
2077 {
2078# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2079 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2080 pgmPoolClearAll(pVM);
2081# else /* !IN_RING3 */
2082 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2083 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2084 return VINF_PGM_SYNC_CR3;
2085# endif /* !IN_RING3 */
2086 }
2087 return VINF_SUCCESS;
2088}
2089
2090#endif /* PGMPOOL_WITH_MONITORING */
2091#ifdef PGMPOOL_WITH_USER_TRACKING
2092
2093/**
2094 * Frees up at least one user entry.
2095 *
2096 * @returns VBox status code.
2097 * @retval VINF_SUCCESS on success.
2098 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2099 * @param pPool The pool.
2100 * @param iUser The user index.
2101 */
2102static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2103{
2104 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2105#ifdef PGMPOOL_WITH_CACHE
2106 /*
2107 * Just free cached pages in a braindead fashion.
2108 */
2109 /** @todo walk the age list backwards and free the first with usage. */
2110 int rc = VINF_SUCCESS;
2111 do
2112 {
2113 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2114 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2115 rc = rc2;
2116 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2117 return rc;
2118#else
2119 /*
2120 * Lazy approach.
2121 */
2122 /** @todo This is incompatible with long mode paging (the CR3 root will be flushed). */
2123 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
2124 pgmPoolFlushAllInt(pPool);
2125 return VERR_PGM_POOL_FLUSHED;
2126#endif
2127}
2128
2129
2130/**
2131 * Inserts a page into the cache.
2132 *
2133 * This will create user node for the page, insert it into the GCPhys
2134 * hash, and insert it into the age list.
2135 *
2136 * @returns VBox status code.
2137 * @retval VINF_SUCCESS if successfully added.
2138 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2139 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
2140 * @param pPool The pool.
2141 * @param pPage The cached page.
2142 * @param GCPhys The GC physical address of the page we're gonna shadow.
2143 * @param iUser The user index.
2144 * @param iUserTable The user table index.
2145 */
2146DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2147{
2148 int rc = VINF_SUCCESS;
2149 PPGMPOOLUSER pUser = pPool->CTX_SUFF(paUsers);
2150
2151 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
2152
2153 /*
2154 * Find a free user node.
2155 */
2156 uint16_t i = pPool->iUserFreeHead;
2157 if (i == NIL_PGMPOOL_USER_INDEX)
2158 {
2159 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2160 if (RT_FAILURE(rc))
2161 return rc;
2162 i = pPool->iUserFreeHead;
2163 }
2164
2165 /*
2166 * Unlink the user node from the free list,
2167 * initialize and insert it into the user list.
2168 */
2169 pPool->iUserFreeHead = pUser[i].iNext;
2170 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
2171 pUser[i].iUser = iUser;
2172 pUser[i].iUserTable = iUserTable;
2173 pPage->iUserHead = i;
2174
2175 /*
2176 * Insert into cache and enable monitoring of the guest page if enabled.
2177 *
2178 * Until we implement caching of all levels, including the CR3 one, we'll
2179 * have to make sure we don't try to monitor & cache any recursive reuse of
2180 * a monitored CR3 page. Because all Windows versions are doing this we'll
2181 * have to be able to do combined access monitoring, CR3 + PT and
2182 * PD + PT (guest PAE).
2183 *
2184 * Update:
2185 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2186 */
2187#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2188# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2189 const bool fCanBeMonitored = true;
2190# else
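 /* Don't monitor a page that shadows the currently monitored guest CR3 (unless it's a big page);
  the CR3 monitor covers that case. */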
2191 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2192 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2193 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2194# endif
2195# ifdef PGMPOOL_WITH_CACHE
2196 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2197# endif
2198 if (fCanBeMonitored)
2199 {
2200# ifdef PGMPOOL_WITH_MONITORING
2201 rc = pgmPoolMonitorInsert(pPool, pPage);
2202 if (rc == VERR_PGM_POOL_CLEARED)
2203 {
2204 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2205# ifndef PGMPOOL_WITH_CACHE
2206 pgmPoolMonitorFlush(pPool, pPage);
2207 rc = VERR_PGM_POOL_FLUSHED;
2208# endif
2209 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2210 pUser[i].iNext = pPool->iUserFreeHead;
2211 pUser[i].iUser = NIL_PGMPOOL_IDX;
2212 pPool->iUserFreeHead = i;
2213 }
2214 }
2215# endif
2216#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2217 return rc;
2218}
2219
2220
2221# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2222/**
2223 * Adds a user reference to a page.
2224 *
2225 * This will add a user record for the page and, when caching is enabled,
2226 * move the page to the head of the age list.
2227 *
2228 * @returns VBox status code.
2229 * @retval VINF_SUCCESS if successfully added.
2230 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2231 * @param pPool The pool.
2232 * @param pPage The cached page.
2233 * @param iUser The user index.
2234 * @param iUserTable The user table.
2235 */
2236static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2237{
2238 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2239
2240 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
2241# ifdef VBOX_STRICT
2242 /*
2243 * Check that the entry doesn't already exist.
2244 */
2245 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2246 {
2247 uint16_t i = pPage->iUserHead;
2248 do
2249 {
2250 Assert(i < pPool->cMaxUsers);
2251 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2252 i = paUsers[i].iNext;
2253 } while (i != NIL_PGMPOOL_USER_INDEX);
2254 }
2255# endif
2256
2257 /*
2258 * Allocate a user node.
2259 */
2260 uint16_t i = pPool->iUserFreeHead;
2261 if (i == NIL_PGMPOOL_USER_INDEX)
2262 {
2263 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2264 if (RT_FAILURE(rc))
2265 return rc;
2266 i = pPool->iUserFreeHead;
2267 }
2268 pPool->iUserFreeHead = paUsers[i].iNext;
2269
2270 /*
2271 * Initialize the user node and insert it.
2272 */
2273 paUsers[i].iNext = pPage->iUserHead;
2274 paUsers[i].iUser = iUser;
2275 paUsers[i].iUserTable = iUserTable;
2276 pPage->iUserHead = i;
2277
2278# ifdef PGMPOOL_WITH_CACHE
2279 /*
2280 * Tell the cache to update its replacement stats for this page.
2281 */
2282 pgmPoolCacheUsed(pPool, pPage);
2283# endif
2284 return VINF_SUCCESS;
2285}
2286# endif /* PGMPOOL_WITH_CACHE */
2287
2288
2289/**
2290 * Frees a user record associated with a page.
2291 *
2292 * This does not clear the entry in the user table, it simply returns the
2293 * user record to the chain of free records.
2294 *
2295 * @param pPool The pool.
2296 * @param pPage The shadow page.
2297 * @param iUser The shadow page pool index of the user table.
2298 * @param iUserTable The index into the user table (shadowed).
2299 */
2300static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2301{
2302 /*
2303 * Unlink and free the specified user entry.
2304 */
2305 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2306
2307 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2308 uint16_t i = pPage->iUserHead;
2309 if ( i != NIL_PGMPOOL_USER_INDEX
2310 && paUsers[i].iUser == iUser
2311 && paUsers[i].iUserTable == iUserTable)
2312 {
2313 pPage->iUserHead = paUsers[i].iNext;
2314
2315 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2316 paUsers[i].iNext = pPool->iUserFreeHead;
2317 pPool->iUserFreeHead = i;
2318 return;
2319 }
2320
2321 /* General: Linear search. */
2322 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2323 while (i != NIL_PGMPOOL_USER_INDEX)
2324 {
2325 if ( paUsers[i].iUser == iUser
2326 && paUsers[i].iUserTable == iUserTable)
2327 {
2328 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2329 paUsers[iPrev].iNext = paUsers[i].iNext;
2330 else
2331 pPage->iUserHead = paUsers[i].iNext;
2332
2333 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2334 paUsers[i].iNext = pPool->iUserFreeHead;
2335 pPool->iUserFreeHead = i;
2336 return;
2337 }
2338 iPrev = i;
2339 i = paUsers[i].iNext;
2340 }
2341
2342 /* Fatal: didn't find it */
2343 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2344 iUser, iUserTable, pPage->GCPhys));
2345}
2346
2347
2348/**
2349 * Gets the entry size of a shadow table.
2350 *
2351 * @param enmKind The kind of page.
2352 *
2353 * @returns The size of the entry in bytes. That is, 4 or 8.
2354 * @returns If the kind is not for a table, an assertion is raised and 0 is
2355 * returned.
2356 */
2357DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2358{
2359 switch (enmKind)
2360 {
2361 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2362 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2363 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2364 case PGMPOOLKIND_ROOT_32BIT_PD:
2365 return 4;
2366
2367 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2368 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2369 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2370 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2371 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2372 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2373 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2374 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2375 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2376 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2377 case PGMPOOLKIND_ROOT_PAE_PD:
2378 case PGMPOOLKIND_ROOT_PDPT:
2379 case PGMPOOLKIND_ROOT_NESTED:
2380 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2381 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2382 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2383 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2384 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2385 return 8;
2386
2387 default:
2388 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2389 }
2390}
2391
2392
2393/**
2394 * Gets the entry size of a guest table.
2395 *
2396 * @param enmKind The kind of page.
2397 *
2398 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2399 * @returns If the kind is not for a table, an assertion is raised and 0 is
2400 * returned.
2401 */
2402DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2403{
2404 switch (enmKind)
2405 {
2406 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2407 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2408 case PGMPOOLKIND_ROOT_32BIT_PD:
2409 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2410 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2411 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2412 return 4;
2413
2414 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2415 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2416 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2417 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2418 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2419 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2420 case PGMPOOLKIND_ROOT_PAE_PD:
2421 case PGMPOOLKIND_ROOT_PDPT:
2422 return 8;
2423
2424 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2425 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2426 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2427 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2428 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2429 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2430 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2431 case PGMPOOLKIND_ROOT_NESTED:
2432 /** @todo can we return 0? (nobody is calling this...) */
2433 AssertFailed();
2434 return 0;
2435
2436 default:
2437 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2438 }
2439}
2440
2441#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2442
2443/**
2444 * Scans one shadow page table for mappings of a physical page.
2445 *
2446 * @param pVM The VM handle.
2447 * @param pPhysPage The guest page in question.
2448 * @param iShw The shadow page table.
2449 * @param cRefs The number of references made in that PT.
2450 */
2451static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2452{
2453 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2454 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2455
2456 /*
2457 * Assert sanity.
2458 */
2459 Assert(cRefs == 1);
2460 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2461 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2462
2463 /*
2464 * Then, clear the actual mappings to the page in the shadow PT.
2465 */
2466 switch (pPage->enmKind)
2467 {
2468 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2469 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2470 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2471 {
2472 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2473 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2474 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2475 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2476 {
2477 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2478 pPT->a[i].u = 0;
2479 cRefs--;
2480 if (!cRefs)
2481 return;
2482 }
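 /* Still here? Then cRefs didn't reach zero: the reference count and the shadow PT disagree,
  so dump the remaining matching entries and assert. */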
2483#ifdef LOG_ENABLED
2484 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2485 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2486 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2487 {
2488 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2489 pPT->a[i].u = 0;
2490 }
2491#endif
2492 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2493 break;
2494 }
2495
2496 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2497 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2498 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2499 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2500 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2501 {
2502 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2503 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2504 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2505 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2506 {
2507 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2508 pPT->a[i].u = 0;
2509 cRefs--;
2510 if (!cRefs)
2511 return;
2512 }
2513#ifdef LOG_ENABLED
2514 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2515 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2516 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2517 {
2518 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2519 pPT->a[i].u = 0;
2520 }
2521#endif
2522 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
2523 break;
2524 }
2525
2526 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2527 {
2528 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2529 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2530 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2531 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2532 {
2533 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2534 pPT->a[i].u = 0;
2535 cRefs--;
2536 if (!cRefs)
2537 return;
2538 }
2539#ifdef LOG_ENABLED
2540 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2541 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2542 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2543 {
2544 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2545 pPT->a[i].u = 0;
2546 }
2547#endif
2548 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2549 break;
2550 }
2551
2552 default:
2553 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2554 }
2555}
2556
2557
2558/**
2559 * Scans one shadow page table for mappings of a physical page.
2560 *
2561 * @param pVM The VM handle.
2562 * @param pPhysPage The guest page in question.
2563 * @param iShw The shadow page table.
2564 * @param cRefs The number of references made in that PT.
2565 */
2566void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2567{
2568 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2569 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2570 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2571 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2572 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2573 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2574}
2575
2576
2577/**
2578 * Flushes a list of shadow page tables mapping the same physical page.
2579 *
2580 * @param pVM The VM handle.
2581 * @param pPhysPage The guest page in question.
2582 * @param iPhysExt The physical cross reference extent list to flush.
2583 */
2584void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2585{
2586 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2587 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2588 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%d\n", pPhysPage->HCPhys, iPhysExt));
2589
2590 const uint16_t iPhysExtStart = iPhysExt;
2591 PPGMPOOLPHYSEXT pPhysExt;
2592 do
2593 {
2594 Assert(iPhysExt < pPool->cMaxPhysExts);
2595 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2596 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2597 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2598 {
2599 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2600 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2601 }
2602
2603 /* next */
2604 iPhysExt = pPhysExt->iNext;
2605 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2606
2607 /* insert the list into the free list and clear the ram range entry. */
2608 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2609 pPool->iPhysExtFreeHead = iPhysExtStart;
2610 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2611
2612 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2613}
2614
2615#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2616
2617/**
2618 * Scans all shadow page tables for mappings of a physical page.
2619 *
2620 * This may be slow, but it's most likely more efficient than cleaning
2621 * out the entire page pool / cache.
2622 *
2623 * @returns VBox status code.
2624 * @retval VINF_SUCCESS if all references have been successfully cleared.
2625 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2626 * a page pool cleaning.
2627 *
2628 * @param pVM The VM handle.
2629 * @param pPhysPage The guest page in question.
2630 */
2631int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2632{
2633 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2634 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2635 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2636 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2637
2638#if 1
2639 /*
2640 * There is a limit to what makes sense.
2641 */
2642 if (pPool->cPresent > 1024)
2643 {
2644 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2645 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2646 return VINF_PGM_GCPHYS_ALIASED;
2647 }
2648#endif
2649
2650 /*
2651 * Iterate all the pages until we've encountered all that are in use.
2652 * This is a simple but not quite optimal solution.
2653 */
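 /* The PTE value to search for: the host physical address plus the present bit,
  in 64-bit (PAE) and truncated 32-bit forms. */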
2654 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2655 const uint32_t u32 = u64;
2656 unsigned cLeft = pPool->cUsedPages;
2657 unsigned iPage = pPool->cCurPages;
2658 while (--iPage >= PGMPOOL_IDX_FIRST)
2659 {
2660 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2661 if (pPage->GCPhys != NIL_RTGCPHYS)
2662 {
2663 switch (pPage->enmKind)
2664 {
2665 /*
2666 * We only care about shadow page tables.
2667 */
2668 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2669 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2670 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2671 {
2672 unsigned cPresent = pPage->cPresent;
2673 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2674 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2675 if (pPT->a[i].n.u1Present)
2676 {
2677 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2678 {
2679 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2680 pPT->a[i].u = 0;
2681 }
2682 if (!--cPresent)
2683 break;
2684 }
2685 break;
2686 }
2687
2688 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2689 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2690 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2691 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2692 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2693 {
2694 unsigned cPresent = pPage->cPresent;
2695 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2696 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2697 if (pPT->a[i].n.u1Present)
2698 {
2699 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2700 {
2701 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2702 pPT->a[i].u = 0;
2703 }
2704 if (!--cPresent)
2705 break;
2706 }
2707 break;
2708 }
2709 }
2710 if (!--cLeft)
2711 break;
2712 }
2713 }
2714
2715 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2716 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2717 return VINF_SUCCESS;
2718}
2719
2720
2721/**
2722 * Clears the user entry in a user table.
2723 *
2724 * This is used to remove all references to a page when flushing it.
2725 */
2726static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2727{
2728 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2729 Assert(pUser->iUser < pPool->cCurPages);
2730 uint32_t iUserTable = pUser->iUserTable;
2731
2732 /*
2733 * Map the user page.
2734 */
2735 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2736#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2737 if (pUserPage->enmKind == PGMPOOLKIND_ROOT_PAE_PD)
2738 {
2739 /* Must translate the fake 2048 entry PD to a 512 entry PD since the R0 mapping is not linear. */
2740 Assert(pUser->iUser == PGMPOOL_IDX_PAE_PD);
2741 uint32_t iPdpt = iUserTable / X86_PG_PAE_ENTRIES;
2742 iUserTable %= X86_PG_PAE_ENTRIES;
2743 pUserPage = &pPool->aPages[PGMPOOL_IDX_PAE_PD_0 + iPdpt];
2744 Assert(pUserPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD);
2745 }
2746#endif
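 /* The user table entries are either 32-bit or 64-bit depending on the page kind,
  so map the page via a union and pick the right view below. */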
2747 union
2748 {
2749 uint64_t *pau64;
2750 uint32_t *pau32;
2751 } u;
2752 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
2753
2754 /* Safety precaution in case we change the paging for other modes too in the future. */
2755 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
2756
2757#ifdef VBOX_STRICT
2758 /*
2759 * Some sanity checks.
2760 */
2761 switch (pUserPage->enmKind)
2762 {
2763 case PGMPOOLKIND_ROOT_32BIT_PD:
2764 Assert(iUserTable < X86_PG_ENTRIES);
2765 Assert(!(u.pau32[iUserTable] & PGM_PDFLAGS_MAPPING));
2766 break;
2767# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2768 case PGMPOOLKIND_ROOT_PAE_PD:
2769 Assert(iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2770 AssertMsg(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING), ("%llx %d\n", u.pau64[iUserTable], iUserTable));
2771 break;
2772# endif
2773 case PGMPOOLKIND_ROOT_PDPT:
2774 Assert(iUserTable < 4);
2775 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2776 break;
2777 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2778 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2779 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2780 break;
2781 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2782 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2783 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
2784 break;
2785 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2786 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2787 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2788 break;
2789 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2790 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2791 /* GCPhys >> PAGE_SHIFT is the index here */
2792 break;
2793 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2794 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2795 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2796 break;
2797
2798 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2799 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2800 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2801 break;
2802
2803 case PGMPOOLKIND_ROOT_NESTED:
2804 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2805 break;
2806
2807 default:
2808 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2809 break;
2810 }
2811#endif /* VBOX_STRICT */
2812
2813 /*
2814 * Clear the entry in the user page.
2815 */
2816 switch (pUserPage->enmKind)
2817 {
2818 /* 32-bit entries */
2819 case PGMPOOLKIND_ROOT_32BIT_PD:
2820 u.pau32[iUserTable] = 0;
2821 break;
2822
2823 /* 64-bit entries */
2824 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2825 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2826 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2827 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2828 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2829 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2830 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2831#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2832 case PGMPOOLKIND_ROOT_PAE_PD:
2833#endif
2834 case PGMPOOLKIND_ROOT_PDPT:
2835 case PGMPOOLKIND_ROOT_NESTED:
2836 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2837 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2838 u.pau64[iUserTable] = 0;
2839 break;
2840
2841 default:
2842 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2843 }
2844}
2845
2846
2847/**
2848 * Clears all users of a page.
2849 */
2850static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2851{
2852 /*
2853 * Free all the user records.
2854 */
2855 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2856 uint16_t i = pPage->iUserHead;
2857 while (i != NIL_PGMPOOL_USER_INDEX)
2858 {
2859 /* Clear the entry in the user table. */
2860 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2861
2862 /* Free it. */
2863 const uint16_t iNext = paUsers[i].iNext;
2864 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2865 paUsers[i].iNext = pPool->iUserFreeHead;
2866 pPool->iUserFreeHead = i;
2867
2868 /* Next. */
2869 i = iNext;
2870 }
2871 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2872}
2873
2874#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2875
2876/**
2877 * Allocates a new physical cross reference extent.
2878 *
2879 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2880 * @param pVM The VM handle.
2881 * @param piPhysExt Where to store the phys ext index.
2882 */
2883PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2884{
2885 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2886 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2887 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2888 {
2889 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2890 return NULL;
2891 }
2892 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2893 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2894 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2895 *piPhysExt = iPhysExt;
2896 return pPhysExt;
2897}
2898
2899
2900/**
2901 * Frees a physical cross reference extent.
2902 *
2903 * @param pVM The VM handle.
2904 * @param iPhysExt The extent to free.
2905 */
2906void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2907{
2908 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2909 Assert(iPhysExt < pPool->cMaxPhysExts);
2910 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2911 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2912 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2913 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2914 pPool->iPhysExtFreeHead = iPhysExt;
2915}
2916
2917
2918/**
2919 * Frees a list of physical cross reference extents.
2920 *
2921 * @param pVM The VM handle.
2922 * @param iPhysExt The index of the first extent in the list to free.
2923 */
2924void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2925{
2926 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2927
2928 const uint16_t iPhysExtStart = iPhysExt;
2929 PPGMPOOLPHYSEXT pPhysExt;
2930 do
2931 {
2932 Assert(iPhysExt < pPool->cMaxPhysExts);
2933 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2934 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2935 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2936
2937 /* next */
2938 iPhysExt = pPhysExt->iNext;
2939 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2940
2941 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2942 pPool->iPhysExtFreeHead = iPhysExtStart;
2943}
2944
2945
2946/**
2947 * Insert a reference into a list of physical cross reference extents.
2948 *
2949 * @returns The new ram range flags (top 16-bits).
2950 *
2951 * @param pVM The VM handle.
2952 * @param iPhysExt The physical extent index of the list head.
2953 * @param iShwPT The shadow page table index.
2954 *
2955 */
2956static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2957{
2958 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2959 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2960
2961 /* special common case. */
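 /* (pgmPoolTrackPhysExtAddref fills aidx[0] and aidx[1] when it creates a new extent,
  so slot 2 is typically the first free one.) */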
2962 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2963 {
2964 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2965 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2966 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2967 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2968 }
2969
2970 /* general treatment. */
2971 const uint16_t iPhysExtStart = iPhysExt;
2972 unsigned cMax = 15;
2973 for (;;)
2974 {
2975 Assert(iPhysExt < pPool->cMaxPhysExts);
2976 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2977 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2978 {
2979 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2980 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2981 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2982 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2983 }
2984 if (!--cMax)
2985 {
2986 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2987 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2988 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2989 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2990 }
2991 }
2992
2993 /* add another extent to the list. */
2994 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2995 if (!pNew)
2996 {
2997 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2998 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2999 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3000 }
3001 pNew->iNext = iPhysExtStart;
3002 pNew->aidx[0] = iShwPT;
3003 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3004 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3005}
3006
3007
3008/**
3009 * Adds a reference to a guest physical page where extents are in use.
3010 *
3011 * @returns The new ram range flags (top 16-bits).
3012 *
3013 * @param pVM The VM handle.
3014 * @param u16 The ram range flags (top 16-bits).
3015 * @param iShwPT The shadow page table index.
3016 */
3017uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3018{
3019 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
3020 {
3021 /*
3022 * Convert to extent list.
3023 */
3024 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
3025 uint16_t iPhysExt;
3026 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3027 if (pPhysExt)
3028 {
3029 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
3030 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3031 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
3032 pPhysExt->aidx[1] = iShwPT;
3033 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3034 }
3035 else
3036 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3037 }
3038 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
3039 {
3040 /*
3041 * Insert into the extent list.
3042 */
3043 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
3044 }
3045 else
3046 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3047 return u16;
3048}
3049
3050
3051/**
3052 * Clear references to guest physical memory.
3053 *
3054 * @param pPool The pool.
3055 * @param pPage The page.
3056 * @param pPhysPage Pointer to the aPages entry in the ram range.
3057 */
3058void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3059{
3060 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
3061 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3062
3063 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
3064 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
3065 {
3066 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3067 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3068 do
3069 {
3070 Assert(iPhysExt < pPool->cMaxPhysExts);
3071
3072 /*
3073 * Look for the shadow page and check if it's all freed.
3074 */
3075 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3076 {
3077 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3078 {
3079 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3080
3081 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3082 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3083 {
3084 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3085 return;
3086 }
3087
3088 /* we can free the node. */
3089 PVM pVM = pPool->CTX_SUFF(pVM);
3090 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3091 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3092 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3093 {
3094 /* lonely node */
3095 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3096 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
3097 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3098 }
3099 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3100 {
3101 /* head */
3102 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
3103 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
3104 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
3105 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
3106 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3107 }
3108 else
3109 {
3110 /* in list */
3111 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3112 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3113 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3114 }
3115 iPhysExt = iPhysExtNext;
3116 return;
3117 }
3118 }
3119
3120 /* next */
3121 iPhysExtPrev = iPhysExt;
3122 iPhysExt = paPhysExts[iPhysExt].iNext;
3123 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3124
3125 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3126 }
3127 else /* nothing to do */
3128 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
3129}
3130
3131
3132/**
3133 * Clear references to guest physical memory.
3134 *
3135 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3136 * is assumed to be correct, so the linear search can be skipped and we can assert
3137 * at an earlier point.
3138 *
3139 * @param pPool The pool.
3140 * @param pPage The page.
3141 * @param HCPhys The host physical address corresponding to the guest page.
3142 * @param GCPhys The guest physical address corresponding to HCPhys.
3143 */
3144static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3145{
3146 /*
3147 * Walk range list.
3148 */
3149 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3150 while (pRam)
3151 {
3152 RTGCPHYS off = GCPhys - pRam->GCPhys;
3153 if (off < pRam->cb)
3154 {
3155 /* does it match? */
3156 const unsigned iPage = off >> PAGE_SHIFT;
3157 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3158#ifdef LOG_ENABLED
3159 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3160 Log(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3161#endif
3162 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3163 {
3164 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3165 return;
3166 }
3167 break;
3168 }
3169 pRam = pRam->CTX_SUFF(pNext);
3170 }
3171 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3172}
3173
3174
3175/**
3176 * Clear references to guest physical memory.
3177 *
3178 * @param pPool The pool.
3179 * @param pPage The page.
3180 * @param HCPhys The host physical address corresponding to the guest page.
3181 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3182 */
3183static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3184{
3185 /*
3186 * Walk range list.
3187 */
3188 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3189 while (pRam)
3190 {
3191 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3192 if (off < pRam->cb)
3193 {
3194 /* does it match? */
3195 const unsigned iPage = off >> PAGE_SHIFT;
3196 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3197 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3198 {
3199 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3200 return;
3201 }
3202 break;
3203 }
3204 pRam = pRam->CTX_SUFF(pNext);
3205 }
3206
3207 /*
3208 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3209 */
3210 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3211 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3212 while (pRam)
3213 {
3214 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3215 while (iPage-- > 0)
3216 {
3217 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3218 {
3219 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3220 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3221 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3222 return;
3223 }
3224 }
3225 pRam = pRam->CTX_SUFF(pNext);
3226 }
3227
3228 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3229}
3230
3231
3232/**
3233 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3234 *
3235 * @param pPool The pool.
3236 * @param pPage The page.
3237 * @param pShwPT The shadow page table (mapping of the page).
3238 * @param pGstPT The guest page table.
3239 */
3240DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3241{
3242 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3243 if (pShwPT->a[i].n.u1Present)
3244 {
3245 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3246 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3247 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3248 if (!--pPage->cPresent)
3249 break;
3250 }
3251}
3252
3253
3254/**
3255 * Clear references to guest physical memory in a PAE / 32-bit page table.
3256 *
3257 * @param pPool The pool.
3258 * @param pPage The page.
3259 * @param pShwPT The shadow page table (mapping of the page).
3260 * @param pGstPT The guest page table (just a half one).
3261 */
3262DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3263{
3264 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3265 if (pShwPT->a[i].n.u1Present)
3266 {
3267 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3268 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3269 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3270 }
3271}
3272
3273
3274/**
3275 * Clear references to guest physical memory in a PAE / PAE page table.
3276 *
3277 * @param pPool The pool.
3278 * @param pPage The page.
3279 * @param pShwPT The shadow page table (mapping of the page).
3280 * @param pGstPT The guest page table.
3281 */
3282DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3283{
3284 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3285 if (pShwPT->a[i].n.u1Present)
3286 {
3287 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3288 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3289 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3290 }
3291}
3292
3293
3294/**
3295 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3296 *
3297 * @param pPool The pool.
3298 * @param pPage The page.
3299 * @param pShwPT The shadow page table (mapping of the page).
3300 */
3301DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3302{
3303 RTGCPHYS GCPhys = pPage->GCPhys;
3304 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3305 if (pShwPT->a[i].n.u1Present)
3306 {
3307 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3308 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3309 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3310 }
3311}
3312
3313
3314/**
3315 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3316 *
3317 * @param pPool The pool.
3318 * @param pPage The page.
3319 * @param pShwPT The shadow page table (mapping of the page).
3320 */
3321DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3322{
3323 RTGCPHYS GCPhys = pPage->GCPhys;
3324 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3325 if (pShwPT->a[i].n.u1Present)
3326 {
3327 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3328 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3329 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3330 }
3331}
3332
3333#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3334
3335/**
3336 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
3337 *
3338 * @param pPool The pool.
3339 * @param pPage The page.
3340 * @param pShwPD The shadow page directory (mapping of the page).
3341 */
3342DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3343{
3344 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3345 {
3346 if (pShwPD->a[i].n.u1Present)
3347 {
3348 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3349 if (pSubPage)
3350 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3351 else
3352 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3353 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3354 }
3355 }
3356}
3357
3358
3359/**
3360 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3361 *
3362 * @param pPool The pool.
3363 * @param pPage The page.
3364 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3365 */
3366DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3367{
3368 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3369 {
3370 if (pShwPDPT->a[i].n.u1Present)
3371 {
3372 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3373 if (pSubPage)
3374 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3375 else
3376 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3377 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3378 }
3379 }
3380}
3381
3382
3383/**
3384 * Clear references to shadowed pages in a 64-bit level 4 page table.
3385 *
3386 * @param pPool The pool.
3387 * @param pPage The page.
3388 * @param   pShwPML4    The shadow PML4 (mapping of the page).
3389 */
3390DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3391{
3392 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3393 {
3394 if (pShwPML4->a[i].n.u1Present)
3395 {
3396            PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PML4E_PG_MASK);
3397 if (pSubPage)
3398 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3399 else
3400 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3401 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3402 }
3403 }
3404}
3405
3406
3407/**
3408 * Clear references to guest physical memory in an EPT page table.
3409 *
3410 * @param   pPool       The pool.
3411 * @param   pPage       The page.
3412 * @param   pShwPT      The shadow page table (mapping of the page).
3413 */
3414DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3415{
3416 RTGCPHYS GCPhys = pPage->GCPhys;
3417 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3418 if (pShwPT->a[i].n.u1Present)
3419 {
3420            Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RGp\n",
3421                  i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3422 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3423 }
3424}
3425
3426
3427/**
3428 * Clear references to shadowed pages in an EPT page directory.
3429 *
3430 * @param pPool The pool.
3431 * @param pPage The page.
3432 * @param pShwPD The shadow page directory (mapping of the page).
3433 */
3434DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3435{
3436 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3437 {
3438 if (pShwPD->a[i].n.u1Present)
3439 {
3440 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3441 if (pSubPage)
3442 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3443 else
3444 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3445 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3446 }
3447 }
3448}
3449
3450
3451/**
3452 * Clear references to shadowed pages in an EPT page directory pointer table.
3453 *
3454 * @param pPool The pool.
3455 * @param pPage The page.
3456 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3457 */
3458DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3459{
3460 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3461 {
3462 if (pShwPDPT->a[i].n.u1Present)
3463 {
3464 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3465 if (pSubPage)
3466 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3467 else
3468 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3469 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3470 }
3471 }
3472}
3473
3474
3475/**
3476 * Clears all references made by this page.
3477 *
3478 * This includes other shadow pages and GC physical addresses.
3479 *
3480 * @param pPool The pool.
3481 * @param pPage The page.
3482 */
3483static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3484{
3485 /*
3486 * Map the shadow page and take action according to the page kind.
3487 */
3488 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3489 switch (pPage->enmKind)
3490 {
3491#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3492 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3493 {
3494 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3495 void *pvGst;
3496 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3497 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3498 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3499 break;
3500 }
3501
3502 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3503 {
3504 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3505 void *pvGst;
3506 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3507 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3508 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3509 break;
3510 }
3511
3512 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3513 {
3514 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3515 void *pvGst;
3516 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3517 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3518 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3519 break;
3520 }
3521
3522 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3523 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3524 {
3525 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3526 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3527 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3528 break;
3529 }
3530
3531 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3532 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3533 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3534 {
3535 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3536 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3537 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3538 break;
3539 }
3540
3541#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3542 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3543 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3544 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3545 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3546 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3547 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3548 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3549 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3550 break;
3551#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3552
3553 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
3554 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3555 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3556 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3557 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3558 break;
3559
3560 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3561 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3562 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3563 break;
3564
3565 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
3566 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3567 break;
3568
3569 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3570 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
3571 break;
3572
3573 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3574 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
3575 break;
3576
3577 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3578 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
3579 break;
3580
3581 default:
3582 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3583 }
3584
3585    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3586 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3587 ASMMemZeroPage(pvShw);
3588 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3589 pPage->fZeroed = true;
3590}
3591
3592#endif /* PGMPOOL_WITH_USER_TRACKING */
3593
3594/**
3595 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3596 *
3597 * @param pPool The pool.
3598 */
3599static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3600{
3601 /*
3602     * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST-1.
3603 */
3604 Assert(NIL_PGMPOOL_IDX == 0);
3605 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3606 {
3607 /*
3608 * Get the page address.
3609 */
3610 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3611 union
3612 {
3613 uint64_t *pau64;
3614 uint32_t *pau32;
3615 } u;
3616
3617 /*
3618 * Mark stuff not present.
3619 */
3620 switch (pPage->enmKind)
3621 {
3622 case PGMPOOLKIND_ROOT_32BIT_PD:
3623 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3624 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3625 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3626 u.pau32[iPage] = 0;
3627 break;
3628
3629 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3630 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3631 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
3632 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3633 u.pau64[iPage] = 0;
3634 break;
3635
3636 case PGMPOOLKIND_ROOT_PDPT:
3637 /* Not root of shadowed pages currently, ignore it. */
3638 break;
3639
3640 case PGMPOOLKIND_ROOT_NESTED:
3641 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3642 ASMMemZero32(u.pau64, PAGE_SIZE);
3643 break;
3644 }
3645 }
3646
3647 /*
3648 * Paranoia (to be removed), flag a global CR3 sync.
3649 */
3650 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
3651}
3652
3653
3654/**
3655 * Flushes the entire cache.
3656 *
3657 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3658 * and will execute the CR3 flush.
3659 *
3660 * @param pPool The pool.
3661 */
3662static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3663{
3664 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3665 LogFlow(("pgmPoolFlushAllInt:\n"));
3666
3667 /*
3668 * If there are no pages in the pool, there is nothing to do.
3669 */
3670 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3671 {
3672 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3673 return;
3674 }
3675
3676 /*
3677 * Nuke the free list and reinsert all pages into it.
3678 */
3679 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3680 {
3681 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3682
3683#ifdef IN_RING3
3684 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMR3, pPage->pvPageR3));
3685#endif
3686#ifdef PGMPOOL_WITH_MONITORING
3687 if (pPage->fMonitored)
3688 pgmPoolMonitorFlush(pPool, pPage);
3689 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3690 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3691 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3692 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3693 pPage->cModifications = 0;
3694#endif
3695 pPage->GCPhys = NIL_RTGCPHYS;
3696 pPage->enmKind = PGMPOOLKIND_FREE;
3697 Assert(pPage->idx == i);
3698 pPage->iNext = i + 1;
3699 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3700 pPage->fSeenNonGlobal = false;
3701        pPage->fMonitored = false;
3702 pPage->fCached = false;
3703 pPage->fReusedFlushPending = false;
3704 pPage->fCR3Mix = false;
3705#ifdef PGMPOOL_WITH_USER_TRACKING
3706 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3707#endif
3708#ifdef PGMPOOL_WITH_CACHE
3709 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3710 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3711#endif
3712 }
3713 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3714 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3715 pPool->cUsedPages = 0;
3716
3717#ifdef PGMPOOL_WITH_USER_TRACKING
3718 /*
3719 * Zap and reinitialize the user records.
3720 */
3721 pPool->cPresent = 0;
3722 pPool->iUserFreeHead = 0;
3723 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3724 const unsigned cMaxUsers = pPool->cMaxUsers;
3725 for (unsigned i = 0; i < cMaxUsers; i++)
3726 {
3727 paUsers[i].iNext = i + 1;
3728 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3729 paUsers[i].iUserTable = 0xfffffffe;
3730 }
3731 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3732#endif
3733
3734#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3735 /*
3736 * Clear all the GCPhys links and rebuild the phys ext free list.
3737 */
3738 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3739 pRam;
3740 pRam = pRam->CTX_SUFF(pNext))
3741 {
3742 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3743 while (iPage-- > 0)
3744 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3745 }
3746
3747 pPool->iPhysExtFreeHead = 0;
3748 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3749 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3750 for (unsigned i = 0; i < cMaxPhysExts; i++)
3751 {
3752 paPhysExts[i].iNext = i + 1;
3753 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3754 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3755 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3756 }
3757 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3758#endif
3759
3760#ifdef PGMPOOL_WITH_MONITORING
3761 /*
3762 * Just zap the modified list.
3763 */
3764 pPool->cModifiedPages = 0;
3765 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3766#endif
3767
3768#ifdef PGMPOOL_WITH_CACHE
3769 /*
3770 * Clear the GCPhys hash and the age list.
3771 */
3772 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
3773 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3774 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3775 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3776#endif
3777
3778 /*
3779 * Flush all the special root pages.
3780 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3781 */
3782 pgmPoolFlushAllSpecialRoots(pPool);
3783 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3784 {
3785 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3786 pPage->iNext = NIL_PGMPOOL_IDX;
3787#ifdef PGMPOOL_WITH_MONITORING
3788 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3789 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3790 pPage->cModifications = 0;
3791 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3792 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3793 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3794 if (pPage->fMonitored)
3795 {
3796 PVM pVM = pPool->CTX_SUFF(pVM);
3797 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3798 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3799 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3800 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
3801 pPool->pszAccessHandler);
3802 AssertFatalRCSuccess(rc);
3803# ifdef PGMPOOL_WITH_CACHE
3804 pgmPoolHashInsert(pPool, pPage);
3805# endif
3806 }
3807#endif
3808#ifdef PGMPOOL_WITH_USER_TRACKING
3809 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3810#endif
3811#ifdef PGMPOOL_WITH_CACHE
3812 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3813 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3814#endif
3815 }
3816
3817 /*
3818 * Finally, assert the FF.
3819 */
3820 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
3821
3822 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3823}
3824
3825
3826/**
3827 * Flushes a pool page.
3828 *
3829 * This moves the page to the free list after removing all user references to it.
3830 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3831 *
3832 * @returns VBox status code.
3833 * @retval VINF_SUCCESS on success.
3834 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
3835 * @param   pPool       The pool.
3836 * @param   pPage       The shadow page to flush.
3837 */
3838int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3839{
3840 int rc = VINF_SUCCESS;
3841 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3842 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%d, .GCPhys=%RGp}\n",
3843 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3844
3845 /*
3846 * Quietly reject any attempts at flushing any of the special root pages.
3847 */
3848 if (pPage->idx < PGMPOOL_IDX_FIRST)
3849 {
3850 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3851 return VINF_SUCCESS;
3852 }
3853
3854 /*
3855 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
3856 */
3857 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
3858 {
3859 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4,
3860 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
3861 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3862 return VINF_SUCCESS;
3863 }
3864
3865 /*
3866     * Mark the page as being in need of an ASMMemZeroPage().
3867 */
3868 pPage->fZeroed = false;
3869
3870#ifdef PGMPOOL_WITH_USER_TRACKING
3871 /*
3872 * Clear the page.
3873 */
3874 pgmPoolTrackClearPageUsers(pPool, pPage);
3875 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3876 pgmPoolTrackDeref(pPool, pPage);
3877 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3878#endif
3879
3880#ifdef PGMPOOL_WITH_CACHE
3881 /*
3882 * Flush it from the cache.
3883 */
3884 pgmPoolCacheFlushPage(pPool, pPage);
3885#endif /* PGMPOOL_WITH_CACHE */
3886
3887#ifdef PGMPOOL_WITH_MONITORING
3888 /*
3889     * Deregister the monitoring.
3890 */
3891 if (pPage->fMonitored)
3892 rc = pgmPoolMonitorFlush(pPool, pPage);
3893#endif
3894
3895 /*
3896 * Free the page.
3897 */
3898 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3899 pPage->iNext = pPool->iFreeHead;
3900 pPool->iFreeHead = pPage->idx;
3901 pPage->enmKind = PGMPOOLKIND_FREE;
3902 pPage->GCPhys = NIL_RTGCPHYS;
3903 pPage->fReusedFlushPending = false;
3904
3905 pPool->cUsedPages--;
3906 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3907 return rc;
3908}
3909
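/*
 * Illustrative caller sketch for pgmPoolFlushPage (not lifted from an actual
 * call site; HCPhysShw is a placeholder name):
 *
 *     PPGMPOOL     pPool = pVM->pgm.s.CTX_SUFF(pPool);
 *     PPGMPOOLPAGE pPage = pgmPoolGetPageByHCPhys(pVM, HCPhysShw);
 *     int rc = pgmPoolFlushPage(pPool, pPage);
 *     Assert(rc == VINF_SUCCESS || rc == VERR_PGM_POOL_CLEARED);
 */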
3910
3911/**
3912 * Frees a usage of a pool page.
3913 *
3914 * The caller is responsible for updating the user table so that it no longer
3915 * references the shadow page.
3916 *
3917 * @param pPool The pool.
3918 * @param   pPage       The shadow page.
3919 * @param iUser The shadow page pool index of the user table.
3920 * @param iUserTable The index into the user table (shadowed).
3921 */
3922void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3923{
3924 STAM_PROFILE_START(&pPool->StatFree, a);
3925 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3926 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3927 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3928#ifdef PGMPOOL_WITH_USER_TRACKING
3929 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3930#endif
3931#ifdef PGMPOOL_WITH_CACHE
3932 if (!pPage->fCached)
3933#endif
3934 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3935 STAM_PROFILE_STOP(&pPool->StatFree, a);
3936}
3937
3938
3939/**
3940 * Makes one or more pages free, expanding the pool or evicting cached pages as necessary.
3941 *
3942 * @returns VBox status code.
3943 * @retval VINF_SUCCESS on success.
3944 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3945 *
3946 * @param pPool The pool.
3947 * @param iUser The user of the page.
3948 */
3949static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3950{
3951 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3952
3953 /*
3954     * If the pool isn't fully grown yet, expand it.
3955 */
3956 if (pPool->cCurPages < pPool->cMaxPages)
3957 {
3958 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3959#ifdef IN_RING3
3960 int rc = PGMR3PoolGrow(pPool->pVMR3);
3961#else
3962 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3963#endif
3964 if (RT_FAILURE(rc))
3965 return rc;
3966 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3967 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3968 return VINF_SUCCESS;
3969 }
3970
3971#ifdef PGMPOOL_WITH_CACHE
3972 /*
3973 * Free one cached page.
3974 */
3975 return pgmPoolCacheFreeOne(pPool, iUser);
3976#else
3977 /*
3978 * Flush the pool.
3979 *
3980 * If we have tracking enabled, it should be possible to come up with
3981 * a cheap replacement strategy...
3982 */
3983    /** @todo incompatible with long mode paging (cr3 root will be flushed) */
3984    Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
3985 pgmPoolFlushAllInt(pPool);
3986 return VERR_PGM_POOL_FLUSHED;
3987#endif
3988}
3989
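/*
 * Note: growing the pool is a ring-3 only operation, so the ring-0 and
 * raw-mode paths above bounce to ring-3 via the VMMCALLHOST_PGM_POOL_GROW
 * call-host request before rechecking iFreeHead, while ring-3 callers invoke
 * PGMR3PoolGrow directly.
 */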
3990
3991/**
3992 * Allocates a page from the pool.
3993 *
3994 * This page may actually be a cached page and not in need of any processing
3995 * on the caller's part.
3996 *
3997 * @returns VBox status code.
3998 * @retval VINF_SUCCESS if a NEW page was allocated.
3999 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4000 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4001 * @param pVM The VM handle.
4002 * @param   GCPhys      The GC physical address of the page we're going to shadow.
4003 * For 4MB and 2MB PD entries, it's the first address the
4004 * shadow PT is covering.
4005 * @param enmKind The kind of mapping.
4006 * @param iUser The shadow page pool index of the user table.
4007 * @param iUserTable The index into the user table (shadowed).
4008 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4009 */
4010int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
4011{
4012 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4013 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4014 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
4015 *ppPage = NULL;
4016 Assert(!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL));
4017
4018#ifdef PGMPOOL_WITH_CACHE
4019 if (pPool->fCacheEnabled)
4020 {
4021 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
4022 if (RT_SUCCESS(rc2))
4023 {
4024 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4025 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4026 return rc2;
4027 }
4028 }
4029#endif
4030
4031 /*
4032 * Allocate a new one.
4033 */
4034 int rc = VINF_SUCCESS;
4035 uint16_t iNew = pPool->iFreeHead;
4036 if (iNew == NIL_PGMPOOL_IDX)
4037 {
4038 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
4039 if (RT_FAILURE(rc))
4040 {
4041 if (rc != VERR_PGM_POOL_CLEARED)
4042 {
4043 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4044 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4045 return rc;
4046 }
4047 Log(("pgmPoolMakeMoreFreePages failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
4048 rc = VERR_PGM_POOL_FLUSHED;
4049 }
4050 iNew = pPool->iFreeHead;
4051 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4052 }
4053
4054 /* unlink the free head */
4055 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4056 pPool->iFreeHead = pPage->iNext;
4057 pPage->iNext = NIL_PGMPOOL_IDX;
4058
4059 /*
4060 * Initialize it.
4061 */
4062 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4063 pPage->enmKind = enmKind;
4064 pPage->GCPhys = GCPhys;
4065 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4066 pPage->fMonitored = false;
4067 pPage->fCached = false;
4068 pPage->fReusedFlushPending = false;
4069 pPage->fCR3Mix = false;
4070#ifdef PGMPOOL_WITH_MONITORING
4071 pPage->cModifications = 0;
4072 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4073 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4074#endif
4075#ifdef PGMPOOL_WITH_USER_TRACKING
4076 pPage->cPresent = 0;
4077 pPage->iFirstPresent = ~0;
4078
4079 /*
4080 * Insert into the tracking and cache. If this fails, free the page.
4081 */
4082 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4083 if (RT_FAILURE(rc3))
4084 {
4085 if (rc3 != VERR_PGM_POOL_CLEARED)
4086 {
4087 pPool->cUsedPages--;
4088 pPage->enmKind = PGMPOOLKIND_FREE;
4089 pPage->GCPhys = NIL_RTGCPHYS;
4090 pPage->iNext = pPool->iFreeHead;
4091 pPool->iFreeHead = pPage->idx;
4092 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4093 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4094 return rc3;
4095 }
4096 Log(("pgmPoolTrackInsert failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4097 rc = VERR_PGM_POOL_FLUSHED;
4098 }
4099#endif /* PGMPOOL_WITH_USER_TRACKING */
4100
4101 /*
4102 * Commit the allocation, clear the page and return.
4103 */
4104#ifdef VBOX_WITH_STATISTICS
4105 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4106 pPool->cUsedPagesHigh = pPool->cUsedPages;
4107#endif
4108
4109 if (!pPage->fZeroed)
4110 {
4111 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4112 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4113 ASMMemZeroPage(pv);
4114 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4115 }
4116
4117 *ppPage = pPage;
4118 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4119 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4120 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4121 return rc;
4122}
4123
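/*
 * Illustrative usage sketch for pgmPoolAlloc (a hypothetical call site; the
 * GCPhysGuestPT, iUserPoolIdx and iPdEntry names are placeholders and the
 * kind is only an example):
 *
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAlloc(pVM, GCPhysGuestPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                           iUserPoolIdx, iPdEntry, &pShwPage);
 *     if (RT_SUCCESS(rc))    // VINF_SUCCESS (new page) or VINF_PGM_CACHED_PAGE.
 *     {
 *         // pShwPage->Core.Key is the HC physical address for the parent PDE;
 *         // PGMPOOL_PAGE_2_PTR(pVM, pShwPage) maps the page for syncing.
 *     }
 *     else
 *         AssertMsg(rc == VERR_PGM_POOL_FLUSHED, ("%Rrc\n", rc));
 */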
4124
4125/**
4126 * Frees a usage of a pool page.
4127 *
4128 * @param pVM The VM handle.
4129 * @param HCPhys The HC physical address of the shadow page.
4130 * @param iUser The shadow page pool index of the user table.
4131 * @param iUserTable The index into the user table (shadowed).
4132 */
4133void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4134{
4135 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4136 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4137 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4138}
4139
4140
4141/**
4142 * Gets an in-use page in the pool by its physical address.
4143 *
4144 * @returns Pointer to the page.
4145 * @param pVM The VM handle.
4146 * @param HCPhys The HC physical address of the shadow page.
4147 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4148 */
4149PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4150{
4151 /** @todo profile this! */
4152 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4153 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4154 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%RHp -> %p:{.idx=%d .GCPhys=%RGp .enmKind=%d}\n",
4155 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
4156 return pPage;
4157}
4158
4159
4160/**
4161 * Flushes the entire cache.
4162 *
4163 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4164 * and will execute the CR3 flush.
4165 *
4166 * @param   pVM         The VM handle.
4167 */
4168void pgmPoolFlushAll(PVM pVM)
4169{
4170 LogFlow(("pgmPoolFlushAll:\n"));
4171 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4172}
4173