VirtualBox
source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp @ 14751

Last change on this file since 14751 was 14751, checked in by vboxsync, 16 years ago:
#1865: PGMAllPool: Fixes for non-contiguous mapping of the fake 2048 entry PAE PD (R0 darwin only).

/* $Id: PGMAllPool.cpp 14751 2008-11-28 01:21:42Z vboxsync $ */
/** @file
 * PGM Shadow Page Pool.
 */

/*
 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
 * Clara, CA 95054 USA or visit http://www.sun.com if you need
 * additional information or have any questions.
 */


/*******************************************************************************
*   Header Files                                                               *
*******************************************************************************/
#define LOG_GROUP LOG_GROUP_PGM_POOL
#include <VBox/pgm.h>
#include <VBox/mm.h>
#include <VBox/em.h>
#include <VBox/cpum.h>
#ifdef IN_RC
# include <VBox/patm.h>
#endif
#include "PGMInternal.h"
#include <VBox/vm.h>
#include <VBox/disopcode.h>
#include <VBox/hwacc_vmx.h>

#include <VBox/log.h>
#include <VBox/err.h>
#include <iprt/asm.h>


/*******************************************************************************
*   Internal Functions                                                         *
*******************************************************************************/
__BEGIN_DECLS
static void pgmPoolFlushAllInt(PPGMPOOL pPool);
#ifdef PGMPOOL_WITH_USER_TRACKING
DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#endif
#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
#endif
#ifdef PGMPOOL_WITH_CACHE
static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
#endif
#ifdef PGMPOOL_WITH_MONITORING
static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#endif
#ifndef IN_RING3
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
#endif
__END_DECLS


/**
 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
 *
 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
 * @param   enmKind     The page kind.
 */
DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            return true;
        default:
            return false;
    }
}
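
/* Illustrative sketch (added for clarity, not from the original source; the
 * variable names are hypothetical): a caller choosing an invalidation
 * granularity might use the predicate like this,
 *
 *     bool const fBigPage = pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
 *     unsigned   cbGuest  = fBigPage ? _4M : PAGE_SIZE;  // PAE big pages are really 2MB
 *
 * i.e. shadows of 4MB (32-bit PSE) and 2MB (PAE) guest mappings are treated
 * alike.
 */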

#if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
/**
 * Maps a pool page into the current context.
 *
 * @returns Pointer to the mapping.
 * @param   pVM     The VM handle.
 * @param   pPage   The page to map.
 */
void *pgmPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
{
    /* general pages. */
    if (pPage->idx >= PGMPOOL_IDX_FIRST)
    {
        Assert(pPage->idx < pVM->pgm.s.CTX_SUFF(pPool)->cCurPages);
        void *pv;
        int rc = PGMDynMapHCPage(pVM, pPage->Core.Key, &pv);
        AssertReleaseRC(rc);
        return pv;
    }

    /* special pages. */
# ifdef IN_RC
    switch (pPage->idx)
    {
        case PGMPOOL_IDX_PD:
            return pVM->pgm.s.pShw32BitPdRC;
        case PGMPOOL_IDX_PAE_PD:
        case PGMPOOL_IDX_PAE_PD_0:
            return pVM->pgm.s.apShwPaePDsRC[0];
        case PGMPOOL_IDX_PAE_PD_1:
            return pVM->pgm.s.apShwPaePDsRC[1];
        case PGMPOOL_IDX_PAE_PD_2:
            return pVM->pgm.s.apShwPaePDsRC[2];
        case PGMPOOL_IDX_PAE_PD_3:
            return pVM->pgm.s.apShwPaePDsRC[3];
        case PGMPOOL_IDX_PDPT:
            return pVM->pgm.s.pShwPaePdptRC;
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }

# else  /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
    RTHCPHYS HCPhys;
    switch (pPage->idx)
    {
        case PGMPOOL_IDX_PD:
            HCPhys = pVM->pgm.s.HCPhysShw32BitPD;
            break;
        case PGMPOOL_IDX_PAE_PD_0:
            HCPhys = pVM->pgm.s.aHCPhysPaePDs[0];
            break;
        case PGMPOOL_IDX_PAE_PD_1:
            HCPhys = pVM->pgm.s.aHCPhysPaePDs[1];
            break;
        case PGMPOOL_IDX_PAE_PD_2:
            HCPhys = pVM->pgm.s.aHCPhysPaePDs[2];
            break;
        case PGMPOOL_IDX_PAE_PD_3:
            HCPhys = pVM->pgm.s.aHCPhysPaePDs[3];
            break;
        case PGMPOOL_IDX_PDPT:
            HCPhys = pVM->pgm.s.HCPhysShwPaePdpt;
            break;
        case PGMPOOL_IDX_PAE_PD:
            AssertReleaseMsgFailed(("PGMPOOL_IDX_PAE_PD is not usable in VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 context\n"));
            return NULL;
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }
    void *pv;
    int rc = PGMDynMapHCPage(pVM, HCPhys, &pv);
    AssertReleaseRC(rc);
    return pv;
# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
}
#endif /* IN_RC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
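
/* Illustrative note (an assumption added here, not in the original source):
 * in RC and in the 2x4GB R0 layout there is no permanent mapping of pool
 * pages, so each access goes through the small dynamic mapping cache:
 *
 *     PX86PTPAE pShwPT = (PX86PTPAE)pgmPoolMapPage(pVM, pPage);
 *     pShwPT->a[iEntry].u = 0;   // the mapping is short-lived, don't cache the pointer
 *
 * The special root pages (PD, the four PAE PDs, the PDPT) bypass the cache
 * via fixed RC pointers or their recorded host-physical addresses.
 */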

#ifdef PGMPOOL_WITH_MONITORING
/**
 * Determines the size of a write instruction.
 * @returns number of bytes written.
 * @param   pDis    The disassembler state.
 */
static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
{
    /*
     * This is very crude and possibly wrong for some opcodes,
     * but since it's not really supposed to be called we can
     * probably live with that.
     */
    return DISGetParamSize(pDis, &pDis->param1);
}


/**
 * Flushes a chain of pages sharing the same access monitor.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pPool   The pool.
 * @param   pPage   A page in the chain.
 */
int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));

    /*
     * Find the list head.
     */
    uint16_t idx = pPage->idx;
    if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
    {
        while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
        {
            idx = pPage->iMonitoredPrev;
            Assert(idx != pPage->idx);
            pPage = &pPool->aPages[idx];
        }
    }

    /*
     * Iterate the list flushing each shadow page.
     */
    int rc = VINF_SUCCESS;
    for (;;)
    {
        idx = pPage->iMonitoredNext;
        Assert(idx != pPage->idx);
        if (pPage->idx >= PGMPOOL_IDX_FIRST)
        {
            int rc2 = pgmPoolFlushPage(pPool, pPage);
            if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
                rc = VINF_PGM_SYNC_CR3;
        }
        /* next */
        if (idx == NIL_PGMPOOL_IDX)
            break;
        pPage = &pPool->aPages[idx];
    }
    return rc;
}
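
/* Illustrative note (an assumption added here, not in the original source):
 * monitored pages form a doubly linked list threaded through the pool array
 * by index, roughly
 *
 *     head.iMonitoredPrev == NIL_PGMPOOL_IDX
 *     head.iMonitoredNext -> pageA -> pageB -> NIL_PGMPOOL_IDX
 *
 * The flush above first rewinds to the head, then walks iMonitoredNext,
 * fetching the next index *before* flushing each page because
 * pgmPoolFlushPage() unlinks the current page from the chain.
 */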

/**
 * Wrapper for getting the current context pointer to the entry being modified.
 *
 * @returns Pointer to the current context mapping of the entry.
 * @param   pPool       The pool.
 * @param   pvFault     The fault virtual address.
 * @param   GCPhysFault The fault physical address.
 * @param   cbEntry     The entry size.
 */
#ifdef IN_RING3
DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
#else
DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
#endif
{
#ifdef IN_RC
    return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));

#elif defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
    void *pvRet;
    int rc = PGMDynMapGCPageOff(pPool->pVMR0, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
    AssertFatalRCSuccess(rc);
    return pvRet;

#elif defined(IN_RING0)
    void *pvRet;
    int rc = pgmRamGCPhys2HCPtr(&pPool->pVMR0->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
    AssertFatalRCSuccess(rc);
    return pvRet;

#elif defined(IN_RING3)
    return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
#else
# error "huh?"
#endif
}
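
/* Worked example (added for clarity, not in the original source): the masks
 * above round the fault address down to the start of the entry being
 * written, assuming cbEntry is a power of two. For a PAE entry (cbEntry = 8):
 *
 *     pvFault = 0x...1234   ->   0x...1234 & ~(RTGCUINTPTR)7 = 0x...1230
 *
 * so a 4-byte write hitting the high half of a PTE still yields a pointer to
 * the whole 8-byte entry.
 */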

/**
 * Process shadow entries before they are changed by the guest.
 *
 * For PT entries we will clear them. For PD entries, we'll simply check
 * for mapping conflicts and set the SyncCR3 FF if found.
 *
 * @param   pPool       The pool.
 * @param   pPage       The head page.
 * @param   GCPhysFault The guest physical fault address.
 * @param   pvAddress   In R0 and GC this is the guest context fault address (flat).
 *                      In R3 this is the host context 'fault' address.
 * @param   pCpu        The disassembler state for figuring out the write size.
 *                      This need not be specified if the caller knows we won't do cross entry accesses.
 */
#ifdef IN_RING3
void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
#else
void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
#endif
{
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
    const unsigned off     = GCPhysFault & PAGE_OFFSET_MASK;
    const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;

    LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));

    for (;;)
    {
        union
        {
            void       *pv;
            PX86PT      pPT;
            PX86PTPAE   pPTPae;
            PX86PD      pPD;
            PX86PDPAE   pPDPae;
            PX86PDPT    pPDPT;
            PX86PML4    pPML4;
        } uShw;

        switch (pPage->enmKind)
        {
            case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
            {
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                const unsigned iShw = off / sizeof(X86PTE);
                if (uShw.pPT->a[iShw].n.u1Present)
                {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                    PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                    Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                               pGstPte->u & X86_PTE_PG_MASK);
# endif
                    uShw.pPT->a[iShw].u = 0;
                }
                break;
            }

            /* page/2 sized */
            case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
                {
                    const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
                    if (uShw.pPTPae->a[iShw].n.u1Present)
                    {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                        PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                        Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
                        pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                                   uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                                   pGstPte->u & X86_PTE_PG_MASK);
# endif
                        uShw.pPTPae->a[iShw].u = 0;
                    }
                }
                break;

            case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
            {
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                const unsigned iShw = off / sizeof(X86PTEPAE);
                if (uShw.pPTPae->a[iShw].n.u1Present)
                {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                    PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                    Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                               pGstPte->u & X86_PTE_PAE_PG_MASK);
# endif
                    uShw.pPTPae->a[iShw].u = 0;
                }

                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + cbWrite > sizeof(X86PTEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
                    AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));

                    if (uShw.pPTPae->a[iShw2].n.u1Present)
                    {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                        PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                        Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
                        pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                                   uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
                                                   pGstPte->u & X86_PTE_PAE_PG_MASK);
# endif
                        uShw.pPTPae->a[iShw2].u = 0;
                    }
                }

                break;
            }

            case PGMPOOLKIND_ROOT_32BIT_PD:
            {
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                const unsigned iShw = off / sizeof(X86PTE);         // ASSUMING 32-bit guest paging!
                if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                }
                /* paranoia / a bit assumptive. */
                else if (   pCpu
                         && (off & 3)
                         && (off & 3) + cbWrite > sizeof(X86PTE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
                    if (    iShw2 != iShw
                        &&  iShw2 < RT_ELEMENTS(uShw.pPD->a)
                        &&  uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
                }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                if (    uShw.pPD->a[iShw].n.u1Present
                    &&  !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
# ifdef IN_RC       /* TLB load - we're pushing things a bit... */
                    ASMProbeReadByte(pvAddress);
# endif
                    pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
                    uShw.pPD->a[iShw].u = 0;
                }
#endif
                break;
            }

            case PGMPOOLKIND_ROOT_PAE_PD:
            {
                unsigned iGst     = off / sizeof(X86PTE);           // ASSUMING 32-bit guest paging!
                unsigned iShwPdpt = iGst & 3;
                unsigned iShw     = iGst / 4;
                Assert(pPage->idx == PGMPOOL_IDX_PAE_PD);
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage + 1 + iShwPdpt);
                for (unsigned i = 0; i < 2; i++, iShw++)
                {
                    if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw));
                    }
                    /* paranoia / a bit assumptive. */
                    else if (   pCpu
                             && (off & 3)
                             && (off & 3) + cbWrite > 4)
                    {
                        const unsigned iShw2 = iShw + 2;
                        if (    iShw2 < RT_ELEMENTS(uShw.pPDPae->a) /** @todo was completely wrong, it's better now after #1865 but still wrong from cross PD. */
                            &&  (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                        {
                            Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                            VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
                        }
                    }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                    if (    uShw.pPDPae->a[iShw].n.u1Present
                        &&  !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: iShwPdpt=%#x iShw=%#x: %RX64 -> freeing it!\n", iShwPdpt, iShw, uShw.pPDPae->a[iShw].u));
# ifdef IN_RC       /* TLB load - we're pushing things a bit... */
                        ASMProbeReadByte(pvAddress);
# endif
                        pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + iShwPdpt * X86_PG_PAE_ENTRIES);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
#endif
                }
                break;
            }

            case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
            {
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                const unsigned iShw = off / sizeof(X86PDEPAE);
                if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                }
#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
                /*
                 * Causes trouble when the guest uses a PDE to refer to the whole page table level
                 * structure. (Invalidate here; faults later on when it tries to change the page
                 * table entries -> recheck; probably only applies to the RC case.)
                 */
                else
                {
                    if (uShw.pPDPae->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
                                    /* Note: hardcoded PAE implementation dependency */
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
                }
#endif
                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + cbWrite > sizeof(X86PDEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
                    AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));

                    if (    iShw2 != iShw
                        &&  uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
                    else if (uShw.pPDPae->a[iShw2].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
                                    /* Note: hardcoded PAE implementation dependency */
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
                        uShw.pPDPae->a[iShw2].u = 0;
                    }
#endif
                }
                break;
            }

            case PGMPOOLKIND_ROOT_PDPT:
            {
                /*
                 * Hopefully this doesn't happen very often:
                 * - touching unused parts of the page
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                const unsigned iShw = off / sizeof(X86PDPE);
                if (iShw < X86_PG_PAE_PDPE_ENTRIES)          /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
                {
                    if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                    }
                    /* paranoia / a bit assumptive. */
                    else if (   pCpu
                             && (off & 7)
                             && (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
                        if (    iShw2 != iShw
                            &&  iShw2 < X86_PG_PAE_PDPE_ENTRIES
                            &&  uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
                        {
                            Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                            VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                        }
                    }
                }
                break;
            }

#ifndef IN_RC
            case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
            {
                Assert(pPage->enmKind == PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD);

                uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                const unsigned iShw = off / sizeof(X86PDEPAE);
                if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                }
                else
                {
                    if (uShw.pPDPae->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
                                    pPage->idx,
                                    iShw);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
                }
                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + cbWrite > sizeof(X86PDEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
                    AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));

                    if (    iShw2 != iShw
                        &&  uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
                    else if (uShw.pPDPae->a[iShw2].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
                                    pPage->idx,
                                    iShw2);
                        uShw.pPDPae->a[iShw2].u = 0;
                    }
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
            {
                /*
                 * Hopefully this doesn't happen very often:
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                    const unsigned iShw = off / sizeof(X86PDPE);
                    if (uShw.pPDPT->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
                        uShw.pPDPT->a[iShw].u = 0;
                    }
                    /* paranoia / a bit assumptive. */
                    if (    pCpu
                        &&  (off & 7)
                        &&  (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
                        if (uShw.pPDPT->a[iShw2].n.u1Present)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
                            pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
                            uShw.pPDPT->a[iShw2].u = 0;
                        }
                    }
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
            {
                /*
                 * Hopefully this doesn't happen very often:
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                    const unsigned iShw = off / sizeof(X86PDPE);
                    if (uShw.pPML4->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
                        uShw.pPML4->a[iShw].u = 0;
                    }
                    /* paranoia / a bit assumptive. */
                    if (    pCpu
                        &&  (off & 7)
                        &&  (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
                        if (uShw.pPML4->a[iShw2].n.u1Present)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
                            pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
                            uShw.pPML4->a[iShw2].u = 0;
                        }
                    }
                }
                break;
            }
#endif /* !IN_RC */

            default:
                AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
        }

        /* next */
        if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
            return;
        pPage = &pPool->aPages[pPage->iMonitoredNext];
    }
}
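
/* Worked example (added for clarity, not in the original source) for the
 * "paranoia / a bit assumptive" blocks above: a misaligned 4-byte write at
 * off = 0x...6 into a PAE table (entry size 8) satisfies
 *
 *     (off & 7) == 6   and   6 + 4 > sizeof(X86PTEPAE)
 *
 * so a second entry index iShw2 = (off + cbWrite - 1) / 8 is computed for the
 * entry receiving the trailing bytes, and both entries are checked/cleared.
 */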

# ifndef IN_RING3
/**
 * Checks if an access could be a fork operation in progress.
 *
 * Meaning that the guest is setting up the parent process for Copy-On-Write.
 *
 * @returns true if it's likely that we're forking, otherwise false.
 * @param   pPool       The pool.
 * @param   pCpu        The disassembled instruction.
 * @param   offFault    The access offset.
 */
DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
{
    /*
     * i386 linux is using btr to clear X86_PTE_RW.
     * The functions involved are (2.6.16 source inspection):
     *     clear_bit
     *     ptep_set_wrprotect
     *     copy_one_pte
     *     copy_pte_range
     *     copy_pmd_range
     *     copy_pud_range
     *     copy_page_range
     *     dup_mmap
     *     dup_mm
     *     copy_mm
     *     copy_process
     *     do_fork
     */
    if (    pCpu->pCurInstr->opcode == OP_BTR
        &&  !(offFault & 4)
        /** @todo Validate that the bit index is X86_PTE_RW. */
            )
    {
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
        return true;
    }
    return false;
}
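
/* Illustrative note (an assumption added here, not in the original source):
 * on fork, i386 Linux write-protects each parent PTE with a locked
 * bit-test-and-reset, roughly
 *
 *     lock btr dword [pPte], 1      ; clear bit 1 = X86_PTE_RW
 *
 * The !(offFault & 4) check rejects writes landing in the high dword of a
 * PAE entry, where bit 1 would not be the R/W flag.
 */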

/**
 * Determine whether the page is likely to have been reused.
 *
 * @returns true if we consider the page as being reused for a different purpose.
 * @returns false if we consider it to still be a paging page.
 * @param   pVM         VM Handle.
 * @param   pPage       The page in question.
 * @param   pRegFrame   Trap register frame.
 * @param   pCpu        The disassembly info for the faulting instruction.
 * @param   pvFault     The fault address.
 *
 * @remark  The REP prefix check is left to the caller because of STOSD/W.
 */
DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
{
#ifndef IN_RC
    /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
    if (    HWACCMHasPendingIrq(pVM)
        &&  (pRegFrame->rsp - pvFault) < 32)
    {
        /* Fault caused by stack writes while trying to inject an interrupt event. */
        Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
        return true;
    }
#else
    NOREF(pVM); NOREF(pvFault);
#endif

    switch (pCpu->pCurInstr->opcode)
    {
        /* call implies the actual push of the return address faulted */
        case OP_CALL:
            Log4(("pgmPoolMonitorIsReused: CALL\n"));
            return true;
        case OP_PUSH:
            Log4(("pgmPoolMonitorIsReused: PUSH\n"));
            return true;
        case OP_PUSHF:
            Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
            return true;
        case OP_PUSHA:
            Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
            return true;
        case OP_FXSAVE:
            Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
            return true;
        case OP_MOVNTI:     /* solaris - block_zero_no_xmm */
            Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
            return true;
        case OP_MOVNTDQ:    /* solaris - hwblkclr & hwblkpagecopy */
            Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
            return true;
        case OP_MOVSWD:
        case OP_STOSWD:
            if (    pCpu->prefix == (PREFIX_REP|PREFIX_REX)
                &&  pRegFrame->rcx >= 0x40
               )
            {
                Assert(pCpu->mode == CPUMODE_64BIT);

                Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
                return true;
            }
            return false;
    }
    if (    (pCpu->param1.flags & USE_REG_GEN32)
        &&  (pCpu->param1.base.reg_gen == USE_REG_ESP))
    {
        Log4(("pgmPoolMonitorIsReused: ESP\n"));
        return true;
    }

    //if (pPage->fCR3Mix)
    //    return false;
    return false;
}


/**
 * Flushes the page being accessed.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                     PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * First, do the flushing.
     */
    int rc = pgmPoolMonitorChainFlush(pPool, pPage);

    /*
     * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
     */
    uint32_t cbWritten;
    int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
    if (RT_SUCCESS(rc2))
        pRegFrame->rip += pCpu->opsize;
    else if (rc2 == VERR_EM_INTERPRETER)
    {
#ifdef IN_RC
        if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
        {
            LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
                     pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
            rc = VINF_SUCCESS;
            STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
        }
        else
#endif
        {
            rc = VINF_EM_RAW_EMULATE_INSTR;
            STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
        }
    }
    else
        rc = rc2;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
    return rc;
}


/**
 * Handles the STOSD write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                          PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    Assert(pCpu->mode == CPUMODE_32BIT);

    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Execute REP STOSD.
     *
     * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
     * write situation, meaning that it's safe to write here.
     */
    RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
    while (pRegFrame->ecx)
    {
        pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
#ifdef IN_RC
        *(uint32_t *)pu32 = pRegFrame->eax;
#else
        PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
#endif
        pu32           += 4;
        GCPhysFault    += 4;
        pRegFrame->edi += 4;
        pRegFrame->ecx--;
    }
    pRegFrame->rip += pCpu->opsize;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
    return VINF_SUCCESS;
}
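
/* Worked example (added for clarity, not in the original source): for a
 * guest `rep stosd` with ecx=4, eax=0 and edi faulting at pvFault, the loop
 * above runs four iterations, each notifying the monitor and storing 4 bytes:
 *
 *     iteration 0: write 0 at GCPhysFault+0,  edi += 4, ecx = 3
 *     ...
 *     iteration 3: write 0 at GCPhysFault+12, edi += 4, ecx = 0
 *
 * and finally advances rip past the instruction, avoiding a ring-3 exit.
 */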

/**
 * Handles the simple write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                           PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Clear all the pages. ASSUMES that pvFault is readable.
     */
    pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);

    /*
     * Interpret the instruction.
     */
    uint32_t cb;
    int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
    if (RT_SUCCESS(rc))
        pRegFrame->rip += pCpu->opsize;
    else if (rc == VERR_EM_INTERPRETER)
    {
        LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
                 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
        rc = VINF_EM_RAW_EMULATE_INSTR;
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
    }

    /*
     * Quick hack, with logging enabled we're getting stale
     * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
     * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
     * have to be fixed to support this. But that'll have to wait till next week.
     *
     * An alternative is to keep track of the changed PTEs together with the
     * GCPhys from the guest PT. This may prove expensive though.
     *
     * At the moment, it's VITAL that it's done AFTER the instruction interpreting
     * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
     */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
    return rc;
}


/**
 * \#PF Handler callback for PT write accesses.
 *
 * @returns VBox status code (appropriate for GC return).
 * @param   pVM         VM Handle.
 * @param   uErrorCode  CPU Error code.
 * @param   pRegFrame   Trap register frame.
 *                      NULL on DMA and other non CPU access.
 * @param   pvFault     The fault address (cr2).
 * @param   GCPhysFault The GC physical address corresponding to pvFault.
 * @param   pvUser      User argument.
 */
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
{
    STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
    PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
    PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
    LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));

    /*
     * We should ALWAYS have the list head as user parameter. This
     * is because we use that page to record the changes.
     */
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);

    /*
     * Disassemble the faulting instruction.
     */
    DISCPUSTATE Cpu;
    int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
    AssertRCReturn(rc, rc);

    /*
     * Check if it's worth dealing with.
     */
    bool fReused = false;
    if (    (   pPage->cModifications < 48  /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
             || pPage->fCR3Mix)
        &&  !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
        &&  !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
    {
        /*
         * Simple instructions, no REP prefix.
         */
        if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
        {
            rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
            return rc;
        }

        /*
         * Windows is frequently doing small memset() operations (netio test 4k+).
         * We have to deal with these or we'll kill the cache and performance.
         */
        if (    Cpu.pCurInstr->opcode == OP_STOSWD
            &&  CPUMGetGuestCPL(pVM, pRegFrame) == 0
            &&  pRegFrame->ecx <= 0x20
            &&  pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
            &&  !((uintptr_t)pvFault & 3)
            &&  (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
            &&  Cpu.mode == CPUMODE_32BIT
            &&  Cpu.opmode == CPUMODE_32BIT
            &&  Cpu.addrmode == CPUMODE_32BIT
            &&  Cpu.prefix == PREFIX_REP
            &&  !pRegFrame->eflags.Bits.u1DF
           )
        {
            rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
            return rc;
        }

        /* REP prefix, don't bother. */
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
        Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
              pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
    }

    /*
     * Not worth it, so flush it.
     *
     * If we considered it to be reused, don't go back to ring-3
     * to emulate failed instructions since we usually cannot
     * interpret them. This may be a bit risky, in which case
     * the reuse detection must be fixed.
     */
    rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
    if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
        rc = VINF_SUCCESS;
    STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
    return rc;
}

# endif /* !IN_RING3 */
#endif /* PGMPOOL_WITH_MONITORING */

#ifdef PGMPOOL_WITH_CACHE

/**
 * Inserts a page into the GCPhys hash table.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
    Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
    uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
    pPage->iNext = pPool->aiHash[iHash];
    pPool->aiHash[iHash] = pPage->idx;
}
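
/* Illustrative note (an assumption added here, not in the original source):
 * the GCPhys hash is an array of bucket heads with collision chains threaded
 * through the pages' iNext indices, so insertion is O(1):
 *
 *     iHash = PGMPOOL_HASH(GCPhys);      // bucket for this guest page
 *     page->iNext  = aiHash[iHash];      // old head becomes our successor
 *     aiHash[iHash] = page->idx;         // we become the new head
 *
 * Lookup (see pgmPoolCacheAlloc below) walks iNext until NIL_PGMPOOL_IDX.
 */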

/**
 * Removes a page from the GCPhys hash table.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
    uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
    if (pPool->aiHash[iHash] == pPage->idx)
        pPool->aiHash[iHash] = pPage->iNext;
    else
    {
        uint16_t iPrev = pPool->aiHash[iHash];
        for (;;)
        {
            const int16_t i = pPool->aPages[iPrev].iNext;
            if (i == pPage->idx)
            {
                pPool->aPages[iPrev].iNext = pPage->iNext;
                break;
            }
            if (i == NIL_PGMPOOL_IDX)
            {
                AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
                break;
            }
            iPrev = i;
        }
    }
    pPage->iNext = NIL_PGMPOOL_IDX;
}


/**
 * Frees up one cache page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
 * @param   pPool   The pool.
 * @param   iUser   The user index.
 */
static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
{
#ifndef IN_RC
    const PVM pVM = pPool->CTX_SUFF(pVM);
#endif
    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
    STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);

    /*
     * Select one page from the tail of the age list.
     */
    uint16_t iToFree = pPool->iAgeTail;
    if (iToFree == iUser)
        iToFree = pPool->aPages[iToFree].iAgePrev;
/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
    if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
    {
        uint16_t i = pPool->aPages[iToFree].iAgePrev;
        for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
        {
            if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
                continue;
            iToFree = i;
            break;
        }
    }
*/

    Assert(iToFree != iUser);
    AssertRelease(iToFree != NIL_PGMPOOL_IDX);

    PPGMPOOLPAGE pPage = &pPool->aPages[iToFree];

    /*
     * Reject any attempts at flushing the currently active shadow CR3 mapping
     */
    if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
    {
        /* Refresh the cr3 mapping by putting it at the head of the age list. */
        pgmPoolCacheUsed(pPool, pPage);
        return pgmPoolCacheFreeOne(pPool, iUser);
    }

    int rc = pgmPoolFlushPage(pPool, pPage);
    if (rc == VINF_SUCCESS)
        PGM_INVL_GUEST_TLBS(); /* see PT handler. */
    return rc;
}
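
/* Illustrative note (an assumption added here, not in the original source):
 * the age list is a classic LRU queue; pgmPoolCacheUsed() moves a page to
 * iAgeHead on every use, so eviction simply picks iAgeTail:
 *
 *     iAgeHead (recently used) <-> ... <-> iAgeTail (eviction candidate)
 *
 * The iToFree == iUser check avoids evicting the page we are allocating on
 * behalf of, and the active shadow CR3 is recycled to the head instead of
 * being flushed.
 */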

/**
 * Checks if a kind mismatch is really a page being reused
 * or if it's just normal remappings.
 *
 * @returns true if reused and the cached page (enmKind1) should be flushed
 * @returns false if not reused.
 * @param   enmKind1    The kind of the cached page.
 * @param   enmKind2    The kind of the requested page.
 */
static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
{
    switch (enmKind1)
    {
        /*
         * Never reuse them. There is no remapping in non-paging mode.
         */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
            return true;

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         */
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
                case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
                case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         */
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * These cannot be flushed, and it's common to reuse the PDs as PTs.
         */
        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
        case PGMPOOLKIND_ROOT_PDPT:
        case PGMPOOLKIND_ROOT_NESTED:
            return false;

        default:
            AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
    }
}


/**
 * Attempts to satisfy a pgmPoolAlloc request from the cache.
 *
 * @returns VBox status code.
 * @retval  VINF_PGM_CACHED_PAGE on success.
 * @retval  VERR_FILE_NOT_FOUND if not found.
 * @param   pPool       The pool.
 * @param   GCPhys      The GC physical address of the page we're gonna shadow.
 * @param   enmKind     The kind of mapping.
 * @param   iUser       The shadow page pool index of the user table.
 * @param   iUserTable  The index into the user table (shadowed).
 * @param   ppPage      Where to store the pointer to the page.
 */
static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
{
#ifndef IN_RC
    const PVM pVM = pPool->CTX_SUFF(pVM);
#endif
    /*
     * Look up the GCPhys in the hash.
     */
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    Log3(("pgmPoolCacheAlloc: %RGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
    if (i != NIL_PGMPOOL_IDX)
    {
        do
        {
            PPGMPOOLPAGE pPage = &pPool->aPages[i];
            Log3(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
            if (pPage->GCPhys == GCPhys)
            {
                if ((PGMPOOLKIND)pPage->enmKind == enmKind)
                {
                    int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
                    if (RT_SUCCESS(rc))
                    {
                        *ppPage = pPage;
                        STAM_COUNTER_INC(&pPool->StatCacheHits);
                        return VINF_PGM_CACHED_PAGE;
                    }
                    return rc;
                }

                /*
                 * The kind is different. In some cases we should now flush the page
                 * as it has been reused, but in most cases this is normal remapping
                 * of PDs as PT or big pages using the GCPhys field in a slightly
                 * different way than the other kinds.
                 */
                if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
                {
                    STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
                    pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
                    PGM_INVL_GUEST_TLBS(); /* see PT handler. */
                    break;
                }
            }

            /* next */
            i = pPage->iNext;
        } while (i != NIL_PGMPOOL_IDX);
    }

    Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
    STAM_COUNTER_INC(&pPool->StatCacheMisses);
    return VERR_FILE_NOT_FOUND;
}
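
/* Illustrative sketch (an assumption added here, not from the original
 * source) of how an allocation path might consume the cache result:
 *
 *     int rc = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, &pPage);
 *     if (rc == VINF_PGM_CACHED_PAGE)
 *         return rc;                     // reuse the existing shadow page
 *     if (rc != VERR_FILE_NOT_FOUND)
 *         return rc;                     // e.g. user-tracking failure
 *     // ...fall back to allocating and filling a fresh pool page...
 */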

/**
 * Inserts a page into the cache.
 *
 * @param   pPool           The pool.
 * @param   pPage           The cached page.
 * @param   fCanBeCached    Set if the page is fit for caching from the caller's point of view.
 */
static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
{
    /*
     * Insert into the GCPhys hash if the page is fit for that.
     */
    Assert(!pPage->fCached);
    if (fCanBeCached)
    {
        pPage->fCached = true;
        pgmPoolHashInsert(pPool, pPage);
        Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
              pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
        STAM_COUNTER_INC(&pPool->StatCacheCacheable);
    }
    else
    {
        Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
              pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
        STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
    }

    /*
     * Insert at the head of the age list.
     */
    pPage->iAgePrev = NIL_PGMPOOL_IDX;
    pPage->iAgeNext = pPool->iAgeHead;
    if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
        pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
    else
        pPool->iAgeTail = pPage->idx;
    pPool->iAgeHead = pPage->idx;
}


/**
 * Flushes a cached page.
 *
 * @param   pPool   The pool.
 * @param   pPage   The cached page.
 */
static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));

    /*
     * Remove the page from the hash.
     */
    if (pPage->fCached)
    {
        pPage->fCached = false;
        pgmPoolHashRemove(pPool, pPage);
    }
    else
        Assert(pPage->iNext == NIL_PGMPOOL_IDX);

    /*
     * Remove it from the age list.
     */
    if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
    else
        pPool->iAgeTail = pPage->iAgePrev;
    if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
    else
        pPool->iAgeHead = pPage->iAgeNext;
    pPage->iAgeNext = NIL_PGMPOOL_IDX;
    pPage->iAgePrev = NIL_PGMPOOL_IDX;
}

#endif /* PGMPOOL_WITH_CACHE */
#ifdef PGMPOOL_WITH_MONITORING

/**
 * Looks for pages sharing the monitor.
 *
 * @returns Pointer to the head page.
 * @returns NULL if not found.
 * @param   pPool       The Pool
 * @param   pNewPage    The page which is going to be monitored.
 */
static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
{
#ifdef PGMPOOL_WITH_CACHE
    /*
     * Look up the GCPhys in the hash.
     */
    RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    if (i == NIL_PGMPOOL_IDX)
        return NULL;
    do
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[i];
        if (    pPage->GCPhys - GCPhys < PAGE_SIZE
            &&  pPage != pNewPage)
        {
            switch (pPage->enmKind)
            {
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
                case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
                case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
                case PGMPOOLKIND_ROOT_32BIT_PD:
                case PGMPOOLKIND_ROOT_PAE_PD:
                case PGMPOOLKIND_ROOT_PDPT:
                {
                    /* find the head */
                    while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
                    {
                        Assert(pPage->iMonitoredPrev != pPage->idx);
                        pPage = &pPool->aPages[pPage->iMonitoredPrev];
                    }
                    return pPage;
                }

                /* ignore, no monitoring. */
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                case PGMPOOLKIND_ROOT_NESTED:
                    break;
                default:
                    AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
            }
        }

        /* next */
        i = pPage->iNext;
    } while (i != NIL_PGMPOOL_IDX);
#endif
    return NULL;
}

/**
 * Enables write monitoring of a guest page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
 * @param   pPool   The pool.
 * @param   pPage   The cached page.
 */
static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));

    /*
     * Filter out the relevant kinds.
     */
    switch (pPage->enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_ROOT_PDPT:
            break;

        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            /* Nothing to monitor here. */
            return VINF_SUCCESS;

        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
#ifdef PGMPOOL_WITH_MIXED_PT_CR3
            break;
#endif
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        default:
            AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
    }

    /*
     * Install handler.
     */
    int rc;
    PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
    if (pPageHead)
    {
        Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
        Assert(pPageHead->iMonitoredPrev != pPage->idx);
        pPage->iMonitoredPrev = pPageHead->idx;
        pPage->iMonitoredNext = pPageHead->iMonitoredNext;
        if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
            pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
        pPageHead->iMonitoredNext = pPage->idx;
        rc = VINF_SUCCESS;
    }
    else
    {
        Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
        PVM pVM = pPool->CTX_SUFF(pVM);
        const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
        rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
                                          GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
                                          pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
                                          pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
                                          pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
                                          pPool->pszAccessHandler);
        /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
         * the heap size should suffice. */
        AssertFatalRC(rc);
        if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
            rc = VERR_PGM_POOL_CLEARED;
    }
    pPage->fMonitored = true;
    return rc;
}
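
/* Illustrative note (an assumption added here, not in the original source):
 * only one physical access handler can cover a guest page, so when several
 * shadow pages derive from the same guest page, the first registers the
 * handler and later ones are merely linked into its iMonitoredNext/
 * iMonitoredPrev chain; the handler's user argument always points at the
 * chain head, which is what pgmPoolAccessHandler() asserts above.
 */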
1611
1612
1613/**
1614 * Disables write monitoring of a guest page.
1615 *
1616 * @returns VBox status code.
1617 * @retval VINF_SUCCESS on success.
1618 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1619 * @param pPool The pool.
1620 * @param pPage The cached page.
1621 */
1622static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1623{
1624 /*
1625 * Filter out the relevant kinds.
1626 */
1627 switch (pPage->enmKind)
1628 {
1629 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1630 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1631 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1632 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1633 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1634 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1635 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1636 case PGMPOOLKIND_ROOT_PDPT:
1637 break;
1638
1639 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1640 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1641 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1642 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1643 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1644 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1645 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1646 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1647 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1648 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1649 case PGMPOOLKIND_ROOT_NESTED:
1650 /* Nothing to monitor here. */
1651 return VINF_SUCCESS;
1652
1653 case PGMPOOLKIND_ROOT_32BIT_PD:
1654 case PGMPOOLKIND_ROOT_PAE_PD:
1655#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1656 break;
1657#endif
1658 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1659 default:
1660 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1661 }
1662
1663 /*
1664 * Remove the page from the monitored list or uninstall it if last.
1665 */
1666 const PVM pVM = pPool->CTX_SUFF(pVM);
1667 int rc;
1668 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1669 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1670 {
1671 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1672 {
1673 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1674 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1675 pNewHead->fCR3Mix = pPage->fCR3Mix;
1676 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1677 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1678 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1679 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
1680 pPool->pszAccessHandler);
1681 AssertFatalRCSuccess(rc);
1682 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1683 }
1684 else
1685 {
1686 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1687 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1688 {
1689 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1690 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1691 }
1692 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1693 rc = VINF_SUCCESS;
1694 }
1695 }
1696 else
1697 {
1698 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1699 AssertFatalRC(rc);
1700 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1701 rc = VERR_PGM_POOL_CLEARED;
1702 }
1703 pPage->fMonitored = false;
1704
1705 /*
1706 * Remove it from the list of modified pages (if in it).
1707 */
1708 pgmPoolMonitorModifiedRemove(pPool, pPage);
1709
1710 return rc;
1711}
1712
1713# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1714
1715/**
1716 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1717 *
1718 * @param pPool The Pool.
1719 * @param pPage A page in the chain.
1720 * @param fCR3Mix The new fCR3Mix value.
1721 */
1722static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1723{
1724 /* current */
1725 pPage->fCR3Mix = fCR3Mix;
1726
1727 /* before */
1728 int16_t idx = pPage->iMonitoredPrev;
1729 while (idx != NIL_PGMPOOL_IDX)
1730 {
1731 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1732 idx = pPool->aPages[idx].iMonitoredPrev;
1733 }
1734
1735 /* after */
1736 idx = pPage->iMonitoredNext;
1737 while (idx != NIL_PGMPOOL_IDX)
1738 {
1739 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1740 idx = pPool->aPages[idx].iMonitoredNext;
1741 }
1742}
1743
1744
1745/**
1746 * Installs or modifies monitoring of a CR3 page (special).
1747 *
1748 * We're pretending the CR3 page is shadowed by the pool so we can use the
1749 * generic mechanisms for detecting chained monitoring. (This also gives us a
1750 * taste of what code changes are required to really pool CR3 shadow pages.)
1751 *
1752 * @returns VBox status code.
1753 * @param pPool The pool.
1754 * @param idxRoot The CR3 (root) page index.
1755 * @param GCPhysCR3 The (new) CR3 value.
1756 */
1757int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1758{
1759 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1760 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1761 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d} GCPhysCR3=%RGp\n",
1762 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1763
1764 /*
1765 * The unlikely case where it already matches.
1766 */
1767 if (pPage->GCPhys == GCPhysCR3)
1768 {
1769 Assert(pPage->fMonitored);
1770 return VINF_SUCCESS;
1771 }
1772
1773 /*
1774 * Flush the current monitoring and remove it from the hash.
1775 */
1776 int rc = VINF_SUCCESS;
1777 if (pPage->fMonitored)
1778 {
1779 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1780 rc = pgmPoolMonitorFlush(pPool, pPage);
1781 if (rc == VERR_PGM_POOL_CLEARED)
1782 rc = VINF_SUCCESS;
1783 else
1784 AssertFatalRC(rc);
1785 pgmPoolHashRemove(pPool, pPage);
1786 }
1787
1788 /*
1789 * Monitor the page at the new location and insert it into the hash.
1790 */
1791 pPage->GCPhys = GCPhysCR3;
1792 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1793 if (rc2 != VERR_PGM_POOL_CLEARED)
1794 {
1795 AssertFatalRC(rc2);
1796 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1797 rc = rc2;
1798 }
1799 pgmPoolHashInsert(pPool, pPage);
1800 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1801 return rc;
1802}
1803
1804
1805/**
1806 * Removes the monitoring of a CR3 page (special).
1807 *
1808 * @returns VBox status code.
1809 * @param pPool The pool.
1810 * @param idxRoot The CR3 (root) page index.
1811 */
1812int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1813{
1814 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1815 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1816 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d}\n",
1817 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1818
1819 if (!pPage->fMonitored)
1820 return VINF_SUCCESS;
1821
1822 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1823 int rc = pgmPoolMonitorFlush(pPool, pPage);
1824 if (rc != VERR_PGM_POOL_CLEARED)
1825 AssertFatalRC(rc);
1826 else
1827 rc = VINF_SUCCESS;
1828 pgmPoolHashRemove(pPool, pPage);
1829 Assert(!pPage->fMonitored);
1830 pPage->GCPhys = NIL_RTGCPHYS;
1831 return rc;
1832}
1833
1834# endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1835
1836/**
1837 * Inserts the page into the list of modified pages.
1838 *
1839 * @param pPool The pool.
1840 * @param pPage The page.
1841 */
1842void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1843{
1844 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1845 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1846 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1847 && pPool->iModifiedHead != pPage->idx,
1848 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1849 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1850 pPool->iModifiedHead, pPool->cModifiedPages));
1851
1852 pPage->iModifiedNext = pPool->iModifiedHead;
1853 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1854 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1855 pPool->iModifiedHead = pPage->idx;
1856 pPool->cModifiedPages++;
1857#ifdef VBOX_WITH_STATISTICS
1858 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1859 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1860#endif
1861}
1862
1863
1864/**
1865 * Removes the page from the list of modified pages and resets the
1866 * modification counter.
1867 *
1868 * @param pPool The pool.
1869 * @param pPage The page which is believed to be in the list of modified pages.
1870 */
1871static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1872{
1873 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1874 if (pPool->iModifiedHead == pPage->idx)
1875 {
1876 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1877 pPool->iModifiedHead = pPage->iModifiedNext;
1878 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1879 {
1880 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1881 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1882 }
1883 pPool->cModifiedPages--;
1884 }
1885 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1886 {
1887 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1888 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1889 {
1890 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1891 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1892 }
1893 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1894 pPool->cModifiedPages--;
1895 }
1896 else
1897 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1898 pPage->cModifications = 0;
1899}
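
/*
 * Illustrative standalone sketch (compiled out): the modified-page list used
 * above is an intrusive doubly-linked list that chains pages through 16-bit
 * indices into the page array rather than pointers, with a NIL index as the
 * sentinel. The minimal model below mirrors the head-insert and unlink steps;
 * the DemoPage type and all demo* names are hypothetical, not pool API.
 */
#if 0
# include <stdint.h>

# define DEMO_NIL_IDX   UINT16_C(0xffff)

typedef struct DemoPage
{
    uint16_t iNext;     /* index of the next page in the list, or DEMO_NIL_IDX. */
    uint16_t iPrev;     /* index of the previous page, or DEMO_NIL_IDX. */
} DemoPage;

/** Inserts aPages[idx] at the head of the list. */
static void demoListInsert(DemoPage *aPages, uint16_t *piHead, uint16_t idx)
{
    aPages[idx].iNext = *piHead;
    aPages[idx].iPrev = DEMO_NIL_IDX;
    if (*piHead != DEMO_NIL_IDX)
        aPages[*piHead].iPrev = idx;
    *piHead = idx;
}

/** Unlinks aPages[idx], handling both the head and the middle/tail cases. */
static void demoListRemove(DemoPage *aPages, uint16_t *piHead, uint16_t idx)
{
    if (*piHead == idx)
        *piHead = aPages[idx].iNext;
    else if (aPages[idx].iPrev != DEMO_NIL_IDX)
        aPages[aPages[idx].iPrev].iNext = aPages[idx].iNext;
    if (aPages[idx].iNext != DEMO_NIL_IDX)
        aPages[aPages[idx].iNext].iPrev = aPages[idx].iPrev;
    aPages[idx].iNext = aPages[idx].iPrev = DEMO_NIL_IDX;
}
#endif /* 0 */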
1900
1901
1902/**
1903 * Zaps the list of modified pages, resetting their modification counters in the process.
1904 *
1905 * @param pVM The VM handle.
1906 */
1907void pgmPoolMonitorModifiedClearAll(PVM pVM)
1908{
1909 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1910 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1911
1912 unsigned cPages = 0; NOREF(cPages);
1913 uint16_t idx = pPool->iModifiedHead;
1914 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1915 while (idx != NIL_PGMPOOL_IDX)
1916 {
1917 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1918 idx = pPage->iModifiedNext;
1919 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1920 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1921 pPage->cModifications = 0;
1922 Assert(++cPages);
1923 }
1924 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1925 pPool->cModifiedPages = 0;
1926}
1927
1928
1929/**
1930 * Clear all shadow pages and clear all modification counters.
1931 *
1932 * @param pVM The VM handle.
1933 * @remark Should only be used when monitoring is available, thus placed in
1934 * the PGMPOOL_WITH_MONITORING #ifdef.
1935 */
1936void pgmPoolClearAll(PVM pVM)
1937{
1938 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1939 STAM_PROFILE_START(&pPool->StatClearAll, c);
1940 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1941
1942 /*
1943 * Iterate all the pages until we've encountered all that are in use.
1944 * This is a simple but not quite optimal solution.
1945 */
1946 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1947 unsigned cLeft = pPool->cUsedPages;
1948 unsigned iPage = pPool->cCurPages;
1949 while (--iPage >= PGMPOOL_IDX_FIRST)
1950 {
1951 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1952 if (pPage->GCPhys != NIL_RTGCPHYS)
1953 {
1954 switch (pPage->enmKind)
1955 {
1956 /*
1957 * We only care about shadow page tables.
1958 */
1959 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1960 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1961 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1962 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1963 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1964 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1965 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1966 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1967 {
1968#ifdef PGMPOOL_WITH_USER_TRACKING
1969 if (pPage->cPresent)
1970#endif
1971 {
1972 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1973 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1974 ASMMemZeroPage(pvShw);
1975 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1976#ifdef PGMPOOL_WITH_USER_TRACKING
1977 pPage->cPresent = 0;
1978 pPage->iFirstPresent = ~0;
1979#endif
1980 }
1981 }
1982 /* fall thru */
1983
1984 default:
1985 Assert(!pPage->cModifications || ++cModifiedPages);
1986 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1987 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1988 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1989 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1990 pPage->cModifications = 0;
1991 break;
1992
1993 }
1994 if (!--cLeft)
1995 break;
1996 }
1997 }
1998
1999    /* sweep the special pages too. */
2000 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2001 {
2002 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2003 if (pPage->GCPhys != NIL_RTGCPHYS)
2004 {
2005 Assert(!pPage->cModifications || ++cModifiedPages);
2006 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2007 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2008 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2009 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2010 pPage->cModifications = 0;
2011 }
2012 }
2013
2014#ifndef DEBUG_michael
2015 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2016#endif
2017 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2018 pPool->cModifiedPages = 0;
2019
2020#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2021 /*
2022 * Clear all the GCPhys links and rebuild the phys ext free list.
2023 */
2024 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2025 pRam;
2026 pRam = pRam->CTX_SUFF(pNext))
2027 {
2028 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2029 while (iPage-- > 0)
2030 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2031 }
2032
2033 pPool->iPhysExtFreeHead = 0;
2034 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2035 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2036 for (unsigned i = 0; i < cMaxPhysExts; i++)
2037 {
2038 paPhysExts[i].iNext = i + 1;
2039 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2040 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2041 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2042 }
2043 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2044#endif
2045
2046
2047 pPool->cPresent = 0;
2048 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2049}
2050
2051
2052/**
2053 * Handle SyncCR3 pool tasks
2054 *
2055 * @returns VBox status code.
2056 * @retval VINF_SUCCESS on success.
2057 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2058 * @param pVM The VM handle.
2059 * @remark Should only be used when monitoring is available, thus placed in
2060 * the PGMPOOL_WITH_MONITORING #ifdef.
2061 */
2062int pgmPoolSyncCR3(PVM pVM)
2063{
2064 /*
2065 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2066 * Occasionally we will have to clear all the shadow page tables because we wanted
2067 * to monitor a page which was mapped by too many shadowed page tables. This operation
2068 * is sometimes referred to as a 'lightweight flush'.
2069 */
2070 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2071 pgmPoolMonitorModifiedClearAll(pVM);
2072 else
2073 {
2074# ifndef IN_RC
2075 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2076 pgmPoolClearAll(pVM);
2077# else
2078 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2079 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2080 return VINF_PGM_SYNC_CR3;
2081# endif
2082 }
2083 return VINF_SUCCESS;
2084}
2085
2086#endif /* PGMPOOL_WITH_MONITORING */
2087#ifdef PGMPOOL_WITH_USER_TRACKING
2088
2089/**
2090 * Frees up at least one user entry.
2091 *
2092 * @returns VBox status code.
2093 * @retval VINF_SUCCESS on success.
2094 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2095 * @param pPool The pool.
2096 * @param iUser The user index.
2097 */
2098static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2099{
2100 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2101#ifdef PGMPOOL_WITH_CACHE
2102 /*
2103 * Just free cached pages in a braindead fashion.
2104 */
2105 /** @todo walk the age list backwards and free the first with usage. */
2106 int rc = VINF_SUCCESS;
2107 do
2108 {
2109 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2110 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2111 rc = rc2;
2112 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2113 return rc;
2114#else
2115 /*
2116 * Lazy approach.
2117 */
2118    /** @todo incompatible with long mode paging (cr3 root will be flushed) */
2119    Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
2120 pgmPoolFlushAllInt(pPool);
2121 return VERR_PGM_POOL_FLUSHED;
2122#endif
2123}
2124
2125
2126/**
2127 * Inserts a page into the cache.
2128 *
2129 * This will create a user node for the page, insert it into the GCPhys
2130 * hash, and insert it into the age list.
2131 *
2132 * @returns VBox status code.
2133 * @retval VINF_SUCCESS if successfully added.
2134 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2135 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
2136 * @param pPool The pool.
2137 * @param pPage The cached page.
2138 * @param GCPhys The GC physical address of the page we're going to shadow.
2139 * @param iUser The user index.
2140 * @param iUserTable The user table index.
2141 */
2142DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2143{
2144 int rc = VINF_SUCCESS;
2145 PPGMPOOLUSER pUser = pPool->CTX_SUFF(paUsers);
2146
2147 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
2148
2149 /*
2150 * Find a free user node.
2151 */
2152 uint16_t i = pPool->iUserFreeHead;
2153 if (i == NIL_PGMPOOL_USER_INDEX)
2154 {
2155 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2156 if (RT_FAILURE(rc))
2157 return rc;
2158 i = pPool->iUserFreeHead;
2159 }
2160
2161 /*
2162 * Unlink the user node from the free list,
2163 * initialize and insert it into the user list.
2164 */
2165 pPool->iUserFreeHead = pUser[i].iNext;
2166 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
2167 pUser[i].iUser = iUser;
2168 pUser[i].iUserTable = iUserTable;
2169 pPage->iUserHead = i;
2170
2171 /*
2172 * Insert into cache and enable monitoring of the guest page if enabled.
2173 *
2174 * Until we implement caching of all levels, including the CR3 one, we'll
2175 * have to make sure we don't try to monitor & cache any recursive reuse of
2176 * a monitored CR3 page. Because all Windows versions are doing this we'll
2177 * have to be able to do combined access monitoring, CR3 + PT and
2178 * PD + PT (guest PAE).
2179 *
2180 * Update:
2181 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2182 */
2183#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2184# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2185 const bool fCanBeMonitored = true;
2186# else
2187 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2188 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2189 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2190# endif
2191# ifdef PGMPOOL_WITH_CACHE
2192 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2193# endif
2194 if (fCanBeMonitored)
2195 {
2196# ifdef PGMPOOL_WITH_MONITORING
2197 rc = pgmPoolMonitorInsert(pPool, pPage);
2198 if (rc == VERR_PGM_POOL_CLEARED)
2199 {
2200 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2201# ifndef PGMPOOL_WITH_CACHE
2202 pgmPoolMonitorFlush(pPool, pPage);
2203 rc = VERR_PGM_POOL_FLUSHED;
2204# endif
2205 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2206 pUser[i].iNext = pPool->iUserFreeHead;
2207 pUser[i].iUser = NIL_PGMPOOL_IDX;
2208 pPool->iUserFreeHead = i;
2209 }
2210 }
2211# endif
2212#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2213 return rc;
2214}
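
/*
 * Illustrative standalone sketch (compiled out): user records live in a fixed
 * array chained through 16-bit iNext fields; pgmPoolTrackInsert pops a record
 * off the iUserFreeHead free list and freeing pushes it back. A minimal model
 * of that free-list allocator, with hypothetical Demo* names:
 */
#if 0
# include <stdint.h>

# define DEMO_NIL_IDX   UINT16_C(0xffff)

typedef struct DemoUser
{
    uint16_t iNext;     /* next record on the free list or on a page's user chain. */
    uint16_t iUser;     /* owner index; DEMO_NIL_IDX while the record is free. */
} DemoUser;

/** Chains all records into one free list; the last record terminates it. */
static void demoUserInit(DemoUser *paUsers, uint16_t cUsers, uint16_t *piFreeHead)
{
    for (uint16_t i = 0; i < cUsers; i++)
    {
        paUsers[i].iNext = (uint16_t)(i + 1);
        paUsers[i].iUser = DEMO_NIL_IDX;
    }
    paUsers[cUsers - 1].iNext = DEMO_NIL_IDX;
    *piFreeHead = 0;
}

/** Pops a record off the free list; returns DEMO_NIL_IDX when it's empty. */
static uint16_t demoUserAlloc(DemoUser *paUsers, uint16_t *piFreeHead)
{
    uint16_t i = *piFreeHead;
    if (i != DEMO_NIL_IDX)
        *piFreeHead = paUsers[i].iNext;
    return i;
}

/** Pushes a record back onto the free list. */
static void demoUserFree(DemoUser *paUsers, uint16_t *piFreeHead, uint16_t i)
{
    paUsers[i].iUser = DEMO_NIL_IDX;
    paUsers[i].iNext = *piFreeHead;
    *piFreeHead = i;
}
#endif /* 0 */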
2215
2216
2217# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2218/**
2219 * Adds a user reference to a page.
2220 *
2221 * This will add a user record to the page's chain of user records and
2222 * move the page to the head of the cache's age list.
2223 *
2224 * @returns VBox status code.
2225 * @retval VINF_SUCCESS if successfully added.
2226 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2227 * @param pPool The pool.
2228 * @param pPage The cached page.
2229 * @param iUser The user index.
2230 * @param iUserTable The user table.
2231 */
2232static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2233{
2234 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2235
2236 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
2237# ifdef VBOX_STRICT
2238 /*
2239 * Check that the entry doesn't already exist.
2240 */
2241 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2242 {
2243 uint16_t i = pPage->iUserHead;
2244 do
2245 {
2246 Assert(i < pPool->cMaxUsers);
2247 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2248 i = paUsers[i].iNext;
2249 } while (i != NIL_PGMPOOL_USER_INDEX);
2250 }
2251# endif
2252
2253 /*
2254 * Allocate a user node.
2255 */
2256 uint16_t i = pPool->iUserFreeHead;
2257 if (i == NIL_PGMPOOL_USER_INDEX)
2258 {
2259 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2260 if (RT_FAILURE(rc))
2261 return rc;
2262 i = pPool->iUserFreeHead;
2263 }
2264 pPool->iUserFreeHead = paUsers[i].iNext;
2265
2266 /*
2267 * Initialize the user node and insert it.
2268 */
2269 paUsers[i].iNext = pPage->iUserHead;
2270 paUsers[i].iUser = iUser;
2271 paUsers[i].iUserTable = iUserTable;
2272 pPage->iUserHead = i;
2273
2274# ifdef PGMPOOL_WITH_CACHE
2275 /*
2276 * Tell the cache to update its replacement stats for this page.
2277 */
2278 pgmPoolCacheUsed(pPool, pPage);
2279# endif
2280 return VINF_SUCCESS;
2281}
2282# endif /* PGMPOOL_WITH_CACHE */
2283
2284
2285/**
2286 * Frees a user record associated with a page.
2287 *
2288 * This does not clear the entry in the user table, it simply returns the
2289 * user record to the chain of free records.
2290 *
2291 * @param pPool The pool.
2292 * @param pPage The shadow page.
2293 * @param iUser The shadow page pool index of the user table.
2294 * @param iUserTable The index into the user table (shadowed).
2295 */
2296static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2297{
2298 /*
2299 * Unlink and free the specified user entry.
2300 */
2301 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2302
2303 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2304 uint16_t i = pPage->iUserHead;
2305 if ( i != NIL_PGMPOOL_USER_INDEX
2306 && paUsers[i].iUser == iUser
2307 && paUsers[i].iUserTable == iUserTable)
2308 {
2309 pPage->iUserHead = paUsers[i].iNext;
2310
2311 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2312 paUsers[i].iNext = pPool->iUserFreeHead;
2313 pPool->iUserFreeHead = i;
2314 return;
2315 }
2316
2317 /* General: Linear search. */
2318 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2319 while (i != NIL_PGMPOOL_USER_INDEX)
2320 {
2321 if ( paUsers[i].iUser == iUser
2322 && paUsers[i].iUserTable == iUserTable)
2323 {
2324 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2325 paUsers[iPrev].iNext = paUsers[i].iNext;
2326 else
2327 pPage->iUserHead = paUsers[i].iNext;
2328
2329 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2330 paUsers[i].iNext = pPool->iUserFreeHead;
2331 pPool->iUserFreeHead = i;
2332 return;
2333 }
2334 iPrev = i;
2335 i = paUsers[i].iNext;
2336 }
2337
2338 /* Fatal: didn't find it */
2339 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2340 iUser, iUserTable, pPage->GCPhys));
2341}
2342
2343
2344/**
2345 * Gets the entry size of a shadow table.
2346 *
2347 * @param enmKind The kind of page.
2348 *
2349 * @returns The size of the entry in bytes. That is, 4 or 8.
2350 * @returns If the kind is not for a table, an assertion is raised and 0 is
2351 * returned.
2352 */
2353DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2354{
2355 switch (enmKind)
2356 {
2357 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2358 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2359 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2360 case PGMPOOLKIND_ROOT_32BIT_PD:
2361 return 4;
2362
2363 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2364 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2365 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2366 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2367 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2368 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2369 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2370 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2371 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2372 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2373 case PGMPOOLKIND_ROOT_PAE_PD:
2374 case PGMPOOLKIND_ROOT_PDPT:
2375 case PGMPOOLKIND_ROOT_NESTED:
2376 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2377 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2378 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2379 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2380 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2381 return 8;
2382
2383 default:
2384 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2385 }
2386}
2387
2388
2389/**
2390 * Gets the entry size of a guest table.
2391 *
2392 * @param enmKind The kind of page.
2393 *
2394 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2395 * @returns If the kind is not for a table, an assertion is raised and 0 is
2396 * returned.
2397 */
2398DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2399{
2400 switch (enmKind)
2401 {
2402 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2403 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2404 case PGMPOOLKIND_ROOT_32BIT_PD:
2405 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2406 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2407 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2408 return 4;
2409
2410 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2411 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2412 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2413 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2414 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2415 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2416 case PGMPOOLKIND_ROOT_PAE_PD:
2417 case PGMPOOLKIND_ROOT_PDPT:
2418 return 8;
2419
2420 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2421 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2422 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2423 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2424 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2425 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2426 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2427 case PGMPOOLKIND_ROOT_NESTED:
2428 /** @todo can we return 0? (nobody is calling this...) */
2429 AssertFailed();
2430 return 0;
2431
2432 default:
2433 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2434 }
2435}
2436
2437#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2438
2439/**
2440 * Scans one shadow page table for mappings of a physical page.
2441 *
2442 * @param pVM The VM handle.
2443 * @param pPhysPage The guest page in question.
2444 * @param iShw The index of the shadow page table.
2445 * @param cRefs The number of references made in that PT.
2446 */
2447static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2448{
2449 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2450 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2451
2452 /*
2453 * Assert sanity.
2454 */
2455 Assert(cRefs == 1);
2456 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2457 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2458
2459 /*
2460 * Then, clear the actual mappings to the page in the shadow PT.
2461 */
2462 switch (pPage->enmKind)
2463 {
2464 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2465 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2466 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2467 {
2468 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2469 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2470 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2471 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2472 {
2473 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2474 pPT->a[i].u = 0;
2475 cRefs--;
2476 if (!cRefs)
2477 return;
2478 }
2479#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2480 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2481 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2482 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2483 {
2484 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2485 pPT->a[i].u = 0;
2486 }
2487#endif
2488 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2489 break;
2490 }
2491
2492 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2493 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2494 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2495 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2496 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2497 {
2498 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2499 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2500 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2501 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2502 {
2503 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2504 pPT->a[i].u = 0;
2505 cRefs--;
2506 if (!cRefs)
2507 return;
2508 }
2509#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2510 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2511 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2512 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2513 {
2514 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2515 pPT->a[i].u = 0;
2516 }
2517#endif
2518 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2519 break;
2520 }
2521
2522 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2523 {
2524 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2525 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2526 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2527 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2528 {
2529 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2530 pPT->a[i].u = 0;
2531 cRefs--;
2532 if (!cRefs)
2533 return;
2534 }
2535#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2536 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2537 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2538 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2539 {
2540 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2541 pPT->a[i].u = 0;
2542 }
2543#endif
2544 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2545 break;
2546 }
2547
2548 default:
2549 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2550 }
2551}
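
/*
 * Illustrative standalone sketch (compiled out): the cases above all match
 * PTEs by masking out everything but the physical address and the present
 * bit, comparing against HCPhys | P, and stopping once the known reference
 * count is exhausted. A generic model of that matching loop; the DEMO_*
 * constants are stand-ins for the real masks, not the actual values:
 */
#if 0
# include <stdint.h>
# include <stddef.h>

# define DEMO_PTE_P         UINT64_C(0x0000000000000001)    /* present bit. */
# define DEMO_PTE_PG_MASK   UINT64_C(0x000ffffffffff000)    /* address bits. */

/**
 * Clears every PTE mapping HCPhys, starting at the first-present hint and
 * counting down cRefs for an early exit. Returns the references NOT found.
 */
static unsigned demoClearMappings(uint64_t *paPTEs, size_t cEntries,
                                  size_t iFirst, uint64_t HCPhys, unsigned cRefs)
{
    const uint64_t u64 = HCPhys | DEMO_PTE_P;       /* what a match looks like. */
    for (size_t i = iFirst; i < cEntries && cRefs > 0; i++)
        if ((paPTEs[i] & (DEMO_PTE_PG_MASK | DEMO_PTE_P)) == u64)
        {
            paPTEs[i] = 0;                          /* zap the mapping. */
            cRefs--;
        }
    return cRefs;                                   /* 0 when all were found. */
}
#endif /* 0 */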
2552
2553
2554/**
2555 * Scans one shadow page table for mappings of a physical page.
2556 *
2557 * @param pVM The VM handle.
2558 * @param pPhysPage The guest page in question.
2559 * @param iShw The index of the shadow page table.
2560 * @param cRefs The number of references made in that PT.
2561 */
2562void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2563{
2564 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2565 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2566 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2567 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2568 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2569 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2570}
2571
2572
2573/**
2574 * Flushes a list of shadow page tables mapping the same physical page.
2575 *
2576 * @param pVM The VM handle.
2577 * @param pPhysPage The guest page in question.
2578 * @param iPhysExt The physical cross reference extent list to flush.
2579 */
2580void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2581{
2582 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2583 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2584    LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%u\n", pPhysPage->HCPhys, iPhysExt));
2585
2586 const uint16_t iPhysExtStart = iPhysExt;
2587 PPGMPOOLPHYSEXT pPhysExt;
2588 do
2589 {
2590 Assert(iPhysExt < pPool->cMaxPhysExts);
2591 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2592 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2593 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2594 {
2595 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2596 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2597 }
2598
2599 /* next */
2600 iPhysExt = pPhysExt->iNext;
2601 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2602
2603 /* insert the list into the free list and clear the ram range entry. */
2604 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2605 pPool->iPhysExtFreeHead = iPhysExtStart;
2606 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2607
2608 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2609}
2610
2611#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2612
2613/**
2614 * Scans all shadow page tables for mappings of a physical page.
2615 *
2616 * This may be slow, but it's most likely more efficient than cleaning
2617 * out the entire page pool / cache.
2618 *
2619 * @returns VBox status code.
2620 * @retval VINF_SUCCESS if all references have been successfully cleared.
2621 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2622 * a page pool cleaning.
2623 *
2624 * @param pVM The VM handle.
2625 * @param pPhysPage The guest page in question.
2626 */
2627int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2628{
2629 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2630 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2631 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2632 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2633
2634#if 1
2635 /*
2636 * There is a limit to what makes sense.
2637 */
2638 if (pPool->cPresent > 1024)
2639 {
2640 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2641 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2642 return VINF_PGM_GCPHYS_ALIASED;
2643 }
2644#endif
2645
2646 /*
2647 * Iterate all the pages until we've encountered all that are in use.
2648 * This is a simple but not quite optimal solution.
2649 */
2650 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2651 const uint32_t u32 = u64;
2652 unsigned cLeft = pPool->cUsedPages;
2653 unsigned iPage = pPool->cCurPages;
2654 while (--iPage >= PGMPOOL_IDX_FIRST)
2655 {
2656 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2657 if (pPage->GCPhys != NIL_RTGCPHYS)
2658 {
2659 switch (pPage->enmKind)
2660 {
2661 /*
2662 * We only care about shadow page tables.
2663 */
2664 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2665 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2666 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2667 {
2668 unsigned cPresent = pPage->cPresent;
2669 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2670 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2671 if (pPT->a[i].n.u1Present)
2672 {
2673 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2674 {
2675 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2676 pPT->a[i].u = 0;
2677 }
2678 if (!--cPresent)
2679 break;
2680 }
2681 break;
2682 }
2683
2684 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2685 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2686 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2687 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2688 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2689 {
2690 unsigned cPresent = pPage->cPresent;
2691 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2692 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2693 if (pPT->a[i].n.u1Present)
2694 {
2695 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2696 {
2697 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2698 pPT->a[i].u = 0;
2699 }
2700 if (!--cPresent)
2701 break;
2702 }
2703 break;
2704 }
2705 }
2706 if (!--cLeft)
2707 break;
2708 }
2709 }
2710
2711 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2712 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2713 return VINF_SUCCESS;
2714}
2715
2716
2717/**
2718 * Clears the user entry in a user table.
2719 *
2720 * This is used to remove all references to a page when flushing it.
2721 */
2722static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2723{
2724 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2725 Assert(pUser->iUser < pPool->cCurPages);
2726 uint32_t iUserTable = pUser->iUserTable;
2727
2728 /*
2729 * Map the user page.
2730 */
2731 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2732#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2733 if (pUserPage->enmKind == PGMPOOLKIND_ROOT_PAE_PD)
2734 {
2735        /* Must translate the fake 2048-entry PD to a 512-entry one since the R0 mapping is not linear. */
2736 Assert(pUser->iUser == PGMPOOL_IDX_PAE_PD);
2737 uint32_t iPdpt = iUserTable / X86_PG_PAE_ENTRIES;
2738 iUserTable %= X86_PG_PAE_ENTRIES;
2739 pUserPage = &pPool->aPages[PGMPOOL_IDX_PAE_PD_0 + iPdpt];
2740 Assert(pUserPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD);
2741 }
2742#endif
2743 union
2744 {
2745 uint64_t *pau64;
2746 uint32_t *pau32;
2747 } u;
2748 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
2749
2750 /* Safety precaution in case we change the paging for other modes too in the future. */
2751 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
2752
2753#ifdef VBOX_STRICT
2754 /*
2755 * Some sanity checks.
2756 */
2757 switch (pUserPage->enmKind)
2758 {
2759 case PGMPOOLKIND_ROOT_32BIT_PD:
2760 Assert(iUserTable < X86_PG_ENTRIES);
2761 Assert(!(u.pau32[iUserTable] & PGM_PDFLAGS_MAPPING));
2762 break;
2763# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2764 case PGMPOOLKIND_ROOT_PAE_PD:
2765 Assert(iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2766 AssertMsg(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING), ("%llx %d\n", u.pau64[iUserTable], iUserTable));
2767 break;
2768# endif
2769 case PGMPOOLKIND_ROOT_PDPT:
2770 Assert(iUserTable < 4);
2771 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2772 break;
2773 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2774 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2775 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2776 break;
2777 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2778 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2779 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
2780 break;
2781 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2782 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2783 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2784 break;
2785 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2786 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2787 /* GCPhys >> PAGE_SHIFT is the index here */
2788 break;
2789 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2790 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2791 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2792 break;
2793
2794 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2795 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2796 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2797 break;
2798
2799 case PGMPOOLKIND_ROOT_NESTED:
2800 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2801 break;
2802
2803 default:
2804 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2805 break;
2806 }
2807#endif /* VBOX_STRICT */
2808
2809 /*
2810 * Clear the entry in the user page.
2811 */
2812 switch (pUserPage->enmKind)
2813 {
2814 /* 32-bit entries */
2815 case PGMPOOLKIND_ROOT_32BIT_PD:
2816 u.pau32[iUserTable] = 0;
2817 break;
2818
2819 /* 64-bit entries */
2820 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2821 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2822 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2823 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2824 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2825 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2826 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2827#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2828 case PGMPOOLKIND_ROOT_PAE_PD:
2829#endif
2830 case PGMPOOLKIND_ROOT_PDPT:
2831 case PGMPOOLKIND_ROOT_NESTED:
2832 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2833 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2834 u.pau64[iUserTable] = 0;
2835 break;
2836
2837 default:
2838 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2839 }
2840}
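
/*
 * Illustrative standalone sketch (compiled out): the function above accesses
 * the same mapped table through a union of uint32_t and uint64_t pointers and
 * picks the entry width from the table kind. A minimal model of that trick;
 * demoClearTableEntry is hypothetical:
 */
#if 0
# include <stdint.h>
# include <stdbool.h>

/** Clears entry iEntry in a table of either 32-bit or 64-bit entries. */
static void demoClearTableEntry(void *pvTable, unsigned iEntry, bool f64Bit)
{
    union
    {
        uint64_t *pau64;
        uint32_t *pau32;
    } u;
    u.pau64 = (uint64_t *)pvTable;      /* one mapping, two views of the entries. */
    if (f64Bit)
        u.pau64[iEntry] = 0;            /* PAE / long mode / EPT style entry. */
    else
        u.pau32[iEntry] = 0;            /* legacy 32-bit style entry. */
}
#endif /* 0 */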
2841
2842
2843/**
2844 * Clears all users of a page.
2845 */
2846static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2847{
2848 /*
2849 * Free all the user records.
2850 */
2851 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2852 uint16_t i = pPage->iUserHead;
2853 while (i != NIL_PGMPOOL_USER_INDEX)
2854 {
2855        /* Clear the entry in the user table. */
2856 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2857
2858 /* Free it. */
2859 const uint16_t iNext = paUsers[i].iNext;
2860 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2861 paUsers[i].iNext = pPool->iUserFreeHead;
2862 pPool->iUserFreeHead = i;
2863
2864 /* Next. */
2865 i = iNext;
2866 }
2867 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2868}
2869
2870#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2871
2872/**
2873 * Allocates a new physical cross reference extent.
2874 *
2875 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2876 * @param pVM The VM handle.
2877 * @param piPhysExt Where to store the phys ext index.
2878 */
2879PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2880{
2881 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2882 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2883 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2884 {
2885 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2886 return NULL;
2887 }
2888 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2889 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2890 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2891 *piPhysExt = iPhysExt;
2892 return pPhysExt;
2893}
2894
2895
2896/**
2897 * Frees a physical cross reference extent.
2898 *
2899 * @param pVM The VM handle.
2900 * @param iPhysExt The extent to free.
2901 */
2902void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2903{
2904 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2905 Assert(iPhysExt < pPool->cMaxPhysExts);
2906 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2907 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2908 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2909 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2910 pPool->iPhysExtFreeHead = iPhysExt;
2911}
2912
2913
2914/**
2915 * Frees a list of physical cross reference extents.
2916 *
2917 * @param pVM The VM handle.
2918 * @param iPhysExt The index of the first extent in the list to free.
2919 */
2920void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2921{
2922 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2923
2924 const uint16_t iPhysExtStart = iPhysExt;
2925 PPGMPOOLPHYSEXT pPhysExt;
2926 do
2927 {
2928 Assert(iPhysExt < pPool->cMaxPhysExts);
2929 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2930 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2931 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2932
2933 /* next */
2934 iPhysExt = pPhysExt->iNext;
2935 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2936
2937 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2938 pPool->iPhysExtFreeHead = iPhysExtStart;
2939}
2940
2941
2942/**
2943 * Insert a reference into a list of physical cross reference extents.
2944 *
2945 * @returns The new ram range flags (top 16-bits).
2946 *
2947 * @param pVM The VM handle.
2948 * @param iPhysExt The physical extent index of the list head.
2949 * @param iShwPT The shadow page table index.
2950 *
2951 */
2952static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2953{
2954 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2955 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2956
2957 /* special common case. */
2958 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2959 {
2960 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2961 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2962 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2963 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2964 }
2965
2966 /* general treatment. */
2967 const uint16_t iPhysExtStart = iPhysExt;
2968 unsigned cMax = 15;
2969 for (;;)
2970 {
2971 Assert(iPhysExt < pPool->cMaxPhysExts);
2972 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2973 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2974 {
2975 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2976 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2977 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2978 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2979 }
2980 if (!--cMax)
2981 {
2982 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2983 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2984 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2985 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2986 }
        iPhysExt = paPhysExts[iPhysExt].iNext;
        if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
            break;
2987 }
2988
2989 /* add another extent to the list. */
2990 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2991 if (!pNew)
2992 {
2993 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2994 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2995 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2996 }
2997 pNew->iNext = iPhysExtStart;
2998 pNew->aidx[0] = iShwPT;
2999 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3000 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3001}
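
/*
 * Illustrative standalone sketch (compiled out): each physical cross
 * reference extent holds a handful of shadow PT indices and chains to the
 * next extent; insertion walks the chain for a free slot, caps the walk, and
 * reports overflow so the caller can stop tracking the page. A simplified
 * model that omits the allocate-a-new-extent step; all Demo* names are
 * hypothetical:
 */
#if 0
# include <stdint.h>

# define DEMO_NIL_IDX           UINT16_C(0xffff)
# define DEMO_IDX_OVERFLOWED    UINT16_C(0xfffe)

typedef struct DemoExt
{
    uint16_t aidx[3];   /* up to three shadow PT indices per extent. */
    uint16_t iNext;     /* next extent in the chain, or DEMO_NIL_IDX. */
} DemoExt;

/**
 * Records iShwPT in the chain starting at iExt, visiting at most cMaxNodes
 * extents. Returns the unchanged list head on success, or
 * DEMO_IDX_OVERFLOWED when tracking must be given up.
 */
static uint16_t demoExtInsert(DemoExt *paExts, uint16_t iExt, uint16_t iShwPT,
                              unsigned cMaxNodes)
{
    const uint16_t iStart = iExt;
    while (iExt != DEMO_NIL_IDX && cMaxNodes-- > 0)
    {
        for (unsigned i = 0; i < 3; i++)
            if (paExts[iExt].aidx[i] == DEMO_NIL_IDX)
            {
                paExts[iExt].aidx[i] = iShwPT;  /* found a free slot. */
                return iStart;
            }
        iExt = paExts[iExt].iNext;              /* try the next extent. */
    }
    return DEMO_IDX_OVERFLOWED;                 /* chain full or walk capped. */
}
#endif /* 0 */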
3002
3003
3004/**
3005 * Add a reference to a guest physical page where extents are in use.
3006 *
3007 * @returns The new ram range flags (top 16-bits).
3008 *
3009 * @param pVM The VM handle.
3010 * @param u16 The ram range flags (top 16-bits).
3011 * @param iShwPT The shadow page table index.
3012 */
3013uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3014{
3015 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
3016 {
3017 /*
3018 * Convert to extent list.
3019 */
3020 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
3021 uint16_t iPhysExt;
3022 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3023 if (pPhysExt)
3024 {
3025 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
3026 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3027 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
3028 pPhysExt->aidx[1] = iShwPT;
3029 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3030 }
3031 else
3032 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3033 }
3034 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
3035 {
3036 /*
3037 * Insert into the extent list.
3038 */
3039 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
3040 }
3041 else
3042 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3043 return u16;
3044}
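
/*
 * Illustrative standalone sketch (compiled out): the u16 handled above packs
 * a reference-count code and an index into the top 16 bits of the ram range
 * entry; a special cRefs code marks the index as an extent-list head rather
 * than a single shadow PT. The model below uses made-up field widths purely
 * for illustration; the real MM_RAM_FLAGS_* layout differs:
 */
#if 0
# include <stdint.h>

/* Hypothetical layout: low 12 bits = index, top 4 bits = cRefs code. */
# define DEMO_IDX_MASK          UINT16_C(0x0fff)
# define DEMO_CREFS_SHIFT       12
# define DEMO_CREFS_PHYSEXT     UINT16_C(0xf)   /* index is an extent-list head. */

static uint16_t demoEncode(uint16_t cRefs, uint16_t idx)
{
    return (uint16_t)((cRefs << DEMO_CREFS_SHIFT) | (idx & DEMO_IDX_MASK));
}

static uint16_t demoGetCRefs(uint16_t u16)  { return (uint16_t)(u16 >> DEMO_CREFS_SHIFT); }
static uint16_t demoGetIdx(uint16_t u16)    { return (uint16_t)(u16 & DEMO_IDX_MASK); }

/** Mirrors the single-reference to extent-list transition made above. */
static uint16_t demoAddSecondRef(uint16_t u16, uint16_t iNewExtListHead)
{
    if (demoGetCRefs(u16) != DEMO_CREFS_PHYSEXT)    /* still one direct ref? */
        return demoEncode(DEMO_CREFS_PHYSEXT, iNewExtListHead);
    return u16;                                     /* already an extent list. */
}
#endif /* 0 */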
3045
3046
3047/**
3048 * Clear references to guest physical memory.
3049 *
3050 * @param pPool The pool.
3051 * @param pPage The page.
3052 * @param pPhysPage Pointer to the aPages entry in the ram range.
3053 */
3054void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3055{
3056 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
3057 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3058
3059 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
3060 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
3061 {
3062 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3063 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3064 do
3065 {
3066 Assert(iPhysExt < pPool->cMaxPhysExts);
3067
3068 /*
3069 * Look for the shadow page and check if it's all freed.
3070 */
3071 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3072 {
3073 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3074 {
3075 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3076
3077 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3078 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3079 {
3080 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3081 return;
3082 }
3083
3084 /* we can free the node. */
3085 PVM pVM = pPool->CTX_SUFF(pVM);
3086 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3087 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3088 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3089 {
3090 /* lonely node */
3091 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3092 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
3093 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3094 }
3095 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3096 {
3097 /* head */
3098 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
3099 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
3100 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
3101 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
3102 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3103 }
3104 else
3105 {
3106 /* in list */
3107 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3108 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3109 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3110 }
3111 iPhysExt = iPhysExtNext;
3112 return;
3113 }
3114 }
3115
3116 /* next */
3117 iPhysExtPrev = iPhysExt;
3118 iPhysExt = paPhysExts[iPhysExt].iNext;
3119 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3120
3121 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3122 }
3123 else /* nothing to do */
3124 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
3125}
3126
3127
3128/**
3129 * Clear references to guest physical memory.
3130 *
3131 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3132 * is assumed to be correct, so the linear search can be skipped and we can assert
3133 * at an earlier point.
3134 *
3135 * @param pPool The pool.
3136 * @param pPage The page.
3137 * @param HCPhys The host physical address corresponding to the guest page.
3138 * @param GCPhys The guest physical address corresponding to HCPhys.
3139 */
3140static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3141{
3142 /*
3143 * Walk range list.
3144 */
3145 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3146 while (pRam)
3147 {
3148 RTGCPHYS off = GCPhys - pRam->GCPhys;
3149 if (off < pRam->cb)
3150 {
3151 /* does it match? */
3152 const unsigned iPage = off >> PAGE_SHIFT;
3153 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3154#ifdef LOG_ENABLED
3155            RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3156            Log(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3157#endif
3158 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3159 {
3160 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3161 return;
3162 }
3163 break;
3164 }
3165 pRam = pRam->CTX_SUFF(pNext);
3166 }
3167 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3168}
3169
3170
3171/**
3172 * Clear references to guest physical memory.
3173 *
3174 * @param pPool The pool.
3175 * @param pPage The page.
3176 * @param HCPhys The host physical address corresponding to the guest page.
3177 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3178 */
3179static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3180{
3181 /*
3182 * Walk range list.
3183 */
3184 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3185 while (pRam)
3186 {
3187 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3188 if (off < pRam->cb)
3189 {
3190 /* does it match? */
3191 const unsigned iPage = off >> PAGE_SHIFT;
3192 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3193 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3194 {
3195 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3196 return;
3197 }
3198 break;
3199 }
3200 pRam = pRam->CTX_SUFF(pNext);
3201 }
3202
3203 /*
3204 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3205 */
3206 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3207 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3208 while (pRam)
3209 {
3210 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3211 while (iPage-- > 0)
3212 {
3213 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3214 {
3215 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3216 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3217 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3218 return;
3219 }
3220 }
3221 pRam = pRam->CTX_SUFF(pNext);
3222 }
3223
3224 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3225}
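
/*
 * Illustrative standalone sketch (compiled out): both walks above locate the
 * ram range with a single unsigned compare - when GCPhys is below the range
 * base, the subtraction wraps to a huge value and fails the 'off < cb' test,
 * so one branch covers both bounds. A minimal model of the lookup; the
 * DemoRange type is hypothetical and 4KB pages are assumed:
 */
#if 0
# include <stdint.h>
# include <stddef.h>

typedef struct DemoRange
{
    uint64_t            GCPhys;     /* base address of the range. */
    uint64_t            cb;         /* size of the range in bytes. */
    struct DemoRange   *pNext;      /* next range in the list. */
} DemoRange;

/** Finds the range containing GCPhys and the page index within it. */
static DemoRange *demoFindRange(DemoRange *pHead, uint64_t GCPhys, size_t *piPage)
{
    for (DemoRange *pRam = pHead; pRam; pRam = pRam->pNext)
    {
        uint64_t off = GCPhys - pRam->GCPhys;   /* wraps when GCPhys < base. */
        if (off < pRam->cb)
        {
            *piPage = (size_t)(off >> 12);      /* page index in the range. */
            return pRam;
        }
    }
    return NULL;    /* not found; the caller falls back to a linear scan. */
}
#endif /* 0 */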
3226
3227
3228/**
3229 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3230 *
3231 * @param pPool The pool.
3232 * @param pPage The page.
3233 * @param pShwPT The shadow page table (mapping of the page).
3234 * @param pGstPT The guest page table.
3235 */
3236DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3237{
3238 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3239 if (pShwPT->a[i].n.u1Present)
3240 {
3241 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3242 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3243 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3244 if (!--pPage->cPresent)
3245 break;
3246 }
3247}
3248
3249
3250/**
3251 * Clear references to guest physical memory in a PAE / 32-bit page table.
3252 *
3253 * @param pPool The pool.
3254 * @param pPage The page.
3255 * @param pShwPT The shadow page table (mapping of the page).
3256 * @param pGstPT The guest page table (just a half one).
3257 */
3258DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3259{
3260 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3261 if (pShwPT->a[i].n.u1Present)
3262 {
3263 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
3264 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3265 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3266 }
3267}
3268
3269
3270/**
3271 * Clear references to guest physical memory in a PAE / PAE page table.
3272 *
3273 * @param pPool The pool.
3274 * @param pPage The page.
3275 * @param pShwPT The shadow page table (mapping of the page).
3276 * @param pGstPT The guest page table.
3277 */
3278DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3279{
3280 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3281 if (pShwPT->a[i].n.u1Present)
3282 {
3283 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
3284 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3285 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3286 }
3287}
3288
3289
3290/**
3291 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3292 *
3293 * @param pPool The pool.
3294 * @param pPage The page.
3295 * @param pShwPT The shadow page table (mapping of the page).
3296 */
3297DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3298{
3299 RTGCPHYS GCPhys = pPage->GCPhys;
3300 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3301 if (pShwPT->a[i].n.u1Present)
3302 {
3303 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3304 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3305 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3306 }
3307}
3308
3309
3310/**
3311 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3312 *
3313 * @param pPool The pool.
3314 * @param pPage The page.
3315 * @param pShwPT The shadow page table (mapping of the page).
3316 */
3317DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3318{
3319 RTGCPHYS GCPhys = pPage->GCPhys;
3320 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3321 if (pShwPT->a[i].n.u1Present)
3322 {
3323 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3324 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3325 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3326 }
3327}
3328
3329#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3330
3331/**
3332 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
3333 *
3334 * @param pPool The pool.
3335 * @param pPage The page.
3336 * @param pShwPD The shadow page directory (mapping of the page).
3337 */
3338DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3339{
3340 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3341 {
3342 if (pShwPD->a[i].n.u1Present)
3343 {
3344 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3345 if (pSubPage)
3346 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3347 else
3348 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3349 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3350 }
3351 }
3352}
3353
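/*
 * Illustrative note, not part of the original sources: directory-level deref
 * operates on shadow pages rather than guest RAM. Each present entry points
 * at another pool page, which is looked up by host physical address in the
 * pool's AVL tree before this page's user record is removed:
 *
 *    PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree,
 *                                                          pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
 *    pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i); // user = (this page, entry i)
 *
 * A missing tree entry means the pool bookkeeping is corrupt, hence the fatal
 * assertion. The same shape repeats below for PDPTs, PML4s and the EPT kinds.
 */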
3354
3355/**
3356 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3357 *
3358 * @param pPool The pool.
3359 * @param pPage The page.
3360 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3361 */
3362DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3363{
3364 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3365 {
3366 if (pShwPDPT->a[i].n.u1Present)
3367 {
3368 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3369 if (pSubPage)
3370 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3371 else
3372 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3373 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3374 }
3375 }
3376}
3377
3378
3379/**
3380 * Clear references to shadowed pages in a 64-bit level 4 page table.
3381 *
3382 * @param pPool The pool.
3383 * @param pPage The page.
3384 * @param pShwPML4 The shadow page map level 4 (mapping of the page).
3385 */
3386DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3387{
3388 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3389 {
3390 if (pShwPML4->a[i].n.u1Present)
3391 {
3392 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PML4E_PG_MASK);
3393 if (pSubPage)
3394 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3395 else
3396 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3397 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3398 }
3399 }
3400}
3401
3402
3403/**
3404 * Clear references to shadowed pages in an EPT page table.
3405 *
3406 * @param pPool The pool.
3407 * @param pPage The page.
3408 * @param pShwPT The shadow page table (mapping of the page).
3409 */
3410DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3411{
3412 RTGCPHYS GCPhys = pPage->GCPhys;
3413 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3414 if (pShwPT->a[i].n.u1Present)
3415 {
3416 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RGp\n",
3417 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3418 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3419 }
3420}
3421
3422
3423/**
3424 * Clear references to shadowed pages in an EPT page directory.
3425 *
3426 * @param pPool The pool.
3427 * @param pPage The page.
3428 * @param pShwPD The shadow page directory (mapping of the page).
3429 */
3430DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3431{
3432 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3433 {
3434 if (pShwPD->a[i].n.u1Present)
3435 {
3436 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3437 if (pSubPage)
3438 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3439 else
3440 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3441 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3442 }
3443 }
3444}
3445
3446
3447/**
3448 * Clear references to shadowed pages in an EPT page directory pointer table.
3449 *
3450 * @param pPool The pool.
3451 * @param pPage The page.
3452 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3453 */
3454DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3455{
3456 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3457 {
3458 if (pShwPDPT->a[i].n.u1Present)
3459 {
3460 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3461 if (pSubPage)
3462 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3463 else
3464 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3465 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3466 }
3467 }
3468}
3469
3470
3471/**
3472 * Clears all references made by this page.
3473 *
3474 * This includes other shadow pages and GC physical addresses.
3475 *
3476 * @param pPool The pool.
3477 * @param pPage The page.
3478 */
3479static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3480{
3481 /*
3482 * Map the shadow page and take action according to the page kind.
3483 */
3484 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3485 switch (pPage->enmKind)
3486 {
3487#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3488 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3489 {
3490 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3491 void *pvGst;
3492 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3493 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3494 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3495 break;
3496 }
3497
3498 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3499 {
3500 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3501 void *pvGst;
3502 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3503 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3504 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3505 break;
3506 }
3507
3508 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3509 {
3510 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3511 void *pvGst;
3512 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3513 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3514 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3515 break;
3516 }
3517
3518 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3519 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3520 {
3521 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3522 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3523 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3524 break;
3525 }
3526
3527 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3528 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3529 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3530 {
3531 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3532 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3533 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3534 break;
3535 }
3536
3537#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3538 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3539 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3540 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3541 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3542 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3543 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3544 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3545 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3546 break;
3547#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3548
3549 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
3550 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3551 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3552 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3553 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3554 break;
3555
3556 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3557 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3558 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3559 break;
3560
3561 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
3562 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3563 break;
3564
3565 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3566 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
3567 break;
3568
3569 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3570 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
3571 break;
3572
3573 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3574 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
3575 break;
3576
3577 default:
3578 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3579 }
3580
3581 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3582 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3583 ASMMemZeroPage(pvShw);
3584 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3585 pPage->fZeroed = true;
3586}
3587
3588#endif /* PGMPOOL_WITH_USER_TRACKING */
3589
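/*
 * Illustrative note, not part of the original sources: pgmPoolTrackDeref()
 * leaves the page zeroed and flags it via fZeroed, which is what later lets
 * pgmPoolAlloc() skip its own ASMMemZeroPage() pass:
 *
 *    pgmPoolTrackDeref(pPool, pPage);  // drops refs, zeroes, sets fZeroed
 *    ...
 *    if (!pPage->fZeroed)              // in pgmPoolAlloc(): already clean
 *        ASMMemZeroPage(pv);
 */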
3590/**
3591 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3592 *
3593 * @param pPool The pool.
3594 */
3595static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3596{
3597 /*
3598 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST-1.
3599 */
3600 Assert(NIL_PGMPOOL_IDX == 0);
3601 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3602 {
3603 /*
3604 * Get the page address.
3605 */
3606 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3607 union
3608 {
3609 uint64_t *pau64;
3610 uint32_t *pau32;
3611 } u;
3612
3613 /*
3614 * Mark stuff not present.
3615 */
3616 switch (pPage->enmKind)
3617 {
3618 case PGMPOOLKIND_ROOT_32BIT_PD:
3619 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3620 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3621 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3622 u.pau32[iPage] = 0;
3623 break;
3624
3625 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3626 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3627 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
3628 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3629 u.pau64[iPage] = 0;
3630 break;
3631
3632 case PGMPOOLKIND_ROOT_PDPT:
3633 /* Not root of shadowed pages currently, ignore it. */
3634 break;
3635
3636 case PGMPOOLKIND_ROOT_NESTED:
3637 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3638 ASMMemZero32(u.pau64, PAGE_SIZE);
3639 break;
3640 }
3641 }
3642
3643 /*
3644 * Paranoia (to be removed), flag a global CR3 sync.
3645 */
3646 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
3647}
3648
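/*
 * Illustrative note, not part of the original sources: the special-root sweep
 * above must preserve hypervisor mapping entries, which is why it tests
 *
 *    (u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P
 *
 * i.e. only entries that are present and NOT flagged as mappings get zapped.
 * The pau32/pau64 union simply lets the same mapping be walked with whichever
 * entry width matches the page kind.
 */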
3649
3650/**
3651 * Flushes the entire pool and cache.
3652 *
3653 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3654 * and will execute the CR3 flush.
3655 *
3656 * @param pPool The pool.
3657 */
3658static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3659{
3660 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3661 LogFlow(("pgmPoolFlushAllInt:\n"));
3662
3663 /*
3664 * If there are no pages in the pool, there is nothing to do.
3665 */
3666 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3667 {
3668 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3669 return;
3670 }
3671
3672 /*
3673 * Nuke the free list and reinsert all pages into it.
3674 */
3675 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3676 {
3677 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3678
3679#ifdef IN_RING3
3680 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMR3, pPage->pvPageR3));
3681#endif
3682#ifdef PGMPOOL_WITH_MONITORING
3683 if (pPage->fMonitored)
3684 pgmPoolMonitorFlush(pPool, pPage);
3685 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3686 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3687 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3688 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3689 pPage->cModifications = 0;
3690#endif
3691 pPage->GCPhys = NIL_RTGCPHYS;
3692 pPage->enmKind = PGMPOOLKIND_FREE;
3693 Assert(pPage->idx == i);
3694 pPage->iNext = i + 1;
3695 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3696 pPage->fSeenNonGlobal = false;
3697 pPage->fMonitored = false;
3698 pPage->fCached = false;
3699 pPage->fReusedFlushPending = false;
3700 pPage->fCR3Mix = false;
3701#ifdef PGMPOOL_WITH_USER_TRACKING
3702 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3703#endif
3704#ifdef PGMPOOL_WITH_CACHE
3705 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3706 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3707#endif
3708 }
3709 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3710 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3711 pPool->cUsedPages = 0;
3712
3713#ifdef PGMPOOL_WITH_USER_TRACKING
3714 /*
3715 * Zap and reinitialize the user records.
3716 */
3717 pPool->cPresent = 0;
3718 pPool->iUserFreeHead = 0;
3719 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3720 const unsigned cMaxUsers = pPool->cMaxUsers;
3721 for (unsigned i = 0; i < cMaxUsers; i++)
3722 {
3723 paUsers[i].iNext = i + 1;
3724 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3725 paUsers[i].iUserTable = 0xfffffffe;
3726 }
3727 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3728#endif
3729
3730#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3731 /*
3732 * Clear all the GCPhys links and rebuild the phys ext free list.
3733 */
3734 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3735 pRam;
3736 pRam = pRam->CTX_SUFF(pNext))
3737 {
3738 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3739 while (iPage-- > 0)
3740 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3741 }
3742
3743 pPool->iPhysExtFreeHead = 0;
3744 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3745 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3746 for (unsigned i = 0; i < cMaxPhysExts; i++)
3747 {
3748 paPhysExts[i].iNext = i + 1;
3749 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3750 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3751 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3752 }
3753 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3754#endif
3755
3756#ifdef PGMPOOL_WITH_MONITORING
3757 /*
3758 * Just zap the modified list.
3759 */
3760 pPool->cModifiedPages = 0;
3761 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3762#endif
3763
3764#ifdef PGMPOOL_WITH_CACHE
3765 /*
3766 * Clear the GCPhys hash and the age list.
3767 */
3768 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
3769 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3770 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3771 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3772#endif
3773
3774 /*
3775 * Flush all the special root pages.
3776 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3777 */
3778 pgmPoolFlushAllSpecialRoots(pPool);
3779 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3780 {
3781 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3782 pPage->iNext = NIL_PGMPOOL_IDX;
3783#ifdef PGMPOOL_WITH_MONITORING
3784 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3785 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3786 pPage->cModifications = 0;
3787 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3788 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3789 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3790 if (pPage->fMonitored)
3791 {
3792 PVM pVM = pPool->CTX_SUFF(pVM);
3793 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3794 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3795 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3796 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
3797 pPool->pszAccessHandler);
3798 AssertFatalRCSuccess(rc);
3799# ifdef PGMPOOL_WITH_CACHE
3800 pgmPoolHashInsert(pPool, pPage);
3801# endif
3802 }
3803#endif
3804#ifdef PGMPOOL_WITH_USER_TRACKING
3805 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3806#endif
3807#ifdef PGMPOOL_WITH_CACHE
3808 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3809 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3810#endif
3811 }
3812
3813 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3814}
3815
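/*
 * Illustrative note, not part of the original sources: on return from
 * pgmPoolFlushAllInt() the free list is a plain ascending chain again,
 *
 *    pPool->iFreeHead                          == PGMPOOL_IDX_FIRST
 *    pPool->aPages[i].iNext                    == i + 1
 *    pPool->aPages[pPool->cCurPages - 1].iNext == NIL_PGMPOOL_IDX
 *
 * and the caller is expected to service the VM_FF_PGM_SYNC_CR3 force-action
 * flag before resuming the guest, since all shadow paging state is gone.
 */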
3816
3817/**
3818 * Flushes a pool page.
3819 *
3820 * This moves the page to the free list after removing all user references to it.
3821 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3822 *
3823 * @returns VBox status code.
3824 * @retval VINF_SUCCESS on success.
3825 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
3826 * @param pPool The pool.
3827 * @param pPage The shadow page.
3828 */
3829int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3830{
3831 int rc = VINF_SUCCESS;
3832 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3833 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%d, .GCPhys=%RGp}\n",
3834 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3835
3836 /*
3837 * Quietly reject any attempts at flushing any of the special root pages.
3838 */
3839 if (pPage->idx < PGMPOOL_IDX_FIRST)
3840 {
3841 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3842 return VINF_SUCCESS;
3843 }
3844
3845 /*
3846 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping.
3847 */
3848 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
3849 {
3850 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4,
3851 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
3852 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3853 return VINF_SUCCESS;
3854 }
3855
3856 /*
3857 * Mark the page as being in need of an ASMMemZeroPage().
3858 */
3859 pPage->fZeroed = false;
3860
3861#ifdef PGMPOOL_WITH_USER_TRACKING
3862 /*
3863 * Clear the page.
3864 */
3865 pgmPoolTrackClearPageUsers(pPool, pPage);
3866 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3867 pgmPoolTrackDeref(pPool, pPage);
3868 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3869#endif
3870
3871#ifdef PGMPOOL_WITH_CACHE
3872 /*
3873 * Flush it from the cache.
3874 */
3875 pgmPoolCacheFlushPage(pPool, pPage);
3876#endif /* PGMPOOL_WITH_CACHE */
3877
3878#ifdef PGMPOOL_WITH_MONITORING
3879 /*
3880 * Deregister the monitoring.
3881 */
3882 if (pPage->fMonitored)
3883 rc = pgmPoolMonitorFlush(pPool, pPage);
3884#endif
3885
3886 /*
3887 * Free the page.
3888 */
3889 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3890 pPage->iNext = pPool->iFreeHead;
3891 pPool->iFreeHead = pPage->idx;
3892 pPage->enmKind = PGMPOOLKIND_FREE;
3893 pPage->GCPhys = NIL_RTGCPHYS;
3894 pPage->fReusedFlushPending = false;
3895
3896 pPool->cUsedPages--;
3897 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3898 return rc;
3899}
3900
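/*
 * Hypothetical caller sketch, not from the original sources: the
 * VERR_PGM_POOL_CLEARED status is normally treated as "the pool got flushed
 * under us" rather than as a hard failure:
 *
 *    int rc = pgmPoolFlushPage(pPool, pPage);
 *    if (rc == VERR_PGM_POOL_CLEARED)
 *        rc = VINF_SUCCESS; // a lightweight pool flush was triggered; the CR3 resync cleans up
 *    AssertRC(rc);
 */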
3901
3902/**
3903 * Frees a usage of a pool page.
3904 *
3905 * The caller is responsible for updating the user table so that it no longer
3906 * references the shadow page.
3907 *
3908 * @param pPool The pool.
3909 * @param pPage The shadow page.
3910 * @param iUser The shadow page pool index of the user table.
3911 * @param iUserTable The index into the user table (shadowed).
3912 */
3913void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3914{
3915 STAM_PROFILE_START(&pPool->StatFree, a);
3916 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3917 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3918 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3919#ifdef PGMPOOL_WITH_USER_TRACKING
3920 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3921#endif
3922#ifdef PGMPOOL_WITH_CACHE
3923 if (!pPage->fCached)
3924#endif
3925 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3926 STAM_PROFILE_STOP(&pPool->StatFree, a);
3927}
3928
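/*
 * Hypothetical caller sketch, not from the original sources (pShwPD, pSubPage,
 * iPdEntry and pUserPage are made-up names): pgmPoolFreeByPage() only drops
 * the tracking record and possibly flushes the page; clearing the user table
 * entry itself is the caller's job:
 *
 *    pShwPD->a[iPdEntry].u = 0;                                    // caller updates the user table
 *    pgmPoolFreeByPage(pPool, pSubPage, pUserPage->idx, iPdEntry); // then releases the usage
 */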
3929
3930/**
3931 * Makes one or more pages free.
3932 *
3933 * @returns VBox status code.
3934 * @retval VINF_SUCCESS on success.
3935 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3936 *
3937 * @param pPool The pool.
3938 * @param iUser The user of the page.
3939 */
3940static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3941{
3942 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3943
3944 /*
3945 * If the pool isn't full grown yet, expand it.
3946 */
3947 if (pPool->cCurPages < pPool->cMaxPages)
3948 {
3949 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3950#ifdef IN_RING3
3951 int rc = PGMR3PoolGrow(pPool->pVMR3);
3952#else
3953 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3954#endif
3955 if (RT_FAILURE(rc))
3956 return rc;
3957 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3958 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3959 return VINF_SUCCESS;
3960 }
3961
3962#ifdef PGMPOOL_WITH_CACHE
3963 /*
3964 * Free one cached page.
3965 */
3966 return pgmPoolCacheFreeOne(pPool, iUser);
3967#else
3968 /*
3969 * Flush the pool.
3970 * If we have tracking enabled, it should be possible to come up with
3971 * a cheap replacement strategy...
3972 */
3973 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
3974 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
3975 pgmPoolFlushAllInt(pPool);
3976 return VERR_PGM_POOL_FLUSHED;
3977#endif
3978}
3979
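/*
 * Illustrative note, not part of the original sources: the pool can only be
 * grown in ring-3, so the raw-mode and ring-0 paths bounce to the host via
 * VMMCALLHOST_PGM_POOL_GROW and retry. Only when the pool is already at
 * cMaxPages does the code fall back to evicting a cached page (or, without
 * PGMPOOL_WITH_CACHE, flushing the whole pool).
 */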
3980
3981/**
3982 * Allocates a page from the pool.
3983 *
3984 * This page may actually be a cached page and not in need of any processing
3985 * on the caller's part.
3986 *
3987 * @returns VBox status code.
3988 * @retval VINF_SUCCESS if a NEW page was allocated.
3989 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3990 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3991 * @param pVM The VM handle.
3992 * @param GCPhys The GC physical address of the page we're going to shadow.
3993 * For 4MB and 2MB PD entries, it's the first address the
3994 * shadow PT is covering.
3995 * @param enmKind The kind of mapping.
3996 * @param iUser The shadow page pool index of the user table.
3997 * @param iUserTable The index into the user table (shadowed).
3998 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3999 */
4000int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
4001{
4002 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4003 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4004 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
4005 *ppPage = NULL;
4006
4007#ifdef PGMPOOL_WITH_CACHE
4008 if (pPool->fCacheEnabled)
4009 {
4010 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
4011 if (RT_SUCCESS(rc2))
4012 {
4013 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4014 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4015 return rc2;
4016 }
4017 }
4018#endif
4019
4020 /*
4021 * Allocate a new one.
4022 */
4023 int rc = VINF_SUCCESS;
4024 uint16_t iNew = pPool->iFreeHead;
4025 if (iNew == NIL_PGMPOOL_IDX)
4026 {
4027 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
4028 if (RT_FAILURE(rc))
4029 {
4030 if (rc != VERR_PGM_POOL_CLEARED)
4031 {
4032 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4033 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4034 return rc;
4035 }
4036 Log(("pgmPoolMakeMoreFreePages failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
4037 rc = VERR_PGM_POOL_FLUSHED;
4038 }
4039 iNew = pPool->iFreeHead;
4040 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4041 }
4042
4043 /* unlink the free head */
4044 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4045 pPool->iFreeHead = pPage->iNext;
4046 pPage->iNext = NIL_PGMPOOL_IDX;
4047
4048 /*
4049 * Initialize it.
4050 */
4051 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4052 pPage->enmKind = enmKind;
4053 pPage->GCPhys = GCPhys;
4054 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4055 pPage->fMonitored = false;
4056 pPage->fCached = false;
4057 pPage->fReusedFlushPending = false;
4058 pPage->fCR3Mix = false;
4059#ifdef PGMPOOL_WITH_MONITORING
4060 pPage->cModifications = 0;
4061 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4062 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4063#endif
4064#ifdef PGMPOOL_WITH_USER_TRACKING
4065 pPage->cPresent = 0;
4066 pPage->iFirstPresent = ~0;
4067
4068 /*
4069 * Insert into the tracking and cache. If this fails, free the page.
4070 */
4071 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4072 if (RT_FAILURE(rc3))
4073 {
4074 if (rc3 != VERR_PGM_POOL_CLEARED)
4075 {
4076 pPool->cUsedPages--;
4077 pPage->enmKind = PGMPOOLKIND_FREE;
4078 pPage->GCPhys = NIL_RTGCPHYS;
4079 pPage->iNext = pPool->iFreeHead;
4080 pPool->iFreeHead = pPage->idx;
4081 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4082 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4083 return rc3;
4084 }
4085 Log(("pgmPoolTrackInsert failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4086 rc = VERR_PGM_POOL_FLUSHED;
4087 }
4088#endif /* PGMPOOL_WITH_USER_TRACKING */
4089
4090 /*
4091 * Commit the allocation, clear the page and return.
4092 */
4093#ifdef VBOX_WITH_STATISTICS
4094 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4095 pPool->cUsedPagesHigh = pPool->cUsedPages;
4096#endif
4097
4098 if (!pPage->fZeroed)
4099 {
4100 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4101 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4102 ASMMemZeroPage(pv);
4103 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4104 }
4105
4106 *ppPage = pPage;
4107 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4108 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4109 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4110 return rc;
4111}
4112
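/*
 * Hypothetical usage sketch, not from the original sources; GCPhysPT,
 * pPdPage and iPdEntry are made-up names:
 *
 *    PPGMPOOLPAGE pShwPage;
 *    int rc = pgmPoolAlloc(pVM, GCPhysPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                          pPdPage->idx, iPdEntry, &pShwPage);
 *    if (rc == VINF_PGM_CACHED_PAGE)
 *        ;  // reusing a cached shadow PT; its entries are already valid
 *    else if (RT_SUCCESS(rc))
 *        ;  // fresh zeroed page; map it with PGMPOOL_PAGE_2_PTR() and fill it
 *    else
 *        ;  // e.g. VERR_PGM_POOL_FLUSHED: all shadow state gone, resync CR3
 */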
4113
4114/**
4115 * Frees a usage of a pool page.
4116 *
4117 * @param pVM The VM handle.
4118 * @param HCPhys The HC physical address of the shadow page.
4119 * @param iUser The shadow page pool index of the user table.
4120 * @param iUserTable The index into the user table (shadowed).
4121 */
4122void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4123{
4124 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4125 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4126 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4127}
4128
4129
4130/**
4131 * Gets an in-use page in the pool by its physical address.
4132 *
4133 * @returns Pointer to the page.
4134 * @param pVM The VM handle.
4135 * @param HCPhys The HC physical address of the shadow page.
4136 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4137 */
4138PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4139{
4140 /** @todo profile this! */
4141 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4142 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4143 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%RHp -> %p:{.idx=%d .GCPhys=%RGp .enmKind=%d}\n",
4144 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
4145 return pPage;
4146}
4147
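/*
 * Illustrative note, not part of the original sources: pgmPoolFree() above is
 * just the HCPhys-keyed convenience wrapper; resolving the page first and
 * calling pgmPoolFreeByPage() directly is equivalent:
 *
 *    PPGMPOOLPAGE pPage = pgmPoolGetPageByHCPhys(pVM, HCPhys);
 *    pgmPoolFreeByPage(pVM->pgm.s.CTX_SUFF(pPool), pPage, iUser, iUserTable);
 */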
4148
4149/**
4150 * Flushes the entire pool and cache.
4151 *
4152 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4153 * and will execute the CR3 flush.
4154 *
4155 * @param pVM The VM handle.
4156 */
4157void pgmPoolFlushAll(PVM pVM)
4158{
4159 LogFlow(("pgmPoolFlushAll:\n"));
4160 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4161}
4162