VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 13782

Last change on this file since 13782 was 13585, checked in by vboxsync, 16 years ago

Refresh the cr3 mapping by putting it at the head of the age list. (pml4 root only)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 147.8 KB
Line 
1/* $Id: PGMAllPool.cpp 13585 2008-10-27 16:07:58Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_GC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42
43
44/*******************************************************************************
45* Internal Functions *
46*******************************************************************************/
47__BEGIN_DECLS
48static void pgmPoolFlushAllInt(PPGMPOOL pPool);
49#ifdef PGMPOOL_WITH_USER_TRACKING
50DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
51DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#endif
54#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
55static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
56#endif
57#ifdef PGMPOOL_WITH_CACHE
58static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
59#endif
60#ifdef PGMPOOL_WITH_MONITORING
61static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
62#endif
63#ifndef IN_RING3
64DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
65#endif
66__END_DECLS
67
68
69/**
70 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
71 *
72 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
73 * @param enmKind The page kind.
74 */
75DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
76{
77 switch (enmKind)
78 {
79 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
82 return true;
83 default:
84 return false;
85 }
86}
87
88
#if defined(IN_GC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
/**
 * Maps a pool page into the current context.
 *
 * @returns Pointer to the mapping.
 * @param   pVM     The VM handle.
 * @param   pPage   The page to map.
 */
void *pgmPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
{
    /*
     * Ordinary pool pages: dynamically map them by host physical address.
     */
    if (pPage->idx >= PGMPOOL_IDX_FIRST)
    {
        Assert(pPage->idx < pVM->pgm.s.CTX_SUFF(pPool)->cCurPages);
        void *pvMapping;
        int rc = PGMDynMapHCPage(pVM, pPage->Core.Key, &pvMapping);
        AssertReleaseRC(rc);
        return pvMapping;
    }

    /*
     * Special root pages (CR3 roots and friends).
     */
# ifdef IN_GC
    /* In GC these roots have permanent mappings we can hand out directly. */
    switch (pPage->idx)
    {
        case PGMPOOL_IDX_PD:        return pVM->pgm.s.pGC32BitPD;
        case PGMPOOL_IDX_PAE_PD:
        case PGMPOOL_IDX_PAE_PD_0:  return pVM->pgm.s.apGCPaePDs[0];
        case PGMPOOL_IDX_PAE_PD_1:  return pVM->pgm.s.apGCPaePDs[1];
        case PGMPOOL_IDX_PAE_PD_2:  return pVM->pgm.s.apGCPaePDs[2];
        case PGMPOOL_IDX_PAE_PD_3:  return pVM->pgm.s.apGCPaePDs[3];
        case PGMPOOL_IDX_PDPT:      return pVM->pgm.s.pGCPaePDPT;
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }

# else  /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
    /*
     * In R0 the roots are dynamically mapped like any other page.
     * NOTE(review): the switch fetches HCPhys but the mapping below goes by
     * pPage->Core.Key, so the switch effectively only validates the index.
     */
    RTHCPHYS HCPhys;
    switch (pPage->idx)
    {
        case PGMPOOL_IDX_PD:        HCPhys = pVM->pgm.s.HCPhys32BitPD;    break;
        case PGMPOOL_IDX_PAE_PD:
        case PGMPOOL_IDX_PAE_PD_0:  HCPhys = pVM->pgm.s.aHCPhysPaePDs[0]; break;
        case PGMPOOL_IDX_PAE_PD_1:  HCPhys = pVM->pgm.s.aHCPhysPaePDs[1]; break;
        case PGMPOOL_IDX_PAE_PD_2:  HCPhys = pVM->pgm.s.aHCPhysPaePDs[2]; break;
        case PGMPOOL_IDX_PAE_PD_3:  HCPhys = pVM->pgm.s.aHCPhysPaePDs[3]; break;
        case PGMPOOL_IDX_PDPT:      HCPhys = pVM->pgm.s.HCPhysPaePDPT;    break;
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }
    void *pvMapping;
    int rc = PGMDynMapHCPage(pVM, pPage->Core.Key, &pvMapping);
    AssertReleaseRC(rc);
    return pvMapping;
# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
}
#endif /* IN_GC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
165
166
167#ifdef PGMPOOL_WITH_MONITORING
168/**
169 * Determin the size of a write instruction.
170 * @returns number of bytes written.
171 * @param pDis The disassembler state.
172 */
173static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
174{
175 /*
176 * This is very crude and possibly wrong for some opcodes,
177 * but since it's not really supposed to be called we can
178 * probably live with that.
179 */
180 return DISGetParamSize(pDis, &pDis->param1);
181}
182
183
184/**
185 * Flushes a chain of pages sharing the same access monitor.
186 *
187 * @returns VBox status code suitable for scheduling.
188 * @param pPool The pool.
189 * @param pPage A page in the chain.
190 */
191int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
192{
193 LogFlow(("pgmPoolMonitorChainFlush: Flush page %VGp type=%d\n", pPage->GCPhys, pPage->enmKind));
194
195 /*
196 * Find the list head.
197 */
198 uint16_t idx = pPage->idx;
199 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
200 {
201 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
202 {
203 idx = pPage->iMonitoredPrev;
204 Assert(idx != pPage->idx);
205 pPage = &pPool->aPages[idx];
206 }
207 }
208
209 /*
210 * Iterate the list flushing each shadow page.
211 */
212 int rc = VINF_SUCCESS;
213 for (;;)
214 {
215 idx = pPage->iMonitoredNext;
216 Assert(idx != pPage->idx);
217 if (pPage->idx >= PGMPOOL_IDX_FIRST)
218 {
219 int rc2 = pgmPoolFlushPage(pPool, pPage);
220 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
221 rc = VINF_PGM_SYNC_CR3;
222 }
223 /* next */
224 if (idx == NIL_PGMPOOL_IDX)
225 break;
226 pPage = &pPool->aPages[idx];
227 }
228 return rc;
229}
230
231
/**
 * Wrapper for getting the current context pointer to the entry being modified.
 *
 * @returns Pointer to the current context mapping of the entry.
 * @param   pPool       The pool.
 * @param   pvFault     The fault virtual address (host pointer in R3,
 *                      guest pointer otherwise).
 * @param   GCPhysFault The fault physical address.
 * @param   cbEntry     The entry size (used to align the address down;
 *                      must be a power of two).
 */
#ifdef IN_RING3
DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
#else
DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
#endif
{
#ifdef IN_GC
    /* GC: the guest address is directly accessible; just align it down to the entry. */
    return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));

#elif defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
    /* R0 with split address space: dynamically map the guest page by physical address. */
    void *pvRet;
    int rc = PGMDynMapGCPageOff(pPool->pVMR0, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
    AssertFatalRCSuccess(rc);
    return pvRet;

#elif defined(IN_RING0)
    /* Plain R0: translate the aligned guest physical address to a host pointer. */
    void *pvRet;
    int rc = pgmRamGCPhys2HCPtr(&pPool->pVMR0->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
    AssertFatalRCSuccess(rc);
    return pvRet;

#elif defined(IN_RING3)
    /* R3: pvFault is already a host pointer; align it down to the entry. */
    return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
#else
# error "huh?"
#endif
}
268
269
270/**
271 * Process shadow entries before they are changed by the guest.
272 *
273 * For PT entries we will clear them. For PD entries, we'll simply check
274 * for mapping conflicts and set the SyncCR3 FF if found.
275 *
276 * @param pPool The pool.
277 * @param pPage The head page.
278 * @param GCPhysFault The guest physical fault address.
279 * @param uAddress In R0 and GC this is the guest context fault address (flat).
280 * In R3 this is the host context 'fault' address.
281 * @param pCpu The disassembler state for figuring out the write size.
282 * This need not be specified if the caller knows we won't do cross entry accesses.
283 */
284#ifdef IN_RING3
285void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
286#else
287void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
288#endif
289{
290 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
291 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
292 const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
293
294 LogFlow(("pgmPoolMonitorChainChanging: %VGv phys=%VGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));
295
296 for (;;)
297 {
298 union
299 {
300 void *pv;
301 PX86PT pPT;
302 PX86PTPAE pPTPae;
303 PX86PD pPD;
304 PX86PDPAE pPDPae;
305 PX86PDPT pPDPT;
306 PX86PML4 pPML4;
307 } uShw;
308 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
309
310 switch (pPage->enmKind)
311 {
312 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
313 {
314 const unsigned iShw = off / sizeof(X86PTE);
315 if (uShw.pPT->a[iShw].n.u1Present)
316 {
317# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
318 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
319 Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
320 pgmPoolTracDerefGCPhysHint(pPool, pPage,
321 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
322 pGstPte->u & X86_PTE_PG_MASK);
323# endif
324 uShw.pPT->a[iShw].u = 0;
325 }
326 break;
327 }
328
329 /* page/2 sized */
330 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
331 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
332 {
333 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
334 if (uShw.pPTPae->a[iShw].n.u1Present)
335 {
336# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
337 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
338 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
339 pgmPoolTracDerefGCPhysHint(pPool, pPage,
340 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
341 pGstPte->u & X86_PTE_PG_MASK);
342# endif
343 uShw.pPTPae->a[iShw].u = 0;
344 }
345 }
346 break;
347
348 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
349 {
350 const unsigned iShw = off / sizeof(X86PTEPAE);
351 if (uShw.pPTPae->a[iShw].n.u1Present)
352 {
353# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
354 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
355 Log4(("pgmPoolMonitorChainChanging pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
356 pgmPoolTracDerefGCPhysHint(pPool, pPage,
357 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
358 pGstPte->u & X86_PTE_PAE_PG_MASK);
359# endif
360 uShw.pPTPae->a[iShw].u = 0;
361 }
362
363 /* paranoia / a bit assumptive. */
364 if ( pCpu
365 && (off & 7)
366 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
367 {
368 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
369 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
370
371 if (uShw.pPTPae->a[iShw2].n.u1Present)
372 {
373# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
374 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
375 Log4(("pgmPoolMonitorChainChanging pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
376 pgmPoolTracDerefGCPhysHint(pPool, pPage,
377 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
378 pGstPte->u & X86_PTE_PAE_PG_MASK);
379# endif
380 uShw.pPTPae->a[iShw2].u = 0;
381 }
382 }
383
384 break;
385 }
386
387 case PGMPOOLKIND_ROOT_32BIT_PD:
388 {
389 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
390 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
391 {
392 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
393 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
394 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
395 }
396 /* paranoia / a bit assumptive. */
397 else if ( pCpu
398 && (off & 3)
399 && (off & 3) + cbWrite > sizeof(X86PTE))
400 {
401 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
402 if ( iShw2 != iShw
403 && iShw2 < RT_ELEMENTS(uShw.pPD->a)
404 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
405 {
406 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
407 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
408 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
409 }
410 }
411#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
412 if ( uShw.pPD->a[iShw].n.u1Present
413 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
414 {
415 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
416# ifdef IN_GC /* TLB load - we're pushing things a bit... */
417 ASMProbeReadByte(pvAddress);
418# endif
419 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
420 uShw.pPD->a[iShw].u = 0;
421 }
422#endif
423 break;
424 }
425
426 case PGMPOOLKIND_ROOT_PAE_PD:
427 {
428 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
429 for (unsigned i = 0; i < 2; i++, iShw++)
430 {
431 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
432 {
433 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
434 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
435 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
436 }
437 /* paranoia / a bit assumptive. */
438 else if ( pCpu
439 && (off & 3)
440 && (off & 3) + cbWrite > 4)
441 {
442 const unsigned iShw2 = iShw + 2;
443 if ( iShw2 < RT_ELEMENTS(uShw.pPDPae->a)
444 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
445 {
446 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
447 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
448 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
449 }
450 }
451#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
452 if ( uShw.pPDPae->a[iShw].n.u1Present
453 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
454 {
455 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
456# ifdef IN_GC /* TLB load - we're pushing things a bit... */
457 ASMProbeReadByte(pvAddress);
458# endif
459 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
460 uShw.pPDPae->a[iShw].u = 0;
461 }
462#endif
463 }
464 break;
465 }
466
467 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
468 {
469 const unsigned iShw = off / sizeof(X86PDEPAE);
470 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
471 {
472 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
473 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
474 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
475 }
476#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
477 /*
478 * Causes trouble when the guest uses a PDE to refer to the whole page table level
479 * structure. (Invalidate here; faults later on when it tries to change the page
480 * table entries -> recheck; probably only applies to the RC case.)
481 */
482 else
483 {
484 if (uShw.pPDPae->a[iShw].n.u1Present)
485 {
486 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
487 pgmPoolFree(pPool->CTX_SUFF(pVM),
488 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
489 /* Note: hardcoded PAE implementation dependency */
490 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
491 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
492 uShw.pPDPae->a[iShw].u = 0;
493 }
494 }
495#endif
496 /* paranoia / a bit assumptive. */
497 if ( pCpu
498 && (off & 7)
499 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
500 {
501 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
502 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
503
504 if ( iShw2 != iShw
505 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
506 {
507 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
508 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
509 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
510 }
511#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
512 else if (uShw.pPDPae->a[iShw2].n.u1Present)
513 {
514 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
515 pgmPoolFree(pPool->CTX_SUFF(pVM),
516 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
517 /* Note: hardcoded PAE implementation dependency */
518 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
519 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
520 uShw.pPDPae->a[iShw2].u = 0;
521 }
522#endif
523 }
524 break;
525 }
526
527 case PGMPOOLKIND_ROOT_PDPT:
528 {
529 /*
530 * Hopefully this doesn't happen very often:
531 * - touching unused parts of the page
532 * - messing with the bits of pd pointers without changing the physical address
533 */
534 const unsigned iShw = off / sizeof(X86PDPE);
535 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
536 {
537 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
538 {
539 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
540 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
541 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
542 }
543 /* paranoia / a bit assumptive. */
544 else if ( pCpu
545 && (off & 7)
546 && (off & 7) + cbWrite > sizeof(X86PDPE))
547 {
548 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
549 if ( iShw2 != iShw
550 && iShw2 < X86_PG_PAE_PDPE_ENTRIES
551 && uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
552 {
553 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
554 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
555 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
556 }
557 }
558 }
559 break;
560 }
561
562#ifndef IN_GC
563 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
564 {
565 Assert(pPage->enmKind == PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD);
566
567 const unsigned iShw = off / sizeof(X86PDEPAE);
568 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
569 {
570 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
571 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
572 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
573 }
574 else
575 {
576 if (uShw.pPDPae->a[iShw].n.u1Present)
577 {
578 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
579 pgmPoolFree(pPool->CTX_SUFF(pVM),
580 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
581 pPage->idx,
582 iShw);
583 uShw.pPDPae->a[iShw].u = 0;
584 }
585 }
586 /* paranoia / a bit assumptive. */
587 if ( pCpu
588 && (off & 7)
589 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
590 {
591 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
592 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
593
594 if ( iShw2 != iShw
595 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
596 {
597 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
598 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
599 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
600 }
601 else
602 if (uShw.pPDPae->a[iShw2].n.u1Present)
603 {
604 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
605 pgmPoolFree(pPool->CTX_SUFF(pVM),
606 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
607 pPage->idx,
608 iShw2);
609 uShw.pPDPae->a[iShw2].u = 0;
610 }
611 }
612 break;
613 }
614
615 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
616 {
617 /*
618 * Hopefully this doesn't happen very often:
619 * - messing with the bits of pd pointers without changing the physical address
620 */
621 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
622 {
623 const unsigned iShw = off / sizeof(X86PDPE);
624 if (uShw.pPDPT->a[iShw].n.u1Present)
625 {
626 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
627 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
628 uShw.pPDPT->a[iShw].u = 0;
629 }
630 /* paranoia / a bit assumptive. */
631 if ( pCpu
632 && (off & 7)
633 && (off & 7) + cbWrite > sizeof(X86PDPE))
634 {
635 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
636 if (uShw.pPDPT->a[iShw2].n.u1Present)
637 {
638 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
639 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
640 uShw.pPDPT->a[iShw2].u = 0;
641 }
642 }
643 }
644 break;
645 }
646
647 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
648 {
649 /*
650 * Hopefully this doesn't happen very often:
651 * - messing with the bits of pd pointers without changing the physical address
652 */
653 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
654 {
655 const unsigned iShw = off / sizeof(X86PDPE);
656 if (uShw.pPML4->a[iShw].n.u1Present)
657 {
658 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
659 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
660 uShw.pPML4->a[iShw].u = 0;
661 }
662 /* paranoia / a bit assumptive. */
663 if ( pCpu
664 && (off & 7)
665 && (off & 7) + cbWrite > sizeof(X86PDPE))
666 {
667 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
668 if (uShw.pPML4->a[iShw2].n.u1Present)
669 {
670 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
671 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
672 uShw.pPML4->a[iShw2].u = 0;
673 }
674 }
675 }
676 break;
677 }
678#endif /* IN_RING0 */
679
680 default:
681 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
682 }
683
684 /* next */
685 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
686 return;
687 pPage = &pPool->aPages[pPage->iMonitoredNext];
688 }
689}
690
691
692# ifndef IN_RING3
693/**
694 * Checks if a access could be a fork operation in progress.
695 *
696 * Meaning, that the guest is setuping up the parent process for Copy-On-Write.
697 *
698 * @returns true if it's likly that we're forking, otherwise false.
699 * @param pPool The pool.
700 * @param pCpu The disassembled instruction.
701 * @param offFault The access offset.
702 */
703DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
704{
705 /*
706 * i386 linux is using btr to clear X86_PTE_RW.
707 * The functions involved are (2.6.16 source inspection):
708 * clear_bit
709 * ptep_set_wrprotect
710 * copy_one_pte
711 * copy_pte_range
712 * copy_pmd_range
713 * copy_pud_range
714 * copy_page_range
715 * dup_mmap
716 * dup_mm
717 * copy_mm
718 * copy_process
719 * do_fork
720 */
721 if ( pCpu->pCurInstr->opcode == OP_BTR
722 && !(offFault & 4)
723 /** @todo Validate that the bit index is X86_PTE_RW. */
724 )
725 {
726 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
727 return true;
728 }
729 return false;
730}
731
732
/**
 * Determine whether the page is likely to have been reused.
 *
 * Heuristic: certain instructions (stack pushes, non-temporal stores,
 * large REP STOS) practically never target live page tables, so a fault
 * from one of them suggests the monitored page now holds ordinary data.
 *
 * @returns true if we consider the page as being reused for a different purpose.
 * @returns false if we consider it to still be a paging page.
 * @param   pVM         VM Handle.
 * @param   pPage       The page in question.
 * @param   pRegFrame   Trap register frame.
 * @param   pCpu        The disassembly info for the faulting instruction.
 * @param   pvFault     The fault address.
 *
 * @remark  The REP prefix check is left to the caller because of STOSD/W.
 */
DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
{
#ifndef IN_GC
    /** @todo could make this general, faulting close to rsp should be safe reuse heuristic. */
    /* Unsigned subtraction: if pvFault is above rsp this wraps huge and the test fails,
       so only faults within 32 bytes *below* rsp match. */
    if (    HWACCMHasPendingIrq(pVM)
        &&  (pRegFrame->rsp - pvFault) < 32)
    {
        /* Fault caused by stack writes while trying to inject an interrupt event. */
        Log(("pgmPoolMonitorIsReused: reused %VGv for interrupt stack (rsp=%VGv).\n", pvFault, pRegFrame->rsp));
        return true;
    }
#else
    NOREF(pVM); NOREF(pvFault);
#endif

    switch (pCpu->pCurInstr->opcode)
    {
        /* call implies the actual push of the return address faulted */
        case OP_CALL:
            Log4(("pgmPoolMonitorIsReused: CALL\n"));
            return true;
        case OP_PUSH:
            Log4(("pgmPoolMonitorIsReused: PUSH\n"));
            return true;
        case OP_PUSHF:
            Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
            return true;
        case OP_PUSHA:
            Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
            return true;
        case OP_FXSAVE:
            Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
            return true;
        case OP_MOVNTI:     /* solaris - block_zero_no_xmm */
            Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
            return true;
        case OP_MOVNTDQ:    /* solaris - hwblkclr & hwblkpagecopy */
            Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
            return true;
        case OP_MOVSWD:
        case OP_STOSWD:
            /* 64-bit REP STOSQ with a large count: treat as bulk clearing, i.e. reuse. */
            if (    pCpu->prefix == (PREFIX_REP|PREFIX_REX)
                &&  pRegFrame->rcx >= 0x40
               )
            {
                Assert(pCpu->mode == CPUMODE_64BIT);

                Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
                return true;
            }
            return false;
    }
    /* Writes through ESP-based addressing: the page is being used as a stack. */
    if (    (pCpu->param1.flags & USE_REG_GEN32)
        &&  (pCpu->param1.base.reg_gen == USE_REG_ESP))
    {
        Log4(("pgmPoolMonitorIsReused: ESP\n"));
        return true;
    }

    //if (pPage->fCR3Mix)
    //    return false;
    return false;
}
809
810
/**
 * Flushes the page being accessed.
 *
 * Flushes the whole monitor chain, then interprets the faulting instruction
 * so the guest write actually takes effect.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                     PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * First, do the flushing.
     */
    int rc = pgmPoolMonitorChainFlush(pPool, pPage);

    /*
     * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
     */
    uint32_t cbWritten;
    int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
    if (VBOX_SUCCESS(rc2))
        pRegFrame->rip += pCpu->opsize;
    else if (rc2 == VERR_EM_INTERPRETER)
    {
#ifdef IN_GC
        /* Faults inside PATM patch code are tolerated: let the patch run again. */
        if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
        {
            LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
                     pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
            rc = VINF_SUCCESS;
            STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
        }
        else
#endif
        {
            /* Can't interpret it here; go back to ring-3 / recompiler for full emulation. */
            rc = VINF_EM_RAW_EMULATE_INSTR;
            STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
        }
    }
    else
        rc = rc2;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
    return rc;

}
865
866
/**
 * Handles the STOSD write accesses.
 *
 * Performs the REP STOSD inline (32-bit mode only), notifying the pool
 * before each dword store so shadow entries can be cleared first.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                          PCPUMCTXCORE pRegFrame, RTGCPHYSS GCPhysFault, RTGCPTR pvFault)
{
    Assert(pCpu->mode == CPUMODE_32BIT);

    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Execute REP STOSD.
     *
     * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
     * write situation, meaning that it's safe to write here.
     */
    RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
    while (pRegFrame->ecx)
    {
        /* Give the pool a chance to clear/check the affected shadow entry first. */
        pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
#ifdef IN_GC
        *(uint32_t *)pu32 = pRegFrame->eax;
#else
        PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
#endif
        pu32 += 4;
        GCPhysFault += 4;
        pRegFrame->edi += 4;
        pRegFrame->ecx--;
    }
    pRegFrame->rip += pCpu->opsize;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
    return VINF_SUCCESS;
}
919
920
/**
 * Handles the simple write accesses.
 *
 * Notifies the pool about the write, then interprets the single faulting
 * instruction so the guest write takes effect.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                           PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Clear all the pages. ASSUMES that pvFault is readable.
     */
    pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);

    /*
     * Interpret the instruction.
     */
    uint32_t cb;
    int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
    if (VBOX_SUCCESS(rc))
        pRegFrame->rip += pCpu->opsize;
    else if (rc == VERR_EM_INTERPRETER)
    {
        LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
                 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
        rc = VINF_EM_RAW_EMULATE_INSTR;
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
    }

    /*
     * Quick hack, with logging enabled we're getting stale
     * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
     * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
     * have to be fixed to support this. But that'll have to wait till next week.
     *
     * An alternative is to keep track of the changed PTEs together with the
     * GCPhys from the guest PT. This may proove expensive though.
     *
     * At the moment, it's VITAL that it's done AFTER the instruction interpreting
     * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
     */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
    return rc;
}
980
981
/**
 * \#PF Handler callback for PT write accesses.
 *
 * Triages a write fault on a monitored (shadowed) guest page table: either
 * interprets the write and updates the shadow in place, or gives up and
 * flushes the shadow page.
 *
 * @returns VBox status code (appropriate for GC return).
 * @param   pVM         VM Handle.
 * @param   uErrorCode  CPU Error code.
 * @param   pRegFrame   Trap register frame.
 *                      NULL on DMA and other non CPU access.
 * @param   pvFault     The fault address (cr2).
 * @param   GCPhysFault The GC physical address corresponding to pvFault.
 * @param   pvUser      User argument.
 */
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
{
    STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
    PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
    PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
    LogFlow(("pgmPoolAccessHandler: pvFault=%VGv pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));

    /*
     * We should ALWAYS have the list head as user parameter. This
     * is because we use that page to record the changes.
     */
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);

    /*
     * Disassemble the faulting instruction.
     */
    DISCPUSTATE Cpu;
    int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
    AssertRCReturn(rc, rc);

    /*
     * Check if it's worth dealing with.
     *
     * A heavily modified page (>= 48 writes) that isn't a CR3-mix page, a page
     * that looks reused for other data, or a fork()-style CoW write is not worth
     * interpreting - those fall through to the flush path below.
     */
    bool fReused = false;
    if (    (   pPage->cModifications < 48   /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
             || pPage->fCR3Mix)
        &&  !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
        &&  !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
    {
        /*
         * Simple instructions, no REP prefix.
         */
        if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
        {
            rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
            return rc;
        }

        /*
         * Windows is frequently doing small memset() operations (netio test 4k+).
         * We have to deal with these or we'll kill the cache and performance.
         *
         * Only the narrow, observed pattern is special-cased: ring-0, small
         * dword-aligned 32-bit REP STOSD within one page, forward direction.
         */
        if (    Cpu.pCurInstr->opcode == OP_STOSWD
            &&  CPUMGetGuestCPL(pVM, pRegFrame) == 0
            &&  pRegFrame->ecx <= 0x20
            &&  pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
            &&  !((uintptr_t)pvFault & 3)
            &&  (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
            &&  Cpu.mode == CPUMODE_32BIT
            &&  Cpu.opmode == CPUMODE_32BIT
            &&  Cpu.addrmode == CPUMODE_32BIT
            &&  Cpu.prefix == PREFIX_REP
            &&  !pRegFrame->eflags.Bits.u1DF
            )
        {
            rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
            return rc;
        }

        /* REP prefix, don't bother. */
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
        Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%VGv opcode=%d prefix=%#x\n",
              pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
    }

    /*
     * Not worth it, so flush it.
     *
     * If we considered it to be reused, don't go back to ring-3
     * to emulate failed instructions since we usually cannot
     * interpret then. This may be a bit risky, in which case
     * the reuse detection must be fixed.
     */
    rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
    if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
        rc = VINF_SUCCESS;
    STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
    return rc;
}
1075
1076# endif /* !IN_RING3 */
1077#endif /* PGMPOOL_WITH_MONITORING */
1078
1079#ifdef PGMPOOL_WITH_CACHE
1080
1081/**
1082 * Inserts a page into the GCPhys hash table.
1083 *
1084 * @param pPool The pool.
1085 * @param pPage The page.
1086 */
1087DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1088{
1089 Log3(("pgmPoolHashInsert: %VGp\n", pPage->GCPhys));
1090 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1091 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1092 pPage->iNext = pPool->aiHash[iHash];
1093 pPool->aiHash[iHash] = pPage->idx;
1094}
1095
1096
1097/**
1098 * Removes a page from the GCPhys hash table.
1099 *
1100 * @param pPool The pool.
1101 * @param pPage The page.
1102 */
1103DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1104{
1105 Log3(("pgmPoolHashRemove: %VGp\n", pPage->GCPhys));
1106 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1107 if (pPool->aiHash[iHash] == pPage->idx)
1108 pPool->aiHash[iHash] = pPage->iNext;
1109 else
1110 {
1111 uint16_t iPrev = pPool->aiHash[iHash];
1112 for (;;)
1113 {
1114 const int16_t i = pPool->aPages[iPrev].iNext;
1115 if (i == pPage->idx)
1116 {
1117 pPool->aPages[iPrev].iNext = pPage->iNext;
1118 break;
1119 }
1120 if (i == NIL_PGMPOOL_IDX)
1121 {
1122 AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1123 break;
1124 }
1125 iPrev = i;
1126 }
1127 }
1128 pPage->iNext = NIL_PGMPOOL_IDX;
1129}
1130
1131
/**
 * Frees up one cache page.
 *
 * Picks the least recently used page from the tail of the age list (skipping
 * the caller's own page and the active shadow CR3) and flushes it.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
 * @param   pPool       The pool.
 * @param   iUser       The user index.
 */
static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
{
#ifndef IN_GC
    const PVM pVM = pPool->CTX_SUFF(pVM);
#endif
    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
    STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);

    /*
     * Select one page from the tail of the age list.
     * Never evict the page the caller is currently allocating for.
     */
    uint16_t iToFree = pPool->iAgeTail;
    if (iToFree == iUser)
        iToFree = pPool->aPages[iToFree].iAgePrev;
/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
    if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
    {
        uint16_t i = pPool->aPages[iToFree].iAgePrev;
        for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
        {
            if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
                continue;
            iToFree = i;
            break;
        }
    }
*/

    Assert(iToFree != iUser);
    AssertRelease(iToFree != NIL_PGMPOOL_IDX);

    PPGMPOOLPAGE pPage = &pPool->aPages[iToFree];

    /*
     * Reject any attempts at flushing the currently active shadow CR3 mapping
     */
    if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
    {
        /* Refresh the cr3 mapping by putting it at the head of the age list. */
        pgmPoolCacheUsed(pPool, pPage);
        /* NOTE(review): the recursion retries with a new tail; it terminates as long
         * as some non-CR3 page remains in the age list - confirm this invariant. */
        return pgmPoolCacheFreeOne(pPool, iUser);
    }

    int rc = pgmPoolFlushPage(pPool, pPage);
    if (rc == VINF_SUCCESS)
        PGM_INVL_GUEST_TLBS(); /* see PT handler. */
    return rc;
}
1189
1190
/**
 * Checks if a kind mismatch is really a page being reused
 * or if it's just normal remappings.
 *
 * @returns true if reused and the cached page (enmKind1) should be flushed
 * @returns false if not reused.
 * @param   enmKind1    The kind of the cached page.
 * @param   enmKind2    The kind of the requested page.
 */
static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
{
    switch (enmKind1)
    {
        /*
         * Never reuse them. There is no remapping in non-paging mode.
         */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
            return true;

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         * (Cached 32-bit kinds vs requested PAE/64-bit/phys kinds => reuse.)
         */
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
                case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
                case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         * (Mirror of the case above: cached PAE/64-bit kinds vs requested 32-bit/phys kinds.)
         */
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * These cannot be flushed, and it's common to reuse the PDs as PTs.
         */
        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
        case PGMPOOLKIND_ROOT_PDPT:
        case PGMPOOLKIND_ROOT_NESTED:
            return false;

        default:
            AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
    }
}
1285
1286
/**
 * Attempts to satisfy a pgmPoolAlloc request from the cache.
 *
 * Walks the GCPhys hash chain; an exact (GCPhys, kind) match is handed out
 * with a new user reference. A GCPhys match with a conflicting kind may mean
 * the guest reused the page, in which case the stale shadow is flushed.
 *
 * @returns VBox status code.
 * @retval  VINF_PGM_CACHED_PAGE on success.
 * @retval  VERR_FILE_NOT_FOUND if not found.
 * @param   pPool       The pool.
 * @param   GCPhys      The GC physical address of the page we're gonna shadow.
 * @param   enmKind     The kind of mapping.
 * @param   iUser       The shadow page pool index of the user table.
 * @param   iUserTable  The index into the user table (shadowed).
 * @param   ppPage      Where to store the pointer to the page.
 */
static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
{
#ifndef IN_GC
    const PVM pVM = pPool->CTX_SUFF(pVM);
#endif
    /*
     * Look up the GCPhys in the hash.
     */
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    Log3(("pgmPoolCacheAlloc: %VGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
    if (i != NIL_PGMPOOL_IDX)
    {
        do
        {
            PPGMPOOLPAGE pPage = &pPool->aPages[i];
            Log3(("pgmPoolCacheAlloc: slot %d found page %VGp\n", i, pPage->GCPhys));
            if (pPage->GCPhys == GCPhys)
            {
                if ((PGMPOOLKIND)pPage->enmKind == enmKind)
                {
                    /* Exact hit: add the new user reference and return the cached page. */
                    int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
                    if (VBOX_SUCCESS(rc))
                    {
                        *ppPage = pPage;
                        STAM_COUNTER_INC(&pPool->StatCacheHits);
                        return VINF_PGM_CACHED_PAGE;
                    }
                    return rc;
                }

                /*
                 * The kind is different. In some cases we should now flush the page
                 * as it has been reused, but in most cases this is normal remapping
                 * of PDs as PT or big pages using the GCPhys field in a slightly
                 * different way than the other kinds.
                 */
                if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
                {
                    STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
                    pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
                    PGM_INVL_GUEST_TLBS(); /* see PT handler. */
                    break;  /* no point continuing; report a miss. */
                }
            }

            /* next */
            i = pPage->iNext;
        } while (i != NIL_PGMPOOL_IDX);
    }

    Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
    STAM_COUNTER_INC(&pPool->StatCacheMisses);
    return VERR_FILE_NOT_FOUND;
}
1354
1355
1356/**
1357 * Inserts a page into the cache.
1358 *
1359 * @param pPool The pool.
1360 * @param pPage The cached page.
1361 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1362 */
1363static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1364{
1365 /*
1366 * Insert into the GCPhys hash if the page is fit for that.
1367 */
1368 Assert(!pPage->fCached);
1369 if (fCanBeCached)
1370 {
1371 pPage->fCached = true;
1372 pgmPoolHashInsert(pPool, pPage);
1373 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1374 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1375 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1376 }
1377 else
1378 {
1379 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1380 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1381 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1382 }
1383
1384 /*
1385 * Insert at the head of the age list.
1386 */
1387 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1388 pPage->iAgeNext = pPool->iAgeHead;
1389 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1390 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1391 else
1392 pPool->iAgeTail = pPage->idx;
1393 pPool->iAgeHead = pPage->idx;
1394}
1395
1396
1397/**
1398 * Flushes a cached page.
1399 *
1400 * @param pPool The pool.
1401 * @param pPage The cached page.
1402 */
1403static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1404{
1405 Log3(("pgmPoolCacheFlushPage: %VGp\n", pPage->GCPhys));
1406
1407 /*
1408 * Remove the page from the hash.
1409 */
1410 if (pPage->fCached)
1411 {
1412 pPage->fCached = false;
1413 pgmPoolHashRemove(pPool, pPage);
1414 }
1415 else
1416 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1417
1418 /*
1419 * Remove it from the age list.
1420 */
1421 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1422 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1423 else
1424 pPool->iAgeTail = pPage->iAgePrev;
1425 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1426 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1427 else
1428 pPool->iAgeHead = pPage->iAgeNext;
1429 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1430 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1431}
1432
1433#endif /* PGMPOOL_WITH_CACHE */
1434#ifdef PGMPOOL_WITH_MONITORING
1435
/**
 * Looks for pages sharing the monitor.
 *
 * Searches the GCPhys hash for another monitored shadow page backed by the
 * same guest page and returns the head of that monitoring chain, so the new
 * page can join the chain instead of installing a second physical handler.
 *
 * @returns Pointer to the head page.
 * @returns NULL if not found.
 * @param   pPool       The Pool
 * @param   pNewPage    The page which is going to be monitored.
 */
static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
{
#ifdef PGMPOOL_WITH_CACHE
    /*
     * Look up the GCPhys in the hash.
     */
    RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    if (i == NIL_PGMPOOL_IDX)
        return NULL;
    do
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[i];
        /* Unsigned subtraction: true iff pPage->GCPhys lies within the same
           guest page as the (page-aligned) GCPhys, wrap-around excluded. */
        if (    pPage->GCPhys - GCPhys < PAGE_SIZE
            &&  pPage != pNewPage)
        {
            switch (pPage->enmKind)
            {
                /* Kinds that are write-monitored: walk back to the chain head. */
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
                case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
                case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
                case PGMPOOLKIND_ROOT_32BIT_PD:
                case PGMPOOLKIND_ROOT_PAE_PD:
                case PGMPOOLKIND_ROOT_PDPT:
                {
                    /* find the head */
                    while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
                    {
                        Assert(pPage->iMonitoredPrev != pPage->idx);
                        pPage = &pPool->aPages[pPage->iMonitoredPrev];
                    }
                    return pPage;
                }

                /* ignore, no monitoring. */
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                case PGMPOOLKIND_ROOT_NESTED:
                    break;
                default:
                    AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
            }
        }

        /* next */
        i = pPage->iNext;
    } while (i != NIL_PGMPOOL_IDX);
#endif
    return NULL;
}
1507
1508
/**
 * Enabled write monitoring of a guest page.
 *
 * Either links the page into an existing monitoring chain for the same guest
 * page, or registers a new physical write handler covering it.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
 * @param   pPool       The pool.
 * @param   pPage       The cached page.
 */
static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    LogFlow(("pgmPoolMonitorInsert %VGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));

    /*
     * Filter out the relevant kinds.
     */
    switch (pPage->enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_ROOT_PDPT:
            break;

        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            /* Nothing to monitor here. */
            return VINF_SUCCESS;

        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
#ifdef PGMPOOL_WITH_MIXED_PT_CR3
            break;
#endif
        /* Without mixed PT/CR3 monitoring the root PD kinds fall through and are fatal. */
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        default:
            AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
    }

    /*
     * Install handler.
     */
    int rc;
    PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
    if (pPageHead)
    {
        /* Another page already monitors this guest page: splice ourselves in
           right after the chain head instead of registering a new handler. */
        Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
        Assert(pPageHead->iMonitoredPrev != pPage->idx);
        pPage->iMonitoredPrev = pPageHead->idx;
        pPage->iMonitoredNext = pPageHead->iMonitoredNext;
        if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
            pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
        pPageHead->iMonitoredNext = pPage->idx;
        rc = VINF_SUCCESS;
    }
    else
    {
        Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
        PVM pVM = pPool->CTX_SUFF(pVM);
        const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
        rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
                                          GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
                                          pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
                                          pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
                                          pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
                                          pPool->pszAccessHandler);
        /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
         * the heap size should suffice. */
        AssertFatalRC(rc);
        if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
            rc = VERR_PGM_POOL_CLEARED;
    }
    pPage->fMonitored = true;
    return rc;
}
1597
1598
/**
 * Disables write monitoring of a guest page.
 *
 * Unlinks the page from its monitoring chain; when it is the last member,
 * the physical write handler is deregistered instead. When it is the chain
 * head, the handler's user argument is repointed to the new head.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
 * @param   pPool       The pool.
 * @param   pPage       The cached page.
 */
static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    /*
     * Filter out the relevant kinds.
     */
    switch (pPage->enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_ROOT_PDPT:
            break;

        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            /* Nothing to monitor here. */
            return VINF_SUCCESS;

        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
#ifdef PGMPOOL_WITH_MIXED_PT_CR3
            break;
#endif
        /* Without mixed PT/CR3 monitoring the root PD kinds fall through and are fatal. */
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        default:
            AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
    }

    /*
     * Remove the page from the monitored list or uninstall it if last.
     */
    const PVM pVM = pPool->CTX_SUFF(pVM);
    int rc;
    if (    pPage->iMonitoredNext != NIL_PGMPOOL_IDX
        ||  pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
    {
        if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
        {
            /* We're the chain head: hand the handler's user argument over to the next page. */
            PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
            pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
            pNewHead->fCR3Mix = pPage->fCR3Mix;
            rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
                                                   pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
                                                   pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
                                                   pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
                                                   pPool->pszAccessHandler);
            AssertFatalRCSuccess(rc);
            pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
        }
        else
        {
            /* Middle or tail of the chain: plain doubly-linked-list unlink. */
            pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
            if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
            {
                pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
                pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
            }
            pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
            rc = VINF_SUCCESS;
        }
    }
    else
    {
        /* Last member: tear down the physical handler for the guest page. */
        rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
        AssertFatalRC(rc);
        if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
            rc = VERR_PGM_POOL_CLEARED;
    }
    pPage->fMonitored = false;

    /*
     * Remove it from the list of modified pages (if in it).
     */
    pgmPoolMonitorModifiedRemove(pPool, pPage);

    return rc;
}
1698
1699# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1700
1701/**
1702 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1703 *
1704 * @param pPool The Pool.
1705 * @param pPage A page in the chain.
1706 * @param fCR3Mix The new fCR3Mix value.
1707 */
1708static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1709{
1710 /* current */
1711 pPage->fCR3Mix = fCR3Mix;
1712
1713 /* before */
1714 int16_t idx = pPage->iMonitoredPrev;
1715 while (idx != NIL_PGMPOOL_IDX)
1716 {
1717 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1718 idx = pPool->aPages[idx].iMonitoredPrev;
1719 }
1720
1721 /* after */
1722 idx = pPage->iMonitoredNext;
1723 while (idx != NIL_PGMPOOL_IDX)
1724 {
1725 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1726 idx = pPool->aPages[idx].iMonitoredNext;
1727 }
1728}
1729
1730
/**
 * Installs or modifies monitoring of a CR3 page (special).
 *
 * We're pretending the CR3 page is shadowed by the pool so we can use the
 * generic mechanisms in detecting chained monitoring. (This also gives us a
 * taste of what code changes are required to really pool CR3 shadow pages.)
 *
 * @returns VBox status code.
 * @param   pPool       The pool.
 * @param   idxRoot     The CR3 (root) page index.
 * @param   GCPhysCR3   The (new) CR3 value.
 */
int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
{
    Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
    PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
    LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
             idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));

    /*
     * The unlikely case where it already matches.
     */
    if (pPage->GCPhys == GCPhysCR3)
    {
        Assert(pPage->fMonitored);
        return VINF_SUCCESS;
    }

    /*
     * Flush the current monitoring and remove it from the hash.
     * VERR_PGM_POOL_CLEARED just means a light weight pool flush is pending; not an error here.
     */
    int rc = VINF_SUCCESS;
    if (pPage->fMonitored)
    {
        pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
        rc = pgmPoolMonitorFlush(pPool, pPage);
        if (rc == VERR_PGM_POOL_CLEARED)
            rc = VINF_SUCCESS;
        else
            AssertFatalRC(rc);
        pgmPoolHashRemove(pPool, pPage);
    }

    /*
     * Monitor the page at the new location and insert it into the hash.
     */
    pPage->GCPhys = GCPhysCR3;
    int rc2 = pgmPoolMonitorInsert(pPool, pPage);
    if (rc2 != VERR_PGM_POOL_CLEARED)
    {
        AssertFatalRC(rc2);
        if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
            rc = rc2;
    }
    pgmPoolHashInsert(pPool, pPage);
    pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
    return rc;
}
1789
1790
1791/**
1792 * Removes the monitoring of a CR3 page (special).
1793 *
1794 * @returns VBox status code.
1795 * @param pPool The pool.
1796 * @param idxRoot The CR3 (root) page index.
1797 */
1798int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1799{
1800 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1801 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1802 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1803 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1804
1805 if (!pPage->fMonitored)
1806 return VINF_SUCCESS;
1807
1808 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1809 int rc = pgmPoolMonitorFlush(pPool, pPage);
1810 if (rc != VERR_PGM_POOL_CLEARED)
1811 AssertFatalRC(rc);
1812 else
1813 rc = VINF_SUCCESS;
1814 pgmPoolHashRemove(pPool, pPage);
1815 Assert(!pPage->fMonitored);
1816 pPage->GCPhys = NIL_RTGCPHYS;
1817 return rc;
1818}
1819
1820# endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1821
1822/**
1823 * Inserts the page into the list of modified pages.
1824 *
1825 * @param pPool The pool.
1826 * @param pPage The page.
1827 */
1828void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1829{
1830 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1831 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1832 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1833 && pPool->iModifiedHead != pPage->idx,
1834 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1835 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1836 pPool->iModifiedHead, pPool->cModifiedPages));
1837
1838 pPage->iModifiedNext = pPool->iModifiedHead;
1839 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1840 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1841 pPool->iModifiedHead = pPage->idx;
1842 pPool->cModifiedPages++;
1843#ifdef VBOX_WITH_STATISTICS
1844 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1845 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1846#endif
1847}
1848
1849
/**
 * Removes the page from the list of modified pages and resets the
 * modification counter.
 *
 * Handles all three positions: list head, list interior/tail, and not in the
 * list at all (in which case only the counter is reset).
 *
 * @param   pPool       The pool.
 * @param   pPage       The page which is believed to be in the list of modified pages.
 */
static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
    if (pPool->iModifiedHead == pPage->idx)
    {
        /* Head of the list: a head page never has a predecessor. */
        Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
        pPool->iModifiedHead = pPage->iModifiedNext;
        if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
        {
            pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
            pPage->iModifiedNext = NIL_PGMPOOL_IDX;
        }
        pPool->cModifiedPages--;
    }
    else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
    {
        /* Interior or tail: standard doubly-linked-list unlink. */
        pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
        if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
        {
            pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
            pPage->iModifiedNext = NIL_PGMPOOL_IDX;
        }
        pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
        pPool->cModifiedPages--;
    }
    else
        /* Not in the list; only the counter needs resetting. */
        Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
    pPage->cModifications = 0;
}
1886
1887
1888/**
1889 * Zaps the list of modified pages, resetting their modification counters in the process.
1890 *
1891 * @param pVM The VM handle.
1892 */
1893void pgmPoolMonitorModifiedClearAll(PVM pVM)
1894{
1895 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1896 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1897
1898 unsigned cPages = 0; NOREF(cPages);
1899 uint16_t idx = pPool->iModifiedHead;
1900 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1901 while (idx != NIL_PGMPOOL_IDX)
1902 {
1903 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1904 idx = pPage->iModifiedNext;
1905 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1906 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1907 pPage->cModifications = 0;
1908 Assert(++cPages);
1909 }
1910 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1911 pPool->cModifiedPages = 0;
1912}
1913
1914
/**
 * Clear all shadow pages and clear all modification counters.
 *
 * Every shadow page table in the pool is zeroed, every page is unlinked from
 * the modified list, and (when GCPhys tracking is compiled in) all guest
 * page back-links are wiped and the phys-ext free list is rebuilt.
 *
 * @param pVM The VM handle.
 * @remark Should only be used when monitoring is available, thus placed in
 * the PGMPOOL_WITH_MONITORING #ifdef.
 */
void pgmPoolClearAll(PVM pVM)
{
    PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
    STAM_PROFILE_START(&pPool->StatClearAll, c);
    LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));

    /*
     * Iterate all the pages until we've encountered all that in use.
     * This is simple but not quite optimal solution.
     */
    unsigned cModifiedPages = 0; NOREF(cModifiedPages); /* only consumed by the assertions below */
    unsigned cLeft = pPool->cUsedPages;
    unsigned iPage = pPool->cCurPages;
    while (--iPage >= PGMPOOL_IDX_FIRST)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
        if (pPage->GCPhys != NIL_RTGCPHYS)
        {
            switch (pPage->enmKind)
            {
                /*
                 * We only care about shadow page tables.
                 */
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                {
#ifdef PGMPOOL_WITH_USER_TRACKING
                    if (pPage->cPresent)
#endif
                    {
                        /* Zero the whole shadow PT so no stale guest mappings survive. */
                        void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
                        STAM_PROFILE_START(&pPool->StatZeroPage, z);
                        ASMMemZeroPage(pvShw);
                        STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
#ifdef PGMPOOL_WITH_USER_TRACKING
                        pPage->cPresent = 0;
                        pPage->iFirstPresent = ~0;
#endif
                    }
                }
                /* fall thru */

                default:
                    /* Common for all kinds: unlink from the modified list and reset the counter. */
                    Assert(!pPage->cModifications || ++cModifiedPages);
                    Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
                    Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
                    pPage->iModifiedNext = NIL_PGMPOOL_IDX;
                    pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
                    pPage->cModifications = 0;
                    break;

            }
            if (!--cLeft)
                break;
        }
    }

    /* swipe the special pages too. */
    for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
        if (pPage->GCPhys != NIL_RTGCPHYS)
        {
            Assert(!pPage->cModifications || ++cModifiedPages);
            Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
            Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
            pPage->iModifiedNext = NIL_PGMPOOL_IDX;
            pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
            pPage->cModifications = 0;
        }
    }

#ifndef DEBUG_michael
    AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
#endif
    pPool->iModifiedHead = NIL_PGMPOOL_IDX;
    pPool->cModifiedPages = 0;

#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
    /*
     * Clear all the GCPhys links and rebuild the phys ext free list.
     */
    for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
         pRam;
         pRam = pRam->CTX_SUFF(pNext))
    {
        unsigned iPage = pRam->cb >> PAGE_SHIFT;
        while (iPage-- > 0)
            pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
    }

    /* Chain every extent into one big free list. */
    pPool->iPhysExtFreeHead = 0;
    PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
    const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
    for (unsigned i = 0; i < cMaxPhysExts; i++)
    {
        paPhysExts[i].iNext = i + 1;
        paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
        paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
        paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
    }
    paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
#endif


    pPool->cPresent = 0;
    STAM_PROFILE_STOP(&pPool->StatClearAll, c);
}
2036
2037
/**
 * Handle SyncCR3 pool tasks
 *
 * @returns VBox status code.
 * @retval VINF_SUCCESS if successfully added.
 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
 * @param pVM The VM handle.
 * @remark Should only be used when monitoring is available, thus placed in
 * the PGMPOOL_WITH_MONITORING #ifdef.
 */
int pgmPoolSyncCR3(PVM pVM)
{
    /*
     * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
     * Occasionally we will have to clear all the shadow page tables because we wanted
     * to monitor a page which was mapped by too many shadowed page tables. This operation
     * is sometimes referred to as a 'lightweight flush'.
     */
    if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
        pgmPoolMonitorModifiedClearAll(pVM);
    else
    {
# ifndef IN_GC
        /* Ring-3/ring-0 can do the full pool clear right here. */
        pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
        pgmPoolClearAll(pVM);
# else
        /* GC cannot clear the pool itself; defer the work to ring 3. */
        LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
        VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
        return VINF_PGM_SYNC_CR3;
# endif
    }
    return VINF_SUCCESS;
}
2071
2072#endif /* PGMPOOL_WITH_MONITORING */
2073#ifdef PGMPOOL_WITH_USER_TRACKING
2074
2075/**
2076 * Frees up at least one user entry.
2077 *
2078 * @returns VBox status code.
2079 * @retval VINF_SUCCESS if successfully added.
2080 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2081 * @param pPool The pool.
2082 * @param iUser The user index.
2083 */
2084static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2085{
2086 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2087#ifdef PGMPOOL_WITH_CACHE
2088 /*
2089 * Just free cached pages in a braindead fashion.
2090 */
2091 /** @todo walk the age list backwards and free the first with usage. */
2092 int rc = VINF_SUCCESS;
2093 do
2094 {
2095 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2096 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
2097 rc = rc2;
2098 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2099 return rc;
2100#else
2101 /*
2102 * Lazy approach.
2103 */
2104 /* @todo incompatible with long mode paging (cr3 root will be flushed) */
2105 Assert(!CPUMIsGuestInLongMode(pVM));
2106 pgmPoolFlushAllInt(pPool);
2107 return VERR_PGM_POOL_FLUSHED;
2108#endif
2109}
2110
2111
2112/**
2113 * Inserts a page into the cache.
2114 *
2115 * This will create user node for the page, insert it into the GCPhys
2116 * hash, and insert it into the age list.
2117 *
2118 * @returns VBox status code.
2119 * @retval VINF_SUCCESS if successfully added.
2120 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2121 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
2122 * @param pPool The pool.
2123 * @param pPage The cached page.
2124 * @param GCPhys The GC physical address of the page we're gonna shadow.
2125 * @param iUser The user index.
2126 * @param iUserTable The user table index.
2127 */
2128DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2129{
2130 int rc = VINF_SUCCESS;
2131 PPGMPOOLUSER pUser = pPool->CTX_SUFF(paUsers);
2132
2133 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
2134
2135 /*
2136 * Find free a user node.
2137 */
2138 uint16_t i = pPool->iUserFreeHead;
2139 if (i == NIL_PGMPOOL_USER_INDEX)
2140 {
2141 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2142 if (VBOX_FAILURE(rc))
2143 return rc;
2144 i = pPool->iUserFreeHead;
2145 }
2146
2147 /*
2148 * Unlink the user node from the free list,
2149 * initialize and insert it into the user list.
2150 */
2151 pPool->iUserFreeHead = pUser[i].iNext;
2152 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
2153 pUser[i].iUser = iUser;
2154 pUser[i].iUserTable = iUserTable;
2155 pPage->iUserHead = i;
2156
2157 /*
2158 * Insert into cache and enable monitoring of the guest page if enabled.
2159 *
2160 * Until we implement caching of all levels, including the CR3 one, we'll
2161 * have to make sure we don't try monitor & cache any recursive reuse of
2162 * a monitored CR3 page. Because all windows versions are doing this we'll
2163 * have to be able to do combined access monitoring, CR3 + PT and
2164 * PD + PT (guest PAE).
2165 *
2166 * Update:
2167 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2168 */
2169#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2170# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2171 const bool fCanBeMonitored = true;
2172# else
2173 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2174 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2175 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2176# endif
2177# ifdef PGMPOOL_WITH_CACHE
2178 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2179# endif
2180 if (fCanBeMonitored)
2181 {
2182# ifdef PGMPOOL_WITH_MONITORING
2183 rc = pgmPoolMonitorInsert(pPool, pPage);
2184 if (rc == VERR_PGM_POOL_CLEARED)
2185 {
2186 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2187# ifndef PGMPOOL_WITH_CACHE
2188 pgmPoolMonitorFlush(pPool, pPage);
2189 rc = VERR_PGM_POOL_FLUSHED;
2190# endif
2191 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2192 pUser[i].iNext = pPool->iUserFreeHead;
2193 pUser[i].iUser = NIL_PGMPOOL_IDX;
2194 pPool->iUserFreeHead = i;
2195 }
2196 }
2197# endif
2198#endif /* PGMPOOL_WITH_MONITORING */
2199 return rc;
2200}
2201
2202
2203# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2204/**
2205 * Adds a user reference to a page.
2206 *
 * This will move the page to the head of the cache's age list.
2209 *
2210 * @returns VBox status code.
2211 * @retval VINF_SUCCESS if successfully added.
2212 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2213 * @param pPool The pool.
2214 * @param pPage The cached page.
2215 * @param iUser The user index.
2216 * @param iUserTable The user table.
2217 */
static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
{
    PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);

    LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
# ifdef VBOX_STRICT
    /*
     * Check that the entry doesn't already exists.
     */
    if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
    {
        /* Walk the page's whole user chain looking for a duplicate (iUser, iUserTable). */
        uint16_t i = pPage->iUserHead;
        do
        {
            Assert(i < pPool->cMaxUsers);
            AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
            i = paUsers[i].iNext;
        } while (i != NIL_PGMPOOL_USER_INDEX);
    }
# endif

    /*
     * Allocate a user node.
     */
    uint16_t i = pPool->iUserFreeHead;
    if (i == NIL_PGMPOOL_USER_INDEX)
    {
        /* Free list exhausted: evict a cached page to replenish it. */
        int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
        if (VBOX_FAILURE(rc))
            return rc;
        i = pPool->iUserFreeHead;
    }
    pPool->iUserFreeHead = paUsers[i].iNext;

    /*
     * Initialize the user node and insert it.
     */
    paUsers[i].iNext = pPage->iUserHead;
    paUsers[i].iUser = iUser;
    paUsers[i].iUserTable = iUserTable;
    pPage->iUserHead = i;

# ifdef PGMPOOL_WITH_CACHE
    /*
     * Tell the cache to update its replacement stats for this page.
     */
    pgmPoolCacheUsed(pPool, pPage);
# endif
    return VINF_SUCCESS;
}
2268# endif /* PGMPOOL_WITH_CACHE */
2269
2270
/**
 * Frees a user record associated with a page.
 *
 * This does not clear the entry in the user table, it simply replaces the
 * user record to the chain of free records.
 *
 * @param pPool The pool.
 * @param pPage The shadow page whose user record is being freed.
 *              (Doc fix: the old comment described a non-existent HCPhys parameter.)
 * @param iUser The shadow page pool index of the user table.
 * @param iUserTable The index into the user table (shadowed).
 */
static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
{
    /*
     * Unlink and free the specified user entry.
     */
    PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);

    /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
    uint16_t i = pPage->iUserHead;
    if (    i != NIL_PGMPOOL_USER_INDEX
        &&  paUsers[i].iUser == iUser
        &&  paUsers[i].iUserTable == iUserTable)
    {
        /* Head hit: unlink and push onto the free list. */
        pPage->iUserHead = paUsers[i].iNext;

        paUsers[i].iUser = NIL_PGMPOOL_IDX;
        paUsers[i].iNext = pPool->iUserFreeHead;
        pPool->iUserFreeHead = i;
        return;
    }

    /* General: Linear search. */
    uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
    while (i != NIL_PGMPOOL_USER_INDEX)
    {
        if (    paUsers[i].iUser == iUser
            &&  paUsers[i].iUserTable == iUserTable)
        {
            /* Unlink from the middle/tail of the chain. */
            if (iPrev != NIL_PGMPOOL_USER_INDEX)
                paUsers[iPrev].iNext = paUsers[i].iNext;
            else
                pPage->iUserHead = paUsers[i].iNext;

            paUsers[i].iUser = NIL_PGMPOOL_IDX;
            paUsers[i].iNext = pPool->iUserFreeHead;
            pPool->iUserFreeHead = i;
            return;
        }
        iPrev = i;
        i = paUsers[i].iNext;
    }

    /* Fatal: didn't find it */
    AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
                          iUser, iUserTable, pPage->GCPhys));
}
2328
2329
/**
 * Gets the entry size of a shadow table.
 *
 * @param enmKind The kind of page.
 *
 * @returns The size of the entry in bytes. That is, 4 or 8.
 * @returns If the kind is not for a table, an assertion is raised and 0 is
 * returned.
 */
DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        /* 32-bit shadow tables use 4 byte entries. */
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_ROOT_32BIT_PD:
            return 4;

        /* PAE, long-mode, nested and EPT shadow tables use 8 byte entries. */
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_ROOT_PAE_PD:
        case PGMPOOLKIND_ROOT_PDPT:
        case PGMPOOLKIND_ROOT_NESTED:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
            return 8;

        default:
            AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
    }
}
2373
2374
/**
 * Gets the entry size of a guest table.
 *
 * @param enmKind The kind of page.
 *
 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
 * @returns If the kind is not for a table, an assertion is raised and 0 is
 * returned.
 */
DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        /* Kinds shadowing 32-bit guest structures: 4 byte guest entries. */
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
            return 4;

        /* Kinds shadowing PAE/long-mode guest structures: 8 byte guest entries. */
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_ROOT_PAE_PD:
        case PGMPOOLKIND_ROOT_PDPT:
            return 8;

        /* Physical-backing and nested kinds have no guest table behind them. */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            /** @todo can we return 0? (nobody is calling this...) */
            AssertFailed();
            return 0;

        default:
            AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
    }
}
2422
2423#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2424
/**
 * Scans one shadow page table for mappings of a physical page.
 *
 * Worker for pgmPoolTrackFlushGCPhysPT / pgmPoolTrackFlushGCPhysPTs: zeroes
 * every PTE in shadow page table @a iShw that maps @a pPhysPage, and asserts
 * fatally if fewer matches than @a cRefs are found.
 *
 * @param pVM The VM handle.
 * @param pPhysPage The guest page in question.
 * @param iShw The shadow page table.
 * @param cRefs The number of references made in that PT.
 */
static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
{
    LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
    PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);

    /*
     * Assert sanity.
     */
    Assert(cRefs == 1); /* the simple tracking scheme records at most one ref per PT */
    AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
    PPGMPOOLPAGE pPage = &pPool->aPages[iShw];

    /*
     * Then, clear the actual mappings to the page in the shadow PT.
     */
    switch (pPage->enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        {
            /* Match against HCPhys + present bit and wipe each matching 32-bit PTE. */
            const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
            PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
            for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
                if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
                {
                    Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
                    pPT->a[i].u = 0;
                    cRefs--;
                    if (!cRefs)
                        return;
                }
#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
            /* Debug aid: dump and clear whatever is left before the fatal assertion below. */
            RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
            for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
                if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
                {
                    RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
                    pPT->a[i].u = 0;
                }
#endif
            /* Reaching this point means the tracked ref count disagrees with the PT contents. */
            AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
            break;
        }

        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        {
            /* Same scheme for 64-bit PAE PTEs. */
            const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
            PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
            for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
                if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
                {
                    Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
                    pPT->a[i].u = 0;
                    cRefs--;
                    if (!cRefs)
                        return;
                }
#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
            RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
            for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
                if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
                {
                    RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
                    pPT->a[i].u = 0;
                }
#endif
            AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
            break;
        }

        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        {
            /* Same scheme for EPT PTEs (different physical address mask). */
            const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
            PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
            for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
                if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
                {
                    Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
                    pPT->a[i].u = 0;
                    cRefs--;
                    if (!cRefs)
                        return;
                }
#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
            RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
            for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
                if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
                {
                    RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
                    pPT->a[i].u = 0;
                }
#endif
            AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
            break;
        }

        default:
            AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
    }
}
2538
2539
/**
 * Scans one shadow page table for mappings of a physical page.
 *
 * External wrapper around pgmPoolTrackFlushGCPhysPTInt that also clears the
 * tracking bits on the guest page itself.
 *
 * @param pVM The VM handle.
 * @param pPhysPage The guest page in question.
 * @param iShw The shadow page table.
 * @param cRefs The number of references made in that PT.
 */
void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
{
    PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool); /* only needed for the stats macros */
    LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
    STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
    pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
    pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
    STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
}
2557
2558
2559/**
2560 * Flushes a list of shadow page tables mapping the same physical page.
2561 *
2562 * @param pVM The VM handle.
2563 * @param pPhysPage The guest page in question.
2564 * @param iPhysExt The physical cross reference extent list to flush.
2565 */
2566void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2567{
2568 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2569 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2570 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt\n", pPhysPage->HCPhys, iPhysExt));
2571
2572 const uint16_t iPhysExtStart = iPhysExt;
2573 PPGMPOOLPHYSEXT pPhysExt;
2574 do
2575 {
2576 Assert(iPhysExt < pPool->cMaxPhysExts);
2577 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2578 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2579 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2580 {
2581 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2582 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2583 }
2584
2585 /* next */
2586 iPhysExt = pPhysExt->iNext;
2587 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2588
2589 /* insert the list into the free list and clear the ram range entry. */
2590 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2591 pPool->iPhysExtFreeHead = iPhysExtStart;
2592 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2593
2594 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2595}
2596
2597#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2598
/**
 * Scans all shadow page tables for mappings of a physical page.
 *
 * This may be slow, but it's most likely more efficient than cleaning
 * out the entire page pool / cache.
 *
 * @returns VBox status code.
 * @retval VINF_SUCCESS if all references has been successfully cleared.
 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
 * a page pool cleaning.
 *
 * @param pVM The VM handle.
 * @param pPhysPage The guest page in question.
 */
int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
{
    PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
    STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
    LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
             pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));

#if 1
    /*
     * There is a limit to what makes sense.
     */
    if (pPool->cPresent > 1024)
    {
        /* Too many present entries to scan; let the caller do a full sync instead. */
        LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
        STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
        return VINF_PGM_GCPHYS_ALIASED;
    }
#endif

    /*
     * Iterate all the pages until we've encountered all that in use.
     * This is simple but not quite optimal solution.
     */
    const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /* 64-bit match pattern */
    const uint32_t u32 = u64;                                        /* truncated 32-bit match pattern */
    unsigned cLeft = pPool->cUsedPages;
    unsigned iPage = pPool->cCurPages;
    while (--iPage >= PGMPOOL_IDX_FIRST)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
        if (pPage->GCPhys != NIL_RTGCPHYS)
        {
            switch (pPage->enmKind)
            {
                /*
                 * We only care about shadow page tables.
                 */
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                {
                    /* Scan the 32-bit PT, stopping once all present entries are seen. */
                    unsigned cPresent = pPage->cPresent;
                    PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
                    for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
                        if (pPT->a[i].n.u1Present)
                        {
                            if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
                            {
                                //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
                                pPT->a[i].u = 0;
                            }
                            if (!--cPresent)
                                break;
                        }
                    break;
                }

                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                {
                    /* Same scan for PAE PTs with the 64-bit pattern. */
                    unsigned cPresent = pPage->cPresent;
                    PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
                    for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
                        if (pPT->a[i].n.u1Present)
                        {
                            if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
                            {
                                //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
                                pPT->a[i].u = 0;
                            }
                            if (!--cPresent)
                                break;
                        }
                    break;
                }
                /* Other kinds do not map guest physical pages and are skipped. */
            }
            if (!--cLeft)
                break;
        }
    }

    pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
    STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
    return VINF_SUCCESS;
}
2701
2702
/**
 * Clears the user entry in a user table.
 *
 * This is used to remove all references to a page when flushing it.
 *
 * @param pPool The pool.
 * @param pPage The page being flushed (only used for strict assertions).
 * @param pUser The user record identifying the user page and slot to clear.
 */
static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
{
    Assert(pUser->iUser != NIL_PGMPOOL_IDX);
    Assert(pUser->iUser < pPool->cCurPages);

    /*
     * Map the user page.
     */
    PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
    /* The union lets us address the mapped page as either 32-bit or 64-bit entries. */
    union
    {
        uint64_t *pau64;
        uint32_t *pau32;
    } u;
    u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);

    /* Safety precaution in case we change the paging for other modes too in the future. */
    Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);

#ifdef VBOX_STRICT
    /*
     * Some sanity checks.
     */
    switch (pUserPage->enmKind)
    {
        case PGMPOOLKIND_ROOT_32BIT_PD:
            Assert(pUser->iUserTable < X86_PG_ENTRIES);
            Assert(!(u.pau32[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
            break;
        case PGMPOOLKIND_ROOT_PAE_PD:
            Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
            Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
            break;
        case PGMPOOLKIND_ROOT_PDPT:
            Assert(pUser->iUserTable < 4);
            Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
            break;
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
            Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
            break;
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
            Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
            Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
            break;
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
            Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
            Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
            break;
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
            Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
            /* GCPhys >> PAGE_SHIFT is the index here */
            break;
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
            Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
            break;

        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
            Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
            break;

        case PGMPOOLKIND_ROOT_NESTED:
            Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
            break;

        default:
            AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
            break;
    }
#endif /* VBOX_STRICT */

    /*
     * Clear the entry in the user page.
     */
    switch (pUserPage->enmKind)
    {
        /* 32-bit entries */
        case PGMPOOLKIND_ROOT_32BIT_PD:
            u.pau32[pUser->iUserTable] = 0;
            break;

        /* 64-bit entries */
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_ROOT_PAE_PD:
        case PGMPOOLKIND_ROOT_PDPT:
        case PGMPOOLKIND_ROOT_NESTED:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
            u.pau64[pUser->iUserTable] = 0;
            break;

        default:
            AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
    }
}
2811
2812
2813/**
2814 * Clears all users of a page.
2815 */
2816static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2817{
2818 /*
2819 * Free all the user records.
2820 */
2821 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2822 uint16_t i = pPage->iUserHead;
2823 while (i != NIL_PGMPOOL_USER_INDEX)
2824 {
2825 /* Clear enter in user table. */
2826 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2827
2828 /* Free it. */
2829 const uint16_t iNext = paUsers[i].iNext;
2830 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2831 paUsers[i].iNext = pPool->iUserFreeHead;
2832 pPool->iUserFreeHead = i;
2833
2834 /* Next. */
2835 i = iNext;
2836 }
2837 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2838}
2839
2840#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2841
2842/**
2843 * Allocates a new physical cross reference extent.
2844 *
2845 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2846 * @param pVM The VM handle.
2847 * @param piPhysExt Where to store the phys ext index.
2848 */
2849PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2850{
2851 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2852 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2853 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2854 {
2855 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2856 return NULL;
2857 }
2858 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2859 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2860 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2861 *piPhysExt = iPhysExt;
2862 return pPhysExt;
2863}
2864
2865
2866/**
2867 * Frees a physical cross reference extent.
2868 *
2869 * @param pVM The VM handle.
2870 * @param iPhysExt The extent to free.
2871 */
2872void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2873{
2874 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2875 Assert(iPhysExt < pPool->cMaxPhysExts);
2876 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2877 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2878 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2879 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2880 pPool->iPhysExtFreeHead = iPhysExt;
2881}
2882
2883
2884/**
2885 * Frees a physical cross reference extent.
2886 *
2887 * @param pVM The VM handle.
2888 * @param iPhysExt The extent to free.
2889 */
2890void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2891{
2892 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2893
2894 const uint16_t iPhysExtStart = iPhysExt;
2895 PPGMPOOLPHYSEXT pPhysExt;
2896 do
2897 {
2898 Assert(iPhysExt < pPool->cMaxPhysExts);
2899 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2900 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2901 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2902
2903 /* next */
2904 iPhysExt = pPhysExt->iNext;
2905 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2906
2907 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2908 pPool->iPhysExtFreeHead = iPhysExtStart;
2909}
2910
2911
2912/**
2913 * Insert a reference into a list of physical cross reference extents.
2914 *
2915 * @returns The new ram range flags (top 16-bits).
2916 *
2917 * @param pVM The VM handle.
2918 * @param iPhysExt The physical extent index of the list head.
2919 * @param iShwPT The shadow page table index.
2920 *
2921 */
2922static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2923{
2924 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2925 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2926
2927 /* special common case. */
2928 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2929 {
2930 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2931 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2932 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2933 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2934 }
2935
2936 /* general treatment. */
2937 const uint16_t iPhysExtStart = iPhysExt;
2938 unsigned cMax = 15;
2939 for (;;)
2940 {
2941 Assert(iPhysExt < pPool->cMaxPhysExts);
2942 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2943 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2944 {
2945 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2946 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2947 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2948 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2949 }
2950 if (!--cMax)
2951 {
2952 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2953 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2954 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2955 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2956 }
2957 }
2958
2959 /* add another extent to the list. */
2960 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2961 if (!pNew)
2962 {
2963 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2964 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2965 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2966 }
2967 pNew->iNext = iPhysExtStart;
2968 pNew->aidx[0] = iShwPT;
2969 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2970 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2971}
2972
2973
2974/**
2975 * Add a reference to guest physical page where extents are in use.
2976 *
2977 * @returns The new ram range flags (top 16-bits).
2978 *
2979 * @param pVM The VM handle.
2980 * @param u16 The ram range flags (top 16-bits).
2981 * @param iShwPT The shadow page table index.
2982 */
2983uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2984{
2985 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2986 {
2987 /*
2988 * Convert to extent list.
2989 */
2990 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2991 uint16_t iPhysExt;
2992 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2993 if (pPhysExt)
2994 {
2995 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2996 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2997 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2998 pPhysExt->aidx[1] = iShwPT;
2999 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3000 }
3001 else
3002 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3003 }
3004 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
3005 {
3006 /*
3007 * Insert into the extent list.
3008 */
3009 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
3010 }
3011 else
3012 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3013 return u16;
3014}
3015
3016
3017/**
3018 * Clear references to guest physical memory.
3019 *
3020 * @param pPool The pool.
3021 * @param pPage The page.
3022 * @param pPhysPage Pointer to the aPages entry in the ram range.
3023 */
3024void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3025{
3026 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
3027 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3028
3029 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
3030 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
3031 {
3032 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3033 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3034 do
3035 {
3036 Assert(iPhysExt < pPool->cMaxPhysExts);
3037
3038 /*
3039 * Look for the shadow page and check if it's all freed.
3040 */
3041 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3042 {
3043 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3044 {
3045 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3046
3047 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3048 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3049 {
3050 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3051 return;
3052 }
3053
3054 /* we can free the node. */
3055 PVM pVM = pPool->CTX_SUFF(pVM);
3056 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3057 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3058 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3059 {
3060 /* lonely node */
3061 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3062 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
3063 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3064 }
3065 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3066 {
3067 /* head */
3068 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
3069 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
3070 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
3071 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
3072 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3073 }
3074 else
3075 {
3076 /* in list */
3077 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3078 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3079 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3080 }
3081 iPhysExt = iPhysExtNext;
3082 return;
3083 }
3084 }
3085
3086 /* next */
3087 iPhysExtPrev = iPhysExt;
3088 iPhysExt = paPhysExts[iPhysExt].iNext;
3089 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3090
3091 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3092 }
3093 else /* nothing to do */
3094 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
3095}
3096
3097
3098/**
3099 * Clear references to guest physical memory.
3100 *
3101 * This is the same as pgmPoolTracDerefGCPhys except that the guest physical address
3102 * is assumed to be correct, so the linear search can be skipped and we can assert
3103 * at an earlier point.
3104 *
3105 * @param pPool The pool.
3106 * @param pPage The page.
3107 * @param HCPhys The host physical address corresponding to the guest page.
3108 * @param GCPhys The guest physical address corresponding to HCPhys.
3109 */
3110static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3111{
3112 /*
3113 * Walk range list.
3114 */
3115 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3116 while (pRam)
3117 {
3118 RTGCPHYS off = GCPhys - pRam->GCPhys;
3119 if (off < pRam->cb)
3120 {
3121 /* does it match? */
3122 const unsigned iPage = off >> PAGE_SHIFT;
3123 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3124#ifdef LOG_ENABLED
3125RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3126Log(("pgmPoolTracDerefGCPhys %VHp vs %VHp\n", HCPhysPage, HCPhys));
3127#endif
3128 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3129 {
3130 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3131 return;
3132 }
3133 break;
3134 }
3135 pRam = pRam->CTX_SUFF(pNext);
3136 }
3137 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
3138}
3139
3140
3141/**
3142 * Clear references to guest physical memory.
3143 *
3144 * @param pPool The pool.
3145 * @param pPage The page.
3146 * @param HCPhys The host physical address corresponding to the guest page.
3147 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
3148 */
3149static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3150{
3151 /*
3152 * Walk range list.
3153 */
3154 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3155 while (pRam)
3156 {
3157 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3158 if (off < pRam->cb)
3159 {
3160 /* does it match? */
3161 const unsigned iPage = off >> PAGE_SHIFT;
3162 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3163 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3164 {
3165 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3166 return;
3167 }
3168 break;
3169 }
3170 pRam = pRam->CTX_SUFF(pNext);
3171 }
3172
3173 /*
3174 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3175 */
3176 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3177 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3178 while (pRam)
3179 {
3180 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3181 while (iPage-- > 0)
3182 {
3183 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3184 {
3185 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
3186 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3187 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3188 return;
3189 }
3190 }
3191 pRam = pRam->CTX_SUFF(pNext);
3192 }
3193
3194 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
3195}
3196
3197
3198/**
3199 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3200 *
3201 * @param pPool The pool.
3202 * @param pPage The page.
3203 * @param pShwPT The shadow page table (mapping of the page).
3204 * @param pGstPT The guest page table.
3205 */
3206DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3207{
3208 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3209 if (pShwPT->a[i].n.u1Present)
3210 {
3211 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3212 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3213 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3214 if (!--pPage->cPresent)
3215 break;
3216 }
3217}
3218
3219
3220/**
3221 * Clear references to guest physical memory in a PAE / 32-bit page table.
3222 *
3223 * @param pPool The pool.
3224 * @param pPage The page.
3225 * @param pShwPT The shadow page table (mapping of the page).
3226 * @param pGstPT The guest page table (just a half one).
3227 */
3228DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3229{
3230 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3231 if (pShwPT->a[i].n.u1Present)
3232 {
3233 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
3234 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3235 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3236 }
3237}
3238
3239
3240/**
3241 * Clear references to guest physical memory in a PAE / PAE page table.
3242 *
3243 * @param pPool The pool.
3244 * @param pPage The page.
3245 * @param pShwPT The shadow page table (mapping of the page).
3246 * @param pGstPT The guest page table.
3247 */
3248DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3249{
3250 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3251 if (pShwPT->a[i].n.u1Present)
3252 {
3253 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
3254 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3255 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3256 }
3257}
3258
3259
3260/**
3261 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3262 *
3263 * @param pPool The pool.
3264 * @param pPage The page.
3265 * @param pShwPT The shadow page table (mapping of the page).
3266 */
3267DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3268{
3269 RTGCPHYS GCPhys = pPage->GCPhys;
3270 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3271 if (pShwPT->a[i].n.u1Present)
3272 {
3273 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3274 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3275 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3276 }
3277}
3278
3279
3280/**
3281 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3282 *
3283 * @param pPool The pool.
3284 * @param pPage The page.
3285 * @param pShwPT The shadow page table (mapping of the page).
3286 */
3287DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3288{
3289 RTGCPHYS GCPhys = pPage->GCPhys;
3290 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3291 if (pShwPT->a[i].n.u1Present)
3292 {
3293 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%VGp\n",
3294 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3295 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3296 }
3297}
3298
3299#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3300
3301/**
3302 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
3303 *
3304 * @param pPool The pool.
3305 * @param pPage The page.
3306 * @param pShwPD The shadow page directory (mapping of the page).
3307 */
3308DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3309{
3310 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3311 {
3312 if (pShwPD->a[i].n.u1Present)
3313 {
3314 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3315 if (pSubPage)
3316 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3317 else
3318 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3319 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3320 }
3321 }
3322}
3323
3324
3325/**
3326 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3327 *
3328 * @param pPool The pool.
3329 * @param pPage The page.
3330 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3331 */
3332DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3333{
3334 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3335 {
3336 if (pShwPDPT->a[i].n.u1Present)
3337 {
3338 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3339 if (pSubPage)
3340 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3341 else
3342 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3343 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3344 }
3345 }
3346}
3347
3348
3349/**
3350 * Clear references to shadowed pages in a 64-bit level 4 page table.
3351 *
3352 * @param pPool The pool.
3353 * @param pPage The page.
3354 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
3355 */
3356DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3357{
3358 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3359 {
3360 if (pShwPML4->a[i].n.u1Present)
3361 {
3362 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3363 if (pSubPage)
3364 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3365 else
3366 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3367 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3368 }
3369 }
3370}
3371
3372
3373/**
3374 * Clear references to shadowed pages in an EPT page table.
3375 *
3376 * @param pPool The pool.
3377 * @param pPage The page.
3378 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
3379 */
3380DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3381{
3382 RTGCPHYS GCPhys = pPage->GCPhys;
3383 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3384 if (pShwPT->a[i].n.u1Present)
3385 {
3386 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
3387 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
3388 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3389 }
3390}
3391
3392
3393/**
3394 * Clear references to shadowed pages in an EPT page directory.
3395 *
3396 * @param pPool The pool.
3397 * @param pPage The page.
3398 * @param pShwPD The shadow page directory (mapping of the page).
3399 */
3400DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3401{
3402 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3403 {
3404 if (pShwPD->a[i].n.u1Present)
3405 {
3406 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3407 if (pSubPage)
3408 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3409 else
3410 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3411 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3412 }
3413 }
3414}
3415
3416
3417/**
3418 * Clear references to shadowed pages in an EPT page directory pointer table.
3419 *
3420 * @param pPool The pool.
3421 * @param pPage The page.
3422 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3423 */
3424DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3425{
3426 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3427 {
3428 if (pShwPDPT->a[i].n.u1Present)
3429 {
3430 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3431 if (pSubPage)
3432 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3433 else
3434 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3435 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3436 }
3437 }
3438}
3439
3440
3441/**
3442 * Clears all references made by this page.
3443 *
3444 * This includes other shadow pages and GC physical addresses.
3445 *
3446 * @param pPool The pool.
3447 * @param pPage The page.
3448 */
3449static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3450{
3451 /*
3452 * Map the shadow page and take action according to the page kind.
3453 */
3454 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3455 switch (pPage->enmKind)
3456 {
3457#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3458 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3459 {
3460 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3461 void *pvGst;
3462 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3463 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3464 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3465 break;
3466 }
3467
3468 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3469 {
3470 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3471 void *pvGst;
3472 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3473 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3474 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3475 break;
3476 }
3477
3478 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3479 {
3480 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3481 void *pvGst;
3482 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3483 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3484 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3485 break;
3486 }
3487
3488 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3489 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3490 {
3491 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3492 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3493 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3494 break;
3495 }
3496
3497 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3498 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3499 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3500 {
3501 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3502 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3503 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3504 break;
3505 }
3506
3507#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3508 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3509 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3510 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3511 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3512 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3513 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3514 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3515 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3516 break;
3517#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3518
3519 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
3520 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3521 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3522 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3523 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3524 break;
3525
3526 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3527 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3528 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3529 break;
3530
3531 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
3532 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3533 break;
3534
3535 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3536 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
3537 break;
3538
3539 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3540 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
3541 break;
3542
3543 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3544 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
3545 break;
3546
3547 default:
3548 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3549 }
3550
3551 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
3552 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3553 ASMMemZeroPage(pvShw);
3554 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3555 pPage->fZeroed = true;
3556}
3557
3558#endif /* PGMPOOL_WITH_USER_TRACKING */
3559
3560/**
3561 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3562 *
3563 * @param pPool The pool.
3564 */
3565static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3566{
3567 /*
3568 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
3569 */
3570 Assert(NIL_PGMPOOL_IDX == 0);
3571 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3572 {
3573 /*
3574 * Get the page address.
3575 */
3576 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3577 union
3578 {
3579 uint64_t *pau64;
3580 uint32_t *pau32;
3581 } u;
3582 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3583
3584 /*
3585 * Mark stuff not present.
3586 */
3587 switch (pPage->enmKind)
3588 {
3589 case PGMPOOLKIND_ROOT_32BIT_PD:
3590 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3591 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3592 u.pau32[iPage] = 0;
3593 break;
3594
3595 case PGMPOOLKIND_ROOT_PAE_PD:
3596 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * X86_PG_PAE_PDPE_ENTRIES; iPage++)
3597 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3598 u.pau64[iPage] = 0;
3599 break;
3600
3601 case PGMPOOLKIND_ROOT_PDPT:
3602 /* Not root of shadowed pages currently, ignore it. */
3603 break;
3604
3605 case PGMPOOLKIND_ROOT_NESTED:
3606 ASMMemZero32(u.pau64, PAGE_SIZE);
3607 break;
3608 }
3609 }
3610
3611 /*
3612 * Paranoia (to be removed), flag a global CR3 sync.
3613 */
3614 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
3615}
3616
3617
3618/**
3619 * Flushes the entire cache.
3620 *
3621 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3622 * and execute this CR3 flush.
3623 *
3624 * @param pPool The pool.
3625 */
3626static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3627{
3628 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3629 LogFlow(("pgmPoolFlushAllInt:\n"));
3630
3631 /*
3632 * If there are no pages in the pool, there is nothing to do.
3633 */
3634 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3635 {
3636 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3637 return;
3638 }
3639
3640 /*
3641 * Nuke the free list and reinsert all pages into it.
3642 */
3643 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3644 {
3645 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3646
3647#ifdef IN_RING3
3648 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMR3, pPage->pvPageR3));
3649#endif
3650#ifdef PGMPOOL_WITH_MONITORING
3651 if (pPage->fMonitored)
3652 pgmPoolMonitorFlush(pPool, pPage);
3653 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3654 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3655 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3656 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3657 pPage->cModifications = 0;
3658#endif
3659 pPage->GCPhys = NIL_RTGCPHYS;
3660 pPage->enmKind = PGMPOOLKIND_FREE;
3661 Assert(pPage->idx == i);
3662 pPage->iNext = i + 1;
3663 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3664 pPage->fSeenNonGlobal = false;
3665 pPage->fMonitored= false;
3666 pPage->fCached = false;
3667 pPage->fReusedFlushPending = false;
3668 pPage->fCR3Mix = false;
3669#ifdef PGMPOOL_WITH_USER_TRACKING
3670 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3671#endif
3672#ifdef PGMPOOL_WITH_CACHE
3673 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3674 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3675#endif
3676 }
3677 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3678 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3679 pPool->cUsedPages = 0;
3680
3681#ifdef PGMPOOL_WITH_USER_TRACKING
3682 /*
3683 * Zap and reinitialize the user records.
3684 */
3685 pPool->cPresent = 0;
3686 pPool->iUserFreeHead = 0;
3687 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3688 const unsigned cMaxUsers = pPool->cMaxUsers;
3689 for (unsigned i = 0; i < cMaxUsers; i++)
3690 {
3691 paUsers[i].iNext = i + 1;
3692 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3693 paUsers[i].iUserTable = 0xfffffffe;
3694 }
3695 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3696#endif
3697
3698#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3699 /*
3700 * Clear all the GCPhys links and rebuild the phys ext free list.
3701 */
3702 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3703 pRam;
3704 pRam = pRam->CTX_SUFF(pNext))
3705 {
3706 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3707 while (iPage-- > 0)
3708 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3709 }
3710
3711 pPool->iPhysExtFreeHead = 0;
3712 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3713 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3714 for (unsigned i = 0; i < cMaxPhysExts; i++)
3715 {
3716 paPhysExts[i].iNext = i + 1;
3717 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3718 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3719 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3720 }
3721 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3722#endif
3723
3724#ifdef PGMPOOL_WITH_MONITORING
3725 /*
3726 * Just zap the modified list.
3727 */
3728 pPool->cModifiedPages = 0;
3729 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3730#endif
3731
3732#ifdef PGMPOOL_WITH_CACHE
3733 /*
3734 * Clear the GCPhys hash and the age list.
3735 */
3736 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
3737 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3738 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3739 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3740#endif
3741
3742 /*
3743 * Flush all the special root pages.
3744 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3745 */
3746 pgmPoolFlushAllSpecialRoots(pPool);
3747 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3748 {
3749 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3750 pPage->iNext = NIL_PGMPOOL_IDX;
3751#ifdef PGMPOOL_WITH_MONITORING
3752 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3753 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3754 pPage->cModifications = 0;
3755 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3756 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3757 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3758 if (pPage->fMonitored)
3759 {
3760 PVM pVM = pPool->CTX_SUFF(pVM);
3761 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3762 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3763 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3764 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
3765 pPool->pszAccessHandler);
3766 AssertFatalRCSuccess(rc);
3767# ifdef PGMPOOL_WITH_CACHE
3768 pgmPoolHashInsert(pPool, pPage);
3769# endif
3770 }
3771#endif
3772#ifdef PGMPOOL_WITH_USER_TRACKING
3773 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3774#endif
3775#ifdef PGMPOOL_WITH_CACHE
3776 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3777 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3778#endif
3779 }
3780
3781 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3782}
3783
3784
3785/**
3786 * Flushes a pool page.
3787 *
3788 * This moves the page to the free list after removing all user references to it.
3789 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3790 *
3791 * @returns VBox status code.
3792 * @retval VINF_SUCCESS on success.
3793 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
3794 * @param pPool The pool.
3795 * @param HCPhys The HC physical address of the shadow page.
3796 */
3797int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3798{
3799 int rc = VINF_SUCCESS;
3800 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3801 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3802 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3803
3804 /*
3805 * Quietly reject any attempts at flushing any of the special root pages.
3806 */
3807 if (pPage->idx < PGMPOOL_IDX_FIRST)
3808 {
3809 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3810 return VINF_SUCCESS;
3811 }
3812
3813 /*
3814 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
3815 */
3816 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
3817 {
3818 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4, ("Can't free the shadow CR3! (%VGp vs %VGp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
3819 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3820 return VINF_SUCCESS;
3821 }
3822
3823 /*
3824 * Mark the page as being in need of a ASMMemZeroPage().
3825 */
3826 pPage->fZeroed = false;
3827
3828#ifdef PGMPOOL_WITH_USER_TRACKING
3829 /*
3830 * Clear the page.
3831 */
3832 pgmPoolTrackClearPageUsers(pPool, pPage);
3833 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3834 pgmPoolTrackDeref(pPool, pPage);
3835 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3836#endif
3837
3838#ifdef PGMPOOL_WITH_CACHE
3839 /*
3840 * Flush it from the cache.
3841 */
3842 pgmPoolCacheFlushPage(pPool, pPage);
3843#endif /* PGMPOOL_WITH_CACHE */
3844
3845#ifdef PGMPOOL_WITH_MONITORING
3846 /*
3847 * Deregistering the monitoring.
3848 */
3849 if (pPage->fMonitored)
3850 rc = pgmPoolMonitorFlush(pPool, pPage);
3851#endif
3852
3853 /*
3854 * Free the page.
3855 */
3856 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3857 pPage->iNext = pPool->iFreeHead;
3858 pPool->iFreeHead = pPage->idx;
3859 pPage->enmKind = PGMPOOLKIND_FREE;
3860 pPage->GCPhys = NIL_RTGCPHYS;
3861 pPage->fReusedFlushPending = false;
3862
3863 pPool->cUsedPages--;
3864 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3865 return rc;
3866}
3867
3868
3869/**
3870 * Frees a usage of a pool page.
3871 *
3872 * The caller is responsible to updating the user table so that it no longer
3873 * references the shadow page.
3874 *
3875 * @param pPool The pool.
3876 * @param HCPhys The HC physical address of the shadow page.
3877 * @param iUser The shadow page pool index of the user table.
3878 * @param iUserTable The index into the user table (shadowed).
3879 */
3880void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3881{
3882 STAM_PROFILE_START(&pPool->StatFree, a);
3883 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3884 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3885 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3886#ifdef PGMPOOL_WITH_USER_TRACKING
3887 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3888#endif
3889#ifdef PGMPOOL_WITH_CACHE
3890 if (!pPage->fCached)
3891#endif
3892 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3893 STAM_PROFILE_STOP(&pPool->StatFree, a);
3894}
3895
3896
3897/**
3898 * Makes one or more free page free.
3899 *
3900 * @returns VBox status code.
3901 * @retval VINF_SUCCESS on success.
3902 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3903 *
3904 * @param pPool The pool.
3905 * @param iUser The user of the page.
3906 */
3907static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3908{
3909 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3910
3911 /*
3912 * If the pool isn't full grown yet, expand it.
3913 */
3914 if (pPool->cCurPages < pPool->cMaxPages)
3915 {
3916 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3917#ifdef IN_RING3
3918 int rc = PGMR3PoolGrow(pPool->pVMR3);
3919#else
3920 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3921#endif
3922 if (VBOX_FAILURE(rc))
3923 return rc;
3924 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3925 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3926 return VINF_SUCCESS;
3927 }
3928
3929#ifdef PGMPOOL_WITH_CACHE
3930 /*
3931 * Free one cached page.
3932 */
3933 return pgmPoolCacheFreeOne(pPool, iUser);
3934#else
3935 /*
3936 * Flush the pool.
3937 * If we have tracking enabled, it should be possible to come up with
3938 * a cheap replacement strategy...
3939 */
3940 /* @todo incompatible with long mode paging (cr3 root will be flushed) */
3941 Assert(!CPUMIsGuestInLongMode(pVM));
3942 pgmPoolFlushAllInt(pPool);
3943 return VERR_PGM_POOL_FLUSHED;
3944#endif
3945}
3946
3947
3948/**
3949 * Allocates a page from the pool.
3950 *
3951 * This page may actually be a cached page and not in need of any processing
3952 * on the callers part.
3953 *
3954 * @returns VBox status code.
3955 * @retval VINF_SUCCESS if a NEW page was allocated.
3956 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3957 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3958 * @param pVM The VM handle.
3959 * @param GCPhys The GC physical address of the page we're gonna shadow.
3960 * For 4MB and 2MB PD entries, it's the first address the
3961 * shadow PT is covering.
3962 * @param enmKind The kind of mapping.
3963 * @param iUser The shadow page pool index of the user table.
3964 * @param iUserTable The index into the user table (shadowed).
3965 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3966 */
3967int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
3968{
3969 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3970 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3971 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3972 *ppPage = NULL;
3973
3974#ifdef PGMPOOL_WITH_CACHE
3975 if (pPool->fCacheEnabled)
3976 {
3977 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3978 if (VBOX_SUCCESS(rc2))
3979 {
3980 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3981 LogFlow(("pgmPoolAlloc: cached returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3982 return rc2;
3983 }
3984 }
3985#endif
3986
3987 /*
3988 * Allocate a new one.
3989 */
3990 int rc = VINF_SUCCESS;
3991 uint16_t iNew = pPool->iFreeHead;
3992 if (iNew == NIL_PGMPOOL_IDX)
3993 {
3994 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3995 if (VBOX_FAILURE(rc))
3996 {
3997 if (rc != VERR_PGM_POOL_CLEARED)
3998 {
3999 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
4000 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4001 return rc;
4002 }
4003 Log(("pgmPoolMakeMoreFreePages failed with %Vrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
4004 rc = VERR_PGM_POOL_FLUSHED;
4005 }
4006 iNew = pPool->iFreeHead;
4007 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4008 }
4009
4010 /* unlink the free head */
4011 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4012 pPool->iFreeHead = pPage->iNext;
4013 pPage->iNext = NIL_PGMPOOL_IDX;
4014
4015 /*
4016 * Initialize it.
4017 */
4018 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4019 pPage->enmKind = enmKind;
4020 pPage->GCPhys = GCPhys;
4021 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4022 pPage->fMonitored = false;
4023 pPage->fCached = false;
4024 pPage->fReusedFlushPending = false;
4025 pPage->fCR3Mix = false;
4026#ifdef PGMPOOL_WITH_MONITORING
4027 pPage->cModifications = 0;
4028 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4029 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4030#endif
4031#ifdef PGMPOOL_WITH_USER_TRACKING
4032 pPage->cPresent = 0;
4033 pPage->iFirstPresent = ~0;
4034
4035 /*
4036 * Insert into the tracking and cache. If this fails, free the page.
4037 */
4038 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4039 if (VBOX_FAILURE(rc3))
4040 {
4041 if (rc3 != VERR_PGM_POOL_CLEARED)
4042 {
4043 pPool->cUsedPages--;
4044 pPage->enmKind = PGMPOOLKIND_FREE;
4045 pPage->GCPhys = NIL_RTGCPHYS;
4046 pPage->iNext = pPool->iFreeHead;
4047 pPool->iFreeHead = pPage->idx;
4048 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4049 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
4050 return rc3;
4051 }
4052 Log(("pgmPoolTrackInsert failed with %Vrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4053 rc = VERR_PGM_POOL_FLUSHED;
4054 }
4055#endif /* PGMPOOL_WITH_USER_TRACKING */
4056
4057 /*
4058 * Commit the allocation, clear the page and return.
4059 */
4060#ifdef VBOX_WITH_STATISTICS
4061 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4062 pPool->cUsedPagesHigh = pPool->cUsedPages;
4063#endif
4064
4065 if (!pPage->fZeroed)
4066 {
4067 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4068 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4069 ASMMemZeroPage(pv);
4070 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4071 }
4072
4073 *ppPage = pPage;
4074 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4075 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4076 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4077 return rc;
4078}
4079
4080
4081/**
4082 * Frees a usage of a pool page.
4083 *
4084 * @param pVM The VM handle.
4085 * @param HCPhys The HC physical address of the shadow page.
4086 * @param iUser The shadow page pool index of the user table.
4087 * @param iUserTable The index into the user table (shadowed).
4088 */
4089void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4090{
4091 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4092 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4093 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4094}
4095
4096
4097/**
4098 * Gets a in-use page in the pool by it's physical address.
4099 *
4100 * @returns Pointer to the page.
4101 * @param pVM The VM handle.
4102 * @param HCPhys The HC physical address of the shadow page.
4103 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4104 */
4105PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4106{
4107 /** @todo profile this! */
4108 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4109 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4110 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
4111 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
4112 return pPage;
4113}
4114
4115
4116/**
4117 * Flushes the entire cache.
4118 *
4119 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4120 * and execute this CR3 flush.
4121 *
4122 * @param pPool The pool.
4123 */
4124void pgmPoolFlushAll(PVM pVM)
4125{
4126 LogFlow(("pgmPoolFlushAll:\n"));
4127 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4128}
4129
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette