VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp @ 13013

Last change on this file since 13013 was 12936, checked in by vboxsync, 16 years ago

More EPT updates

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 144.6 KB
1/* $Id: PGMAllPool.cpp 12936 2008-10-02 13:19:46Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_GC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42
43
44/*******************************************************************************
45* Internal Functions *
46*******************************************************************************/
47__BEGIN_DECLS
48static void pgmPoolFlushAllInt(PPGMPOOL pPool);
49#ifdef PGMPOOL_WITH_USER_TRACKING
50DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
51DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#endif
54#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
55static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
56#endif
57#ifdef PGMPOOL_WITH_CACHE
58static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
59#endif
60#ifdef PGMPOOL_WITH_MONITORING
61static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
62#endif
63#ifndef IN_RING3
64DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
65#endif
66__END_DECLS
67
68
69/**
70 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
71 *
72 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
73 * @param enmKind The page kind.
74 */
75DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
76{
77 switch (enmKind)
78 {
79 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
82 return true;
83 default:
84 return false;
85 }
86}
87
88
89#ifdef IN_GC
90/**
91 * Maps a pool page into the current context.
92 *
93 * @returns Pointer to the mapping.
94 * @param pVM The VM handle.
95 * @param pPage The page to map.
96 */
97void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
98{
99 /* general pages. */
100 if (pPage->idx >= PGMPOOL_IDX_FIRST)
101 {
102 Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
103 void *pv;
104 int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pv);
105 AssertReleaseRC(rc);
106 return pv;
107 }
108
109 /* special pages. */
110 switch (pPage->idx)
111 {
112 case PGMPOOL_IDX_PD:
113 return pVM->pgm.s.pGC32BitPD;
114 case PGMPOOL_IDX_PAE_PD:
115 case PGMPOOL_IDX_PAE_PD_0:
116 return pVM->pgm.s.apGCPaePDs[0];
117 case PGMPOOL_IDX_PAE_PD_1:
118 return pVM->pgm.s.apGCPaePDs[1];
119 case PGMPOOL_IDX_PAE_PD_2:
120 return pVM->pgm.s.apGCPaePDs[2];
121 case PGMPOOL_IDX_PAE_PD_3:
122 return pVM->pgm.s.apGCPaePDs[3];
123 case PGMPOOL_IDX_PDPT:
124 return pVM->pgm.s.pGCPaePDPT;
125 default:
126 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
127 return NULL;
128 }
129}
130#endif /* IN_GC */
131
132
133#ifdef PGMPOOL_WITH_MONITORING
134/**
135 * Determine the size of a write instruction.
136 * @returns number of bytes written.
137 * @param pDis The disassembler state.
138 */
139static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
140{
141 /*
142 * This is very crude and possibly wrong for some opcodes,
143 * but since it's not really supposed to be called we can
144 * probably live with that.
145 */
146 return DISGetParamSize(pDis, &pDis->param1);
147}
148
149
150/**
151 * Flushes a chain of pages sharing the same access monitor.
152 *
153 * @returns VBox status code suitable for scheduling.
154 * @param pPool The pool.
155 * @param pPage A page in the chain.
156 */
157int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
158{
159 LogFlow(("pgmPoolMonitorChainFlush: Flush page %VGp type=%d\n", pPage->GCPhys, pPage->enmKind));
160
161 /*
162 * Find the list head.
163 */
164 uint16_t idx = pPage->idx;
165 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
166 {
167 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
168 {
169 idx = pPage->iMonitoredPrev;
170 Assert(idx != pPage->idx);
171 pPage = &pPool->aPages[idx];
172 }
173 }
174
175 /*
176 * Iterate the list flushing each shadow page.
177 */
178 int rc = VINF_SUCCESS;
179 for (;;)
180 {
181 idx = pPage->iMonitoredNext;
182 Assert(idx != pPage->idx);
183 if (pPage->idx >= PGMPOOL_IDX_FIRST)
184 {
185 int rc2 = pgmPoolFlushPage(pPool, pPage);
186 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
187 rc = VINF_PGM_SYNC_CR3;
188 }
189 /* next */
190 if (idx == NIL_PGMPOOL_IDX)
191 break;
192 pPage = &pPool->aPages[idx];
193 }
194 return rc;
195}
196
197
198/**
199 * Wrapper for getting the current context pointer to the entry being modified.
200 *
201 * @returns Pointer to the current context mapping of the entry.
202 * @param pPool The pool.
203 * @param pvFault The fault virtual address.
204 * @param GCPhysFault The fault physical address.
205 * @param cbEntry The entry size.
206 */
207#ifdef IN_RING3
208DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
209#else
210DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
211#endif
212{
213#ifdef IN_GC
214 return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
215
216#elif defined(IN_RING0)
217 void *pvRet;
218 int rc = pgmRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
219 AssertFatalRCSuccess(rc);
220 return pvRet;
221
222#elif defined(IN_RING3)
223 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
224#else
225# error "huh?"
226#endif
227}
228
229
230/**
231 * Process shadow entries before they are changed by the guest.
232 *
233 * For PT entries we will clear them. For PD entries, we'll simply check
234 * for mapping conflicts and set the SyncCR3 FF if found.
235 *
236 * @param pPool The pool.
237 * @param pPage The head page.
238 * @param GCPhysFault The guest physical fault address.
239 * @param uAddress In R0 and GC this is the guest context fault address (flat).
240 * In R3 this is the host context 'fault' address.
241 * @param pCpu The disassembler state for figuring out the write size.
242 * This need not be specified if the caller knows we won't do cross-entry accesses.
243 */
244#ifdef IN_RING3
245void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
246#else
247void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
248#endif
249{
250 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
251 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
252 const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
253
254 LogFlow(("pgmPoolMonitorChainChanging: %VGv phys=%VGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));
255
256 for (;;)
257 {
258 union
259 {
260 void *pv;
261 PX86PT pPT;
262 PX86PTPAE pPTPae;
263 PX86PD pPD;
264 PX86PDPAE pPDPae;
265 PX86PDPT pPDPT;
266 PX86PML4 pPML4;
267 } uShw;
268 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
269
270 switch (pPage->enmKind)
271 {
272 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
273 {
274 const unsigned iShw = off / sizeof(X86PTE);
275 if (uShw.pPT->a[iShw].n.u1Present)
276 {
277# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
278 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
279 Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
280 pgmPoolTracDerefGCPhysHint(pPool, pPage,
281 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
282 pGstPte->u & X86_PTE_PG_MASK);
283# endif
284 uShw.pPT->a[iShw].u = 0;
285 }
286 break;
287 }
288
289 /* page/2 sized */
290 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
291 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
292 {
293 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
294 if (uShw.pPTPae->a[iShw].n.u1Present)
295 {
296# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
297 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
298 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
299 pgmPoolTracDerefGCPhysHint(pPool, pPage,
300 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
301 pGstPte->u & X86_PTE_PG_MASK);
302# endif
303 uShw.pPTPae->a[iShw].u = 0;
304 }
305 }
306 break;
307
308 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
309 {
310 const unsigned iShw = off / sizeof(X86PTEPAE);
311 if (uShw.pPTPae->a[iShw].n.u1Present)
312 {
313# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
314 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
315 Log4(("pgmPoolMonitorChainChanging pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
316 pgmPoolTracDerefGCPhysHint(pPool, pPage,
317 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
318 pGstPte->u & X86_PTE_PAE_PG_MASK);
319# endif
320 uShw.pPTPae->a[iShw].u = 0;
321 }
322
323 /* paranoia / a bit assumptive. */
324 if ( pCpu
325 && (off & 7)
326 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
327 {
328 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
329 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
330
331 if (uShw.pPTPae->a[iShw2].n.u1Present)
332 {
333# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
334 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
335 Log4(("pgmPoolMonitorChainChanging pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
336 pgmPoolTracDerefGCPhysHint(pPool, pPage,
337 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
338 pGstPte->u & X86_PTE_PAE_PG_MASK);
339# endif
340 uShw.pPTPae->a[iShw2].u = 0;
341 }
342 }
343
344 break;
345 }
346
347 case PGMPOOLKIND_ROOT_32BIT_PD:
348 {
349 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
350 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
351 {
352 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
353 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
354 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
355 }
356 /* paranoia / a bit assumptive. */
357 else if ( pCpu
358 && (off & 3)
359 && (off & 3) + cbWrite > sizeof(X86PTE))
360 {
361 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
362 if ( iShw2 != iShw
363 && iShw2 < RT_ELEMENTS(uShw.pPD->a)
364 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
365 {
366 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
367 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
368 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
369 }
370 }
371#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
372 if ( uShw.pPD->a[iShw].n.u1Present
373 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
374 {
375 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
376# ifdef IN_GC /* TLB load - we're pushing things a bit... */
377 ASMProbeReadByte(pvAddress);
378# endif
379 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
380 uShw.pPD->a[iShw].u = 0;
381 }
382#endif
383 break;
384 }
385
386 case PGMPOOLKIND_ROOT_PAE_PD:
387 {
388 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
389 for (unsigned i = 0; i < 2; i++, iShw++)
390 {
391 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
392 {
393 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
394 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
395 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
396 }
397 /* paranoia / a bit assumptive. */
398 else if ( pCpu
399 && (off & 3)
400 && (off & 3) + cbWrite > 4)
401 {
402 const unsigned iShw2 = iShw + 2;
403 if ( iShw2 < RT_ELEMENTS(uShw.pPDPae->a)
404 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
405 {
406 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
407 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
408 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
409 }
410 }
411#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
412 if ( uShw.pPDPae->a[iShw].n.u1Present
413 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
414 {
415 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
416# ifdef IN_GC /* TLB load - we're pushing things a bit... */
417 ASMProbeReadByte(pvAddress);
418# endif
419 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
420 uShw.pPDPae->a[iShw].u = 0;
421 }
422#endif
423 }
424 break;
425 }
426
427 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
428 {
429 const unsigned iShw = off / sizeof(X86PDEPAE);
430 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
431 {
432 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
433 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
434 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
435 }
436#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
437 /* Causes trouble when the guest uses a PDE to refer to the whole page table level structure (we invalidate here; faults occur later on when it tries
438 * to change the page table entries).
439 * -> recheck; probably only applies to the GC case.
440 */
441 else
442 {
443 if (uShw.pPDPae->a[iShw].n.u1Present)
444 {
445 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
446 pgmPoolFree(pPool->CTXSUFF(pVM),
447 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
448 /* Note: hardcoded PAE implementation dependency */
449 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
450 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
451 uShw.pPDPae->a[iShw].u = 0;
452 }
453 }
454#endif
455 /* paranoia / a bit assumptive. */
456 if ( pCpu
457 && (off & 7)
458 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
459 {
460 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
461 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
462
463 if ( iShw2 != iShw
464 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
465 {
466 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
467 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
468 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
469 }
470#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
471 else
472 if (uShw.pPDPae->a[iShw2].n.u1Present)
473 {
474 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
475 pgmPoolFree(pPool->CTXSUFF(pVM),
476 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
477 /* Note: hardcoded PAE implementation dependency */
478 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
479 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
480 uShw.pPDPae->a[iShw2].u = 0;
481 }
482#endif
483 }
484 break;
485 }
486
487 case PGMPOOLKIND_ROOT_PDPT:
488 {
489 /* Hopefully this doesn't happen very often:
490 * - touching unused parts of the page
491 * - messing with the bits of pd pointers without changing the physical address
492 */
493 const unsigned iShw = off / sizeof(X86PDPE);
494 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
495 {
496 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
497 {
498 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
499 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
500 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
501 }
502 /* paranoia / a bit assumptive. */
503 else if ( pCpu
504 && (off & 7)
505 && (off & 7) + cbWrite > sizeof(X86PDPE))
506 {
507 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
508 if ( iShw2 != iShw
509 && iShw2 < X86_PG_PAE_PDPE_ENTRIES
510 && uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
511 {
512 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
513 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
514 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
515 }
516 }
517 }
518 break;
519 }
520
521#ifndef IN_GC
522 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
523 {
524 Assert(pPage->enmKind == PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD);
525
526 const unsigned iShw = off / sizeof(X86PDEPAE);
527 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
528 {
529 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
530 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
531 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
532 }
533 else
534 {
535 if (uShw.pPDPae->a[iShw].n.u1Present)
536 {
537 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
538 pgmPoolFree(pPool->CTXSUFF(pVM),
539 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
540 pPage->idx,
541 iShw);
542 uShw.pPDPae->a[iShw].u = 0;
543 }
544 }
545 /* paranoia / a bit assumptive. */
546 if ( pCpu
547 && (off & 7)
548 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
549 {
550 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
551 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
552
553 if ( iShw2 != iShw
554 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
555 {
556 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
557 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
558 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
559 }
560 else
561 if (uShw.pPDPae->a[iShw2].n.u1Present)
562 {
563 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
564 pgmPoolFree(pPool->CTXSUFF(pVM),
565 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
566 pPage->idx,
567 iShw2);
568 uShw.pPDPae->a[iShw2].u = 0;
569 }
570 }
571 break;
572 }
573
574 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
575 {
576 /* Hopefully this doesn't happen very often:
577 * - messing with the bits of pd pointers without changing the physical address
578 */
579 if (!VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
580 {
581 const unsigned iShw = off / sizeof(X86PDPE);
582 if (uShw.pPDPT->a[iShw].n.u1Present)
583 {
584 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
585 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
586 uShw.pPDPT->a[iShw].u = 0;
587 }
588 /* paranoia / a bit assumptive. */
589 if ( pCpu
590 && (off & 7)
591 && (off & 7) + cbWrite > sizeof(X86PDPE))
592 {
593 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
594 if (uShw.pPDPT->a[iShw2].n.u1Present)
595 {
596 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
597 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
598 uShw.pPDPT->a[iShw2].u = 0;
599 }
600 }
601 }
602 break;
603 }
604
605 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
606 {
607 /* Hopefully this doesn't happen very often:
608 * - messing with the bits of pd pointers without changing the physical address
609 */
610 if (!VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
611 {
612 const unsigned iShw = off / sizeof(X86PDPE);
613 if (uShw.pPML4->a[iShw].n.u1Present)
614 {
615 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
616 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
617 uShw.pPML4->a[iShw].u = 0;
618 }
619 /* paranoia / a bit assumptive. */
620 if ( pCpu
621 && (off & 7)
622 && (off & 7) + cbWrite > sizeof(X86PDPE))
623 {
624 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
625 if (uShw.pPML4->a[iShw2].n.u1Present)
626 {
627 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
628 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
629 uShw.pPML4->a[iShw2].u = 0;
630 }
631 }
632 }
633 break;
634 }
635#endif /* !IN_GC */
636
637 default:
638 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
639 }
640
641 /* next */
642 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
643 return;
644 pPage = &pPool->aPages[pPage->iMonitoredNext];
645 }
646}
647
648
649# ifndef IN_RING3
650/**
651 * Checks if an access could be a fork operation in progress.
652 *
653 * Meaning that the guest is setting up the parent process for copy-on-write.
654 *
655 * @returns true if it's likely that we're forking, otherwise false.
656 * @param pPool The pool.
657 * @param pCpu The disassembled instruction.
658 * @param offFault The access offset.
659 */
660DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
661{
662 /*
663 * i386 linux is using btr to clear X86_PTE_RW.
664 * The functions involved are (2.6.16 source inspection):
665 * clear_bit
666 * ptep_set_wrprotect
667 * copy_one_pte
668 * copy_pte_range
669 * copy_pmd_range
670 * copy_pud_range
671 * copy_page_range
672 * dup_mmap
673 * dup_mm
674 * copy_mm
675 * copy_process
676 * do_fork
677 */
678 if ( pCpu->pCurInstr->opcode == OP_BTR
679 && !(offFault & 4)
680 /** @todo Validate that the bit index is X86_PTE_RW. */
681 )
682 {
683 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
684 return true;
685 }
686 return false;
687}
688
689
690/**
691 * Determine whether the page is likely to have been reused.
692 *
693 * @returns true if we consider the page as being reused for a different purpose.
694 * @returns false if we consider it to still be a paging page.
695 * @param pVM VM Handle.
696 * @param pPage The page in question.
697 * @param pRegFrame Trap register frame.
698 * @param pCpu The disassembly info for the faulting instruction.
699 * @param pvFault The fault address.
700 *
701 * @remark The REP prefix check is left to the caller because of STOSD/W.
702 */
703DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
704{
705#ifndef IN_GC
706 if ( HWACCMHasPendingIrq(pVM)
707 && (pRegFrame->rsp - pvFault) < 32)
708 {
709 /* Fault caused by stack writes while trying to inject an interrupt event. */
710 Log(("pgmPoolMonitorIsReused: reused %VGv for interrupt stack (rsp=%VGv).\n", pvFault, pRegFrame->rsp));
711 return true;
712 }
713#else
714 NOREF(pVM);
715#endif
716
717 switch (pCpu->pCurInstr->opcode)
718 {
719 /* call implies the actual push of the return address faulted */
720 case OP_CALL:
721 Log4(("pgmPoolMonitorIsReused: CALL\n"));
722 return true;
723 case OP_PUSH:
724 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
725 return true;
726 case OP_PUSHF:
727 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
728 return true;
729 case OP_PUSHA:
730 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
731 return true;
732 case OP_FXSAVE:
733 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
734 return true;
735 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
736 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
737 return true;
738 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
739 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
740 return true;
741 case OP_MOVSWD:
742 case OP_STOSWD:
743 if ( pCpu->prefix == (PREFIX_REP|PREFIX_REX)
744 && pRegFrame->rcx >= 0x40
745 )
746 {
747 Assert(pCpu->mode == CPUMODE_64BIT);
748
749 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
750 return true;
751 }
752 return false;
753 }
754 if ( (pCpu->param1.flags & USE_REG_GEN32)
755 && (pCpu->param1.base.reg_gen == USE_REG_ESP))
756 {
757 Log4(("pgmPoolMonitorIsReused: ESP\n"));
758 return true;
759 }
760
761 //if (pPage->fCR3Mix)
762 // return false;
763 return false;
764}
765
766
767/**
768 * Flushes the page being accessed.
769 *
770 * @returns VBox status code suitable for scheduling.
771 * @param pVM The VM handle.
772 * @param pPool The pool.
773 * @param pPage The pool page (head).
774 * @param pCpu The disassembly of the write instruction.
775 * @param pRegFrame The trap register frame.
776 * @param GCPhysFault The fault address as guest physical address.
777 * @param pvFault The fault address.
778 */
779static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
780 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
781{
782 /*
783 * First, do the flushing.
784 */
785 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
786
787 /*
788 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
789 */
790 uint32_t cbWritten;
791 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
792 if (VBOX_SUCCESS(rc2))
793 pRegFrame->rip += pCpu->opsize;
794 else if (rc2 == VERR_EM_INTERPRETER)
795 {
796#ifdef IN_GC
797 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
798 {
799 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
800 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
801 rc = VINF_SUCCESS;
802 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
803 }
804 else
805#endif
806 {
807 rc = VINF_EM_RAW_EMULATE_INSTR;
808 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
809 }
810 }
811 else
812 rc = rc2;
813
814 /* See use in pgmPoolAccessHandlerSimple(). */
815 PGM_INVL_GUEST_TLBS();
816
817 LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
818 return rc;
819
820}
821
822
823/**
824 * Handles the STOSD write accesses.
825 *
826 * @returns VBox status code suitable for scheduling.
827 * @param pVM The VM handle.
828 * @param pPool The pool.
829 * @param pPage The pool page (head).
830 * @param pCpu The disassembly of the write instruction.
831 * @param pRegFrame The trap register frame.
832 * @param GCPhysFault The fault address as guest physical address.
833 * @param pvFault The fault address.
834 */
835DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
836 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
837{
838 Assert(pCpu->mode == CPUMODE_32BIT);
839
840 /*
841 * Increment the modification counter and insert it into the list
842 * of modified pages the first time.
843 */
844 if (!pPage->cModifications++)
845 pgmPoolMonitorModifiedInsert(pPool, pPage);
846
847 /*
848 * Execute REP STOSD.
849 *
850 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
851 * write situation, meaning that it's safe to write here.
852 */
853 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
854 while (pRegFrame->ecx)
855 {
856 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
857#ifdef IN_GC
858 *(uint32_t *)pu32 = pRegFrame->eax;
859#else
860 PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
861#endif
862 pu32 += 4;
863 GCPhysFault += 4;
864 pRegFrame->edi += 4;
865 pRegFrame->ecx--;
866 }
867 pRegFrame->rip += pCpu->opsize;
868
869 /* See use in pgmPoolAccessHandlerSimple(). */
870 PGM_INVL_GUEST_TLBS();
871
872 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
873 return VINF_SUCCESS;
874}
875
876
877/**
878 * Handles the simple write accesses.
879 *
880 * @returns VBox status code suitable for scheduling.
881 * @param pVM The VM handle.
882 * @param pPool The pool.
883 * @param pPage The pool page (head).
884 * @param pCpu The disassembly of the write instruction.
885 * @param pRegFrame The trap register frame.
886 * @param GCPhysFault The fault address as guest physical address.
887 * @param pvFault The fault address.
888 */
889DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
890 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
891{
892 /*
893 * Increment the modification counter and insert it into the list
894 * of modified pages the first time.
895 */
896 if (!pPage->cModifications++)
897 pgmPoolMonitorModifiedInsert(pPool, pPage);
898
899 /*
900 * Clear all the pages. ASSUMES that pvFault is readable.
901 */
902 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
903
904 /*
905 * Interpret the instruction.
906 */
907 uint32_t cb;
908 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
909 if (VBOX_SUCCESS(rc))
910 pRegFrame->rip += pCpu->opsize;
911 else if (rc == VERR_EM_INTERPRETER)
912 {
913 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
914 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
915 rc = VINF_EM_RAW_EMULATE_INSTR;
916 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
917 }
918
919 /*
920 * Quick hack: with logging enabled we get stale
921 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
922 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
923 * have to be fixed to support this. But that'll have to wait till next week.
924 *
925 * An alternative is to keep track of the changed PTEs together with the
926 * GCPhys from the guest PT. This may prove expensive though.
927 *
928 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
929 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
930 */
931 PGM_INVL_GUEST_TLBS();
932
933 LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
934 return rc;
935}
936
937
938/**
939 * \#PF Handler callback for PT write accesses.
940 *
941 * @returns VBox status code (appropriate for GC return).
942 * @param pVM VM Handle.
943 * @param uErrorCode CPU Error code.
944 * @param pRegFrame Trap register frame.
945 * NULL on DMA and other non CPU access.
946 * @param pvFault The fault address (cr2).
947 * @param GCPhysFault The GC physical address corresponding to pvFault.
948 * @param pvUser User argument.
949 */
950DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
951{
952 STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
953 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
954 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
955 LogFlow(("pgmPoolAccessHandler: pvFault=%VGv pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
956
957 /*
958 * We should ALWAYS have the list head as user parameter. This
959 * is because we use that page to record the changes.
960 */
961 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
962
963 /*
964 * Disassemble the faulting instruction.
965 */
966 DISCPUSTATE Cpu;
967 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
968 AssertRCReturn(rc, rc);
969
970 /*
971 * Check if it's worth dealing with.
972 */
973 bool fReused = false;
974 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
975 || pPage->fCR3Mix)
976 && !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
977 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
978 {
979 /*
980 * Simple instructions, no REP prefix.
981 */
982 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
983 {
984 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
985 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
986 return rc;
987 }
988
989 /*
990 * Windows is frequently doing small memset() operations (netio test 4k+).
991 * We have to deal with these or we'll kill the cache and performance.
992 */
993 if ( Cpu.pCurInstr->opcode == OP_STOSWD
994 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
995 && pRegFrame->ecx <= 0x20
996 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
997 && !((uintptr_t)pvFault & 3)
998 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
999 && Cpu.mode == CPUMODE_32BIT
1000 && Cpu.opmode == CPUMODE_32BIT
1001 && Cpu.addrmode == CPUMODE_32BIT
1002 && Cpu.prefix == PREFIX_REP
1003 && !pRegFrame->eflags.Bits.u1DF
1004 )
1005 {
1006 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1007 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
1008 return rc;
1009 }
1010
1011 /* REP prefix, don't bother. */
1012 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
1013 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%VGv opcode=%d prefix=%#x\n",
1014 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
1015 }
1016
1017 /*
1018 * Not worth it, so flush it.
1019 *
1020 * If we considered it to be reused, don't go back to ring-3
1021 * to emulate failed instructions since we usually cannot
1022 * interpret them. This may be a bit risky, in which case
1023 * the reuse detection must be fixed.
1024 */
1025 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1026 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1027 rc = VINF_SUCCESS;
1028 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
1029 return rc;
1030}
1031
1032# endif /* !IN_RING3 */
1033#endif /* PGMPOOL_WITH_MONITORING */
1034
1035
1036
1037#ifdef PGMPOOL_WITH_CACHE
1038/**
1039 * Inserts a page into the GCPhys hash table.
1040 *
1041 * @param pPool The pool.
1042 * @param pPage The page.
1043 */
1044DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1045{
1046 Log3(("pgmPoolHashInsert: %VGp\n", pPage->GCPhys));
1047 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1048 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1049 pPage->iNext = pPool->aiHash[iHash];
1050 pPool->aiHash[iHash] = pPage->idx;
1051}
1052
1053
1054/**
1055 * Removes a page from the GCPhys hash table.
1056 *
1057 * @param pPool The pool.
1058 * @param pPage The page.
1059 */
1060DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1061{
1062 Log3(("pgmPoolHashRemove: %VGp\n", pPage->GCPhys));
1063 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1064 if (pPool->aiHash[iHash] == pPage->idx)
1065 pPool->aiHash[iHash] = pPage->iNext;
1066 else
1067 {
1068 uint16_t iPrev = pPool->aiHash[iHash];
1069 for (;;)
1070 {
1071 const int16_t i = pPool->aPages[iPrev].iNext;
1072 if (i == pPage->idx)
1073 {
1074 pPool->aPages[iPrev].iNext = pPage->iNext;
1075 break;
1076 }
1077 if (i == NIL_PGMPOOL_IDX)
1078 {
1079 AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1080 break;
1081 }
1082 iPrev = i;
1083 }
1084 }
1085 pPage->iNext = NIL_PGMPOOL_IDX;
1086}
1087
1088
1089/**
1090 * Frees up one cache page.
1091 *
1092 * @returns VBox status code.
1093 * @retval VINF_SUCCESS on success.
1094 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a lightweight pool flush.
1095 * @param pPool The pool.
1096 * @param iUser The user index.
1097 */
1098static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1099{
1100#ifndef IN_GC
1101 const PVM pVM = pPool->CTXSUFF(pVM);
1102#endif
1103 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
1104 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1105
1106 /*
1107 * Select one page from the tail of the age list.
1108 */
1109 uint16_t iToFree = pPool->iAgeTail;
1110 if (iToFree == iUser)
1111 iToFree = pPool->aPages[iToFree].iAgePrev;
1112/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1113 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1114 {
1115 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1116 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1117 {
1118 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1119 continue;
1120 iToFree = i;
1121 break;
1122 }
1123 }
1124*/
1125 Assert(iToFree != iUser);
1126 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1127
1128 int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
1129 if (rc == VINF_SUCCESS)
1130 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1131 return rc;
1132}
1133
1134
1135/**
1136 * Checks if a kind mismatch is really a page being reused
1137 * or if it's just a normal remapping.
1138 *
1139 * @returns true if reused and the cached page (enmKind1) should be flushed
1140 * @returns false if not reused.
1141 * @param enmKind1 The kind of the cached page.
1142 * @param enmKind2 The kind of the requested page.
1143 */
1144static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1145{
1146 switch (enmKind1)
1147 {
1148 /*
1149 * Never reuse them. There is no remapping in non-paging mode.
1150 */
1151 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1152 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1153 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1154 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1155 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1156 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1157 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1158 return true;
1159
1160 /*
1161 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1162 */
1163 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1164 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1165 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1166 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1167 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1168 switch (enmKind2)
1169 {
1170 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1171 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1172 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1173 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1174 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1175 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1176 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1177 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1178 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1179 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1180 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1181 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1182 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1183 return true;
1184 default:
1185 return false;
1186 }
1187
1188 /*
1189 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1190 */
1191 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1192 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1193 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1194 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1195 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1196 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1197 switch (enmKind2)
1198 {
1199 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1200 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1201 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1202 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1203 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1204 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1205 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1206 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1207 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1208 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1209 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1210 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1211 return true;
1212 default:
1213 return false;
1214 }
1215
1216 /*
1217 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1218 */
1219 case PGMPOOLKIND_ROOT_32BIT_PD:
1220 case PGMPOOLKIND_ROOT_PAE_PD:
1221 case PGMPOOLKIND_ROOT_PDPT:
1222 case PGMPOOLKIND_ROOT_NESTED:
1223 return false;
1224
1225 default:
1226 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1227 }
1228}
1229
1230
1231/**
1232 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1233 *
1234 * @returns VBox status code.
1235 * @retval VINF_PGM_CACHED_PAGE on success.
1236 * @retval VERR_FILE_NOT_FOUND if not found.
1237 * @param pPool The pool.
1238 * @param GCPhys The GC physical address of the page we're gonna shadow.
1239 * @param enmKind The kind of mapping.
1240 * @param iUser The shadow page pool index of the user table.
1241 * @param iUserTable The index into the user table (shadowed).
1242 * @param ppPage Where to store the pointer to the page.
1243 */
1244static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1245{
1246#ifndef IN_GC
1247 const PVM pVM = pPool->CTXSUFF(pVM);
1248#endif
1249 /*
1250 * Look up the GCPhys in the hash.
1251 */
1252 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1253 Log3(("pgmPoolCacheAlloc: %VGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
1254 if (i != NIL_PGMPOOL_IDX)
1255 {
1256 do
1257 {
1258 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1259 Log3(("pgmPoolCacheAlloc: slot %d found page %VGp\n", i, pPage->GCPhys));
1260 if (pPage->GCPhys == GCPhys)
1261 {
1262 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
1263 {
1264 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1265 if (VBOX_SUCCESS(rc))
1266 {
1267 *ppPage = pPage;
1268 STAM_COUNTER_INC(&pPool->StatCacheHits);
1269 return VINF_PGM_CACHED_PAGE;
1270 }
1271 return rc;
1272 }
1273
1274 /*
1275 * The kind is different. In some cases we should now flush the page
1276 * as it has been reused, but in most cases this is normal remapping
1277 * of PDs as PT or big pages using the GCPhys field in a slightly
1278 * different way than the other kinds.
1279 */
1280 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1281 {
1282 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1283 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
1284 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1285 break;
1286 }
1287 }
1288
1289 /* next */
1290 i = pPage->iNext;
1291 } while (i != NIL_PGMPOOL_IDX);
1292 }
1293
1294 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
1295 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1296 return VERR_FILE_NOT_FOUND;
1297}
1298
1299
1300/**
1301 * Inserts a page into the cache.
1302 *
1303 * @param pPool The pool.
1304 * @param pPage The cached page.
1305 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1306 */
1307static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1308{
1309 /*
1310 * Insert into the GCPhys hash if the page is fit for that.
1311 */
1312 Assert(!pPage->fCached);
1313 if (fCanBeCached)
1314 {
1315 pPage->fCached = true;
1316 pgmPoolHashInsert(pPool, pPage);
1317 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1318 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1319 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1320 }
1321 else
1322 {
1323 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1324 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1325 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1326 }
1327
1328 /*
1329 * Insert at the head of the age list.
1330 */
1331 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1332 pPage->iAgeNext = pPool->iAgeHead;
1333 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1334 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1335 else
1336 pPool->iAgeTail = pPage->idx;
1337 pPool->iAgeHead = pPage->idx;
1338}
1339
1340
1341/**
1342 * Flushes a cached page.
1343 *
1344 * @param pPool The pool.
1345 * @param pPage The cached page.
1346 */
1347static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1348{
1349 Log3(("pgmPoolCacheFlushPage: %VGp\n", pPage->GCPhys));
1350
1351 /*
1352 * Remove the page from the hash.
1353 */
1354 if (pPage->fCached)
1355 {
1356 pPage->fCached = false;
1357 pgmPoolHashRemove(pPool, pPage);
1358 }
1359 else
1360 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1361
1362 /*
1363 * Remove it from the age list.
1364 */
1365 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1366 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1367 else
1368 pPool->iAgeTail = pPage->iAgePrev;
1369 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1370 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1371 else
1372 pPool->iAgeHead = pPage->iAgeNext;
1373 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1374 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1375}
1376#endif /* PGMPOOL_WITH_CACHE */
1377
1378
1379#ifdef PGMPOOL_WITH_MONITORING
1380/**
1381 * Looks for pages sharing the monitor.
1382 *
1383 * @returns Pointer to the head page.
1384 * @returns NULL if not found.
1385 * @param pPool The Pool
1386 * @param pNewPage The page which is going to be monitored.
1387 */
1388static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1389{
1390#ifdef PGMPOOL_WITH_CACHE
1391 /*
1392 * Look up the GCPhys in the hash.
1393 */
1394 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1395 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1396 if (i == NIL_PGMPOOL_IDX)
1397 return NULL;
1398 do
1399 {
1400 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1401 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1402 && pPage != pNewPage)
1403 {
1404 switch (pPage->enmKind)
1405 {
1406 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1407 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1408 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1409 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1410 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1411 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1412 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1413 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1414 case PGMPOOLKIND_ROOT_32BIT_PD:
1415 case PGMPOOLKIND_ROOT_PAE_PD:
1416 case PGMPOOLKIND_ROOT_PDPT:
1417 {
1418 /* find the head */
1419 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1420 {
1421 Assert(pPage->iMonitoredPrev != pPage->idx);
1422 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1423 }
1424 return pPage;
1425 }
1426
1427 /* ignore, no monitoring. */
1428 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1429 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1430 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1431 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1432 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1433 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1434 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1435 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1436 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1437 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1438 case PGMPOOLKIND_ROOT_NESTED:
1439 break;
1440 default:
1441 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1442 }
1443 }
1444
1445 /* next */
1446 i = pPage->iNext;
1447 } while (i != NIL_PGMPOOL_IDX);
1448#endif
1449 return NULL;
1450}
1451
1452/**
1453 * Enables write monitoring of a guest page.
1454 *
1455 * @returns VBox status code.
1456 * @retval VINF_SUCCESS on success.
1457 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a lightweight pool flush.
1458 * @param pPool The pool.
1459 * @param pPage The cached page.
1460 */
1461static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1462{
1463 LogFlow(("pgmPoolMonitorInsert %VGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1464
1465 /*
1466 * Filter out the relevant kinds.
1467 */
1468 switch (pPage->enmKind)
1469 {
1470 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1471 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1472 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1473 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1474 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1475 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1476 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1477 case PGMPOOLKIND_ROOT_PDPT:
1478 break;
1479
1480 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1481 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1482 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1483 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1484 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1485 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1486 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1487 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1488 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1489 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1490 case PGMPOOLKIND_ROOT_NESTED:
1491 /* Nothing to monitor here. */
1492 return VINF_SUCCESS;
1493
1494 case PGMPOOLKIND_ROOT_32BIT_PD:
1495 case PGMPOOLKIND_ROOT_PAE_PD:
1496#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1497 break;
1498#endif
1499 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1500 default:
1501 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1502 }
1503
1504 /*
1505 * Install handler.
1506 */
1507 int rc;
1508 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1509 if (pPageHead)
1510 {
1511 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1512 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1513 pPage->iMonitoredPrev = pPageHead->idx;
1514 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1515 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1516 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1517 pPageHead->iMonitoredNext = pPage->idx;
1518 rc = VINF_SUCCESS;
1519 }
1520 else
1521 {
1522 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1523 PVM pVM = pPool->CTXSUFF(pVM);
1524 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1525 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1526 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1527 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1528 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1529 pPool->pfnAccessHandlerGC, MMHyperCCToRC(pVM, pPage),
1530 pPool->pszAccessHandler);
1531 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1532 * the heap size should suffice. */
1533 AssertFatalRC(rc);
1534 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1535 rc = VERR_PGM_POOL_CLEARED;
1536 }
1537 pPage->fMonitored = true;
1538 return rc;
1539}
1540
1541
1542/**
1543 * Disables write monitoring of a guest page.
1544 *
1545 * @returns VBox status code.
1546 * @retval VINF_SUCCESS on success.
1547 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
1548 * @param pPool The pool.
1549 * @param pPage The cached page.
1550 */
1551static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1552{
1553 /*
1554 * Filter out the relevant kinds.
1555 */
1556 switch (pPage->enmKind)
1557 {
1558 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1559 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1560 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1561 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1562 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1563 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1564 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1565 case PGMPOOLKIND_ROOT_PDPT:
1566 break;
1567
1568 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1569 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1570 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1571 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1572 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1573 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1574 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1575 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1576 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1577 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1578 case PGMPOOLKIND_ROOT_NESTED:
1579 /* Nothing to monitor here. */
1580 return VINF_SUCCESS;
1581
1582 case PGMPOOLKIND_ROOT_32BIT_PD:
1583 case PGMPOOLKIND_ROOT_PAE_PD:
1584#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1585 break;
1586#endif
1587 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1588 default:
1589 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1590 }
1591
1592 /*
1593 * Remove the page from the monitored list or uninstall it if last.
1594 */
1595 const PVM pVM = pPool->CTXSUFF(pVM);
1596 int rc;
1597 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1598 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1599 {
1600 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1601 {
1602 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1603 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1604 pNewHead->fCR3Mix = pPage->fCR3Mix;
1605 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1606 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1607 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1608 pPool->pfnAccessHandlerGC, MMHyperCCToRC(pVM, pNewHead),
1609 pPool->pszAccessHandler);
1610 AssertFatalRCSuccess(rc);
1611 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1612 }
1613 else
1614 {
1615 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1616 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1617 {
1618 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1619 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1620 }
1621 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1622 rc = VINF_SUCCESS;
1623 }
1624 }
1625 else
1626 {
1627 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1628 AssertFatalRC(rc);
1629 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1630 rc = VERR_PGM_POOL_CLEARED;
1631 }
1632 pPage->fMonitored = false;
1633
1634 /*
1635 * Remove it from the list of modified pages (if in it).
1636 */
1637 pgmPoolMonitorModifiedRemove(pPool, pPage);
1638
1639 return rc;
1640}
1641
1642
1643#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1644/**
1645 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1646 *
1647 * @param pPool The Pool.
1648 * @param pPage A page in the chain.
1649 * @param fCR3Mix The new fCR3Mix value.
1650 */
1651static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1652{
1653 /* current */
1654 pPage->fCR3Mix = fCR3Mix;
1655
1656 /* before */
1657 int16_t idx = pPage->iMonitoredPrev;
1658 while (idx != NIL_PGMPOOL_IDX)
1659 {
1660 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1661 idx = pPool->aPages[idx].iMonitoredPrev;
1662 }
1663
1664 /* after */
1665 idx = pPage->iMonitoredNext;
1666 while (idx != NIL_PGMPOOL_IDX)
1667 {
1668 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1669 idx = pPool->aPages[idx].iMonitoredNext;
1670 }
1671}
1672
1673
1674/**
1675 * Installs or modifies monitoring of a CR3 page (special).
1676 *
1677 * We're pretending the CR3 page is shadowed by the pool so we can use the
1678 * generic mechanisms in detecting chained monitoring. (This also gives us a
1679 * taste of what code changes are required to really pool CR3 shadow pages.)
1680 *
1681 * @returns VBox status code.
1682 * @param pPool The pool.
1683 * @param idxRoot The CR3 (root) page index.
1684 * @param GCPhysCR3 The (new) CR3 value.
1685 */
1686int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1687{
1688 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1689 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1690 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
1691 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1692
1693 /*
1694 * The unlikely case where it already matches.
1695 */
1696 if (pPage->GCPhys == GCPhysCR3)
1697 {
1698 Assert(pPage->fMonitored);
1699 return VINF_SUCCESS;
1700 }
1701
1702 /*
1703 * Flush the current monitoring and remove it from the hash.
1704 */
1705 int rc = VINF_SUCCESS;
1706 if (pPage->fMonitored)
1707 {
1708 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1709 rc = pgmPoolMonitorFlush(pPool, pPage);
1710 if (rc == VERR_PGM_POOL_CLEARED)
1711 rc = VINF_SUCCESS;
1712 else
1713 AssertFatalRC(rc);
1714 pgmPoolHashRemove(pPool, pPage);
1715 }
1716
1717 /*
1718 * Monitor the page at the new location and insert it into the hash.
1719 */
1720 pPage->GCPhys = GCPhysCR3;
1721 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1722 if (rc2 != VERR_PGM_POOL_CLEARED)
1723 {
1724 AssertFatalRC(rc2);
1725 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1726 rc = rc2;
1727 }
1728 pgmPoolHashInsert(pPool, pPage);
1729 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1730 return rc;
1731}
1732
1733
1734/**
1735 * Removes the monitoring of a CR3 page (special).
1736 *
1737 * @returns VBox status code.
1738 * @param pPool The pool.
1739 * @param idxRoot The CR3 (root) page index.
1740 */
1741int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1742{
1743 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1744 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1745 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1746 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1747
1748 if (!pPage->fMonitored)
1749 return VINF_SUCCESS;
1750
1751 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1752 int rc = pgmPoolMonitorFlush(pPool, pPage);
1753 if (rc != VERR_PGM_POOL_CLEARED)
1754 AssertFatalRC(rc);
1755 else
1756 rc = VINF_SUCCESS;
1757 pgmPoolHashRemove(pPool, pPage);
1758 Assert(!pPage->fMonitored);
1759 pPage->GCPhys = NIL_RTGCPHYS;
1760 return rc;
1761}
1762#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1763
1764
1765/**
1766 * Inserts the page into the list of modified pages.
1767 *
1768 * @param pPool The pool.
1769 * @param pPage The page.
1770 */
1771void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1772{
1773 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1774 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1775 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1776 && pPool->iModifiedHead != pPage->idx,
1777 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1778 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1779 pPool->iModifiedHead, pPool->cModifiedPages));
1780
1781 pPage->iModifiedNext = pPool->iModifiedHead;
1782 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1783 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1784 pPool->iModifiedHead = pPage->idx;
1785 pPool->cModifiedPages++;
1786#ifdef VBOX_WITH_STATISTICS
1787 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1788 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1789#endif
1790}
1791
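/*
 * A minimal sketch (hypothetical types, not pool code) of the head insertion
 * performed by pgmPoolMonitorModifiedInsert above: push the node onto an
 * index-threaded list and keep a high-water statistic.
 *
 * @code
 *  #include <stdint.h>
 *  #define MY_NIL_IDX UINT16_MAX
 *
 *  typedef struct MYNODE { uint16_t iPrev, iNext; } MYNODE;
 *  typedef struct MYLIST { uint16_t iHead; unsigned cNodes, cNodesHigh; } MYLIST;
 *
 *  static void myListPushFront(MYLIST *pList, MYNODE *paNodes, uint16_t i)
 *  {
 *      paNodes[i].iNext = pList->iHead;
 *      paNodes[i].iPrev = MY_NIL_IDX;
 *      if (pList->iHead != MY_NIL_IDX)
 *          paNodes[pList->iHead].iPrev = i;
 *      pList->iHead = i;
 *      if (++pList->cNodes > pList->cNodesHigh)
 *          pList->cNodesHigh = pList->cNodes;
 *  }
 * @endcode
 */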
1792
1793/**
1794 * Removes the page from the list of modified pages and resets the
1795 * modification counter.
1796 *
1797 * @param pPool The pool.
1798 * @param pPage The page which is believed to be in the list of modified pages.
1799 */
1800static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1801{
1802 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1803 if (pPool->iModifiedHead == pPage->idx)
1804 {
1805 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1806 pPool->iModifiedHead = pPage->iModifiedNext;
1807 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1808 {
1809 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1810 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1811 }
1812 pPool->cModifiedPages--;
1813 }
1814 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1815 {
1816 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1817 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1818 {
1819 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1820 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1821 }
1822 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1823 pPool->cModifiedPages--;
1824 }
1825 else
1826 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1827 pPage->cModifications = 0;
1828}
1829
1830
1831/**
1832 * Zaps the list of modified pages, resetting their modification counters in the process.
1833 *
1834 * @param pVM The VM handle.
1835 */
1836void pgmPoolMonitorModifiedClearAll(PVM pVM)
1837{
1838 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1839 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1840
1841 unsigned cPages = 0; NOREF(cPages);
1842 uint16_t idx = pPool->iModifiedHead;
1843 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1844 while (idx != NIL_PGMPOOL_IDX)
1845 {
1846 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1847 idx = pPage->iModifiedNext;
1848 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1849 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1850 pPage->cModifications = 0;
1851 Assert(++cPages);
1852 }
1853 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1854 pPool->cModifiedPages = 0;
1855}
1856
1857
1858/**
1859 * Clear all shadow pages and clear all modification counters.
1860 *
1861 * @param pVM The VM handle.
1862 * @remark Should only be used when monitoring is available, thus placed in
1863 * the PGMPOOL_WITH_MONITORING #ifdef.
1864 */
1865void pgmPoolClearAll(PVM pVM)
1866{
1867 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1868 STAM_PROFILE_START(&pPool->StatClearAll, c);
1869 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1870
1871 /*
1872     * Iterate all the pages until we've encountered all that are in use.
1873     * This is a simple but not quite optimal solution.
1874 */
1875 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1876 unsigned cLeft = pPool->cUsedPages;
1877 unsigned iPage = pPool->cCurPages;
1878 while (--iPage >= PGMPOOL_IDX_FIRST)
1879 {
1880 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1881 if (pPage->GCPhys != NIL_RTGCPHYS)
1882 {
1883 switch (pPage->enmKind)
1884 {
1885 /*
1886 * We only care about shadow page tables.
1887 */
1888 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1889 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1890 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1891 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1892 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1893 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1894 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1895 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1896 {
1897#ifdef PGMPOOL_WITH_USER_TRACKING
1898 if (pPage->cPresent)
1899#endif
1900 {
1901 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
1902 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1903 ASMMemZeroPage(pvShw);
1904 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1905#ifdef PGMPOOL_WITH_USER_TRACKING
1906 pPage->cPresent = 0;
1907 pPage->iFirstPresent = ~0;
1908#endif
1909 }
1910 }
1911 /* fall thru */
1912
1913 default:
1914 Assert(!pPage->cModifications || ++cModifiedPages);
1915 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1916 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1917 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1918 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1919 pPage->cModifications = 0;
1920 break;
1921
1922 }
1923 if (!--cLeft)
1924 break;
1925 }
1926 }
1927
1928     /* sweep the special pages too. */
1929 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1930 {
1931 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1932 if (pPage->GCPhys != NIL_RTGCPHYS)
1933 {
1934 Assert(!pPage->cModifications || ++cModifiedPages);
1935 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1936 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1937 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1938 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1939 pPage->cModifications = 0;
1940 }
1941 }
1942
1943#ifndef DEBUG_michael
1944 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1945#endif
1946 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1947 pPool->cModifiedPages = 0;
1948
1949#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1950 /*
1951 * Clear all the GCPhys links and rebuild the phys ext free list.
1952 */
1953 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
1954 pRam;
1955 pRam = CTXALLSUFF(pRam->pNext))
1956 {
1957 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1958 while (iPage-- > 0)
1959 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
1960 }
1961
1962 pPool->iPhysExtFreeHead = 0;
1963 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
1964 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
1965 for (unsigned i = 0; i < cMaxPhysExts; i++)
1966 {
1967 paPhysExts[i].iNext = i + 1;
1968 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
1969 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
1970 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
1971 }
1972 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
1973#endif
1974
1975
1976 pPool->cPresent = 0;
1977 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
1978}
1979
1980/**
1981 * Handle SyncCR3 pool tasks
1982 *
1983 * @returns VBox status code.
1984 * @retval VINF_SUCCESS on success.
1985 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
1986 * @param pVM The VM handle.
1987 * @remark Should only be used when monitoring is available, thus placed in
1988 * the PGMPOOL_WITH_MONITORING #ifdef.
1989 */
1990int pgmPoolSyncCR3(PVM pVM)
1991{
1992 /*
1993 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
1994 * Occasionally we will have to clear all the shadow page tables because we wanted
1995 * to monitor a page which was mapped by too many shadowed page tables. This operation
1996 * is sometimes referred to as a 'lightweight flush'.
1997 */
1998 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
1999 pgmPoolMonitorModifiedClearAll(pVM);
2000 else
2001 {
2002# ifndef IN_GC
2003 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2004 pgmPoolClearAll(pVM);
2005# else
2006 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2007 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2008 return VINF_PGM_SYNC_CR3;
2009# endif
2010 }
2011 return VINF_SUCCESS;
2012}
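
/*
 * A hedged usage sketch (hypothetical caller; the real invocation sits in the
 * per-mode SyncCR3 code): a CR3 sync path calls pgmPoolSyncCR3() and, when
 * running in the raw-mode context, returns to ring-3 if the full pool clear
 * has to be deferred.
 *
 * @code
 *  int rc = pgmPoolSyncCR3(pVM);
 *  if (rc == VINF_PGM_SYNC_CR3)
 *      return rc;          // GC only: ring-3 will perform the heavyweight clear
 *  AssertRCReturn(rc, rc);
 * @endcode
 */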
2013#endif /* PGMPOOL_WITH_MONITORING */
2014
2015#ifdef PGMPOOL_WITH_USER_TRACKING
2016/**
2017 * Frees up at least one user entry.
2018 *
2019 * @returns VBox status code.
2020 * @retval VINF_SUCCESS if a user entry was successfully freed.
2021 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2022 * @param pPool The pool.
2023 * @param iUser The user index.
2024 */
2025static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2026{
2027 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2028#ifdef PGMPOOL_WITH_CACHE
2029 /*
2030 * Just free cached pages in a braindead fashion.
2031 */
2032 /** @todo walk the age list backwards and free the first with usage. */
2033 int rc = VINF_SUCCESS;
2034 do
2035 {
2036 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2037 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
2038 rc = rc2;
2039 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2040 return rc;
2041#else
2042 /*
2043 * Lazy approach.
2044 */
2045 /* @todo incompatible with long mode paging (cr3 root will be flushed) */
2046 Assert(!CPUMIsGuestInLongMode(pVM));
2047 pgmPoolFlushAllInt(pPool);
2048 return VERR_PGM_POOL_FLUSHED;
2049#endif
2050}
2051
2052
2053/**
2054 * Inserts a page into the cache.
2055 *
2056 * This will create a user node for the page, insert it into the GCPhys
2057 * hash, and insert it into the age list.
2058 *
2059 * @returns VBox status code.
2060 * @retval VINF_SUCCESS if successfully added.
2061 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2062 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
2063 * @param pPool The pool.
2064 * @param pPage The cached page.
2065 * @param GCPhys The GC physical address of the page we're gonna shadow.
2066 * @param iUser The user index.
2067 * @param iUserTable The user table index.
2068 */
2069DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2070{
2071 int rc = VINF_SUCCESS;
2072 PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);
2073
2074 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
2075
2076 /*
2077     * Find a free user node.
2078 */
2079 uint16_t i = pPool->iUserFreeHead;
2080 if (i == NIL_PGMPOOL_USER_INDEX)
2081 {
2082 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2083 if (VBOX_FAILURE(rc))
2084 return rc;
2085 i = pPool->iUserFreeHead;
2086 }
2087
2088 /*
2089 * Unlink the user node from the free list,
2090 * initialize and insert it into the user list.
2091 */
2092 pPool->iUserFreeHead = pUser[i].iNext;
2093 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
2094 pUser[i].iUser = iUser;
2095 pUser[i].iUserTable = iUserTable;
2096 pPage->iUserHead = i;
2097
2098 /*
2099 * Insert into cache and enable monitoring of the guest page if enabled.
2100 *
2101 * Until we implement caching of all levels, including the CR3 one, we'll
2102 * have to make sure we don't try monitor & cache any recursive reuse of
2103 * a monitored CR3 page. Because all Windows versions are doing this we'll
2104 * have to be able to do combined access monitoring, CR3 + PT and
2105 * PD + PT (guest PAE).
2106 *
2107 * Update:
2108 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2109 */
2110#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2111# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2112 const bool fCanBeMonitored = true;
2113# else
2114 bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2115 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2116 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2117# endif
2118# ifdef PGMPOOL_WITH_CACHE
2119 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2120# endif
2121 if (fCanBeMonitored)
2122 {
2123# ifdef PGMPOOL_WITH_MONITORING
2124 rc = pgmPoolMonitorInsert(pPool, pPage);
2125 if (rc == VERR_PGM_POOL_CLEARED)
2126 {
2127 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2128# ifndef PGMPOOL_WITH_CACHE
2129 pgmPoolMonitorFlush(pPool, pPage);
2130 rc = VERR_PGM_POOL_FLUSHED;
2131# endif
2132 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2133 pUser[i].iNext = pPool->iUserFreeHead;
2134 pUser[i].iUser = NIL_PGMPOOL_IDX;
2135 pPool->iUserFreeHead = i;
2136 }
2137 }
2138# endif
2139#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2140 return rc;
2141}
2142
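/*
 * A hedged sketch of a typical caller (hypothetical variable names; the real
 * hookup happens in the pool's allocation path): after allocating a shadow
 * page, the parent table registers itself as a user so the pool can later
 * find and clear the referencing entry.
 *
 * @code
 *  // pPage      - newly allocated shadow page (e.g. a shadow PT)
 *  // GCPhysGst  - guest physical address the shadow page will shadow
 *  // iUserIdx   - pool index of the parent shadow table (the 'user')
 *  // iEntry     - slot in the parent table that will point to pPage
 *  int rc = pgmPoolTrackInsert(pPool, pPage, GCPhysGst, iUserIdx, iEntry);
 *  if (rc == VERR_PGM_POOL_FLUSHED || rc == VERR_PGM_POOL_CLEARED)
 *      return rc;  // pool was (or will be) flushed; the caller must cope with that
 * @endcode
 */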
2143
2144# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2145/**
2146 * Adds a user reference to a page.
2147 *
2148 * This will allocate a user record for the page and tell the cache to
2149 * update its replacement stats (moving the page to the head of the age list).
2150 *
2151 * @returns VBox status code.
2152 * @retval VINF_SUCCESS if successfully added.
2153 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2154 * @param pPool The pool.
2155 * @param pPage The cached page.
2156 * @param iUser The user index.
2157 * @param iUserTable The user table.
2158 */
2159static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2160{
2161 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2162
2163 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
2164# ifdef VBOX_STRICT
2165 /*
2166     * Check that the entry doesn't already exist.
2167 */
2168 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2169 {
2170 uint16_t i = pPage->iUserHead;
2171 do
2172 {
2173 Assert(i < pPool->cMaxUsers);
2174 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2175 i = paUsers[i].iNext;
2176 } while (i != NIL_PGMPOOL_USER_INDEX);
2177 }
2178# endif
2179
2180 /*
2181 * Allocate a user node.
2182 */
2183 uint16_t i = pPool->iUserFreeHead;
2184 if (i == NIL_PGMPOOL_USER_INDEX)
2185 {
2186 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2187 if (VBOX_FAILURE(rc))
2188 return rc;
2189 i = pPool->iUserFreeHead;
2190 }
2191 pPool->iUserFreeHead = paUsers[i].iNext;
2192
2193 /*
2194 * Initialize the user node and insert it.
2195 */
2196 paUsers[i].iNext = pPage->iUserHead;
2197 paUsers[i].iUser = iUser;
2198 paUsers[i].iUserTable = iUserTable;
2199 pPage->iUserHead = i;
2200
2201# ifdef PGMPOOL_WITH_CACHE
2202 /*
2203 * Tell the cache to update its replacement stats for this page.
2204 */
2205 pgmPoolCacheUsed(pPool, pPage);
2206# endif
2207 return VINF_SUCCESS;
2208}
2209# endif /* PGMPOOL_WITH_CACHE */
2210
2211
2212/**
2213 * Frees a user record associated with a page.
2214 *
2215 * This does not clear the entry in the user table, it simply returns the
2216 * user record to the chain of free records.
2217 *
2218 * @param pPool The pool.
2219 * @param pPage       The shadow page.
2220 * @param iUser The shadow page pool index of the user table.
2221 * @param iUserTable The index into the user table (shadowed).
2222 */
2223static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2224{
2225 /*
2226 * Unlink and free the specified user entry.
2227 */
2228 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2229
2230 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2231 uint16_t i = pPage->iUserHead;
2232 if ( i != NIL_PGMPOOL_USER_INDEX
2233 && paUsers[i].iUser == iUser
2234 && paUsers[i].iUserTable == iUserTable)
2235 {
2236 pPage->iUserHead = paUsers[i].iNext;
2237
2238 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2239 paUsers[i].iNext = pPool->iUserFreeHead;
2240 pPool->iUserFreeHead = i;
2241 return;
2242 }
2243
2244 /* General: Linear search. */
2245 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2246 while (i != NIL_PGMPOOL_USER_INDEX)
2247 {
2248 if ( paUsers[i].iUser == iUser
2249 && paUsers[i].iUserTable == iUserTable)
2250 {
2251 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2252 paUsers[iPrev].iNext = paUsers[i].iNext;
2253 else
2254 pPage->iUserHead = paUsers[i].iNext;
2255
2256 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2257 paUsers[i].iNext = pPool->iUserFreeHead;
2258 pPool->iUserFreeHead = i;
2259 return;
2260 }
2261 iPrev = i;
2262 i = paUsers[i].iNext;
2263 }
2264
2265 /* Fatal: didn't find it */
2266 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
2267 iUser, iUserTable, pPage->GCPhys));
2268}
2269
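/*
 * The removal above follows a common pattern: a constant time fast path for
 * the list head (the normal case for 32-bit and PAE paging, where a table has
 * a single user) and a linear search with a trailing pointer for the general
 * case.  A self-contained sketch with hypothetical names:
 *
 * @code
 *  #include <stdbool.h>
 *  #include <stdint.h>
 *  #define MY_NIL_IDX UINT16_MAX
 *
 *  typedef struct MYUSER { uint16_t iNext; uint32_t uKey; } MYUSER;
 *
 *  static bool myUserRemove(MYUSER *paUsers, uint16_t *piHead, uint32_t uKey)
 *  {
 *      uint16_t iPrev = MY_NIL_IDX;
 *      for (uint16_t i = *piHead; i != MY_NIL_IDX; iPrev = i, i = paUsers[i].iNext)
 *          if (paUsers[i].uKey == uKey)
 *          {
 *              if (iPrev == MY_NIL_IDX)
 *                  *piHead = paUsers[i].iNext;              // head fast path
 *              else
 *                  paUsers[iPrev].iNext = paUsers[i].iNext;
 *              return true;
 *          }
 *      return false;
 *  }
 * @endcode
 *
 * The pool version additionally pushes the freed record onto the global free
 * list (iUserFreeHead) and treats a miss as a fatal condition.
 */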
2270
2271/**
2272 * Gets the entry size of a shadow table.
2273 *
2274 * @param enmKind The kind of page.
2275 *
2276 * @returns The size of the entry in bytes. That is, 4 or 8.
2277 * @returns If the kind is not for a table, an assertion is raised and 0 is
2278 * returned.
2279 */
2280DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2281{
2282 switch (enmKind)
2283 {
2284 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2285 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2286 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2287 case PGMPOOLKIND_ROOT_32BIT_PD:
2288 return 4;
2289
2290 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2291 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2292 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2293 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2294 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2295 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2296 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2297 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2298 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2299 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2300 case PGMPOOLKIND_ROOT_PAE_PD:
2301 case PGMPOOLKIND_ROOT_PDPT:
2302 case PGMPOOLKIND_ROOT_NESTED:
2303 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2304 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2305 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2306 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2307 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2308 return 8;
2309
2310 default:
2311 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2312 }
2313}
2314
2315
2316/**
2317 * Gets the entry size of a guest table.
2318 *
2319 * @param enmKind The kind of page.
2320 *
2321 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2322 * @returns If the kind is not for a table, an assertion is raised and 0 is
2323 * returned.
2324 */
2325DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2326{
2327 switch (enmKind)
2328 {
2329 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2330 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2331 case PGMPOOLKIND_ROOT_32BIT_PD:
2332 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2333 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2334 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2335 return 4;
2336
2337 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2338 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2339 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2340 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2341 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2342 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2343 case PGMPOOLKIND_ROOT_PAE_PD:
2344 case PGMPOOLKIND_ROOT_PDPT:
2345 return 8;
2346
2347 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2348 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2349 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2350 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2351 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2352 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2353 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2354 case PGMPOOLKIND_ROOT_NESTED:
2355 /** @todo can we return 0? (nobody is calling this...) */
2356 AssertFailed();
2357 return 0;
2358
2359 default:
2360 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2361 }
2362}
2363
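/*
 * Since a shadow table always occupies exactly one 4 KB page, the entry sizes
 * above directly give the number of entries per table: 1024 for the 4-byte
 * (32-bit) kinds and 512 for the 8-byte (PAE / long mode / EPT) kinds.  A
 * minimal sketch of that derivation (PAGE_SIZE is 4096 here):
 *
 * @code
 *  unsigned cShwEntries = PAGE_SIZE / pgmPoolTrackGetShadowEntrySize(enmKind); // 1024 or 512
 *  unsigned cGstEntries = PAGE_SIZE / pgmPoolTrackGetGuestEntrySize(enmKind);  // only for kinds that have a guest table
 * @endcode
 */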
2364
2365#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2366/**
2367 * Scans one shadow page table for mappings of a physical page.
2368 *
2369 * @param pVM The VM handle.
2370 * @param pPhysPage The guest page in question.
2371 * @param iShw The shadow page table.
2372 * @param cRefs The number of references made in that PT.
2373 */
2374static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2375{
2376 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2377 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2378
2379 /*
2380 * Assert sanity.
2381 */
2382 Assert(cRefs == 1);
2383 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2384 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2385
2386 /*
2387 * Then, clear the actual mappings to the page in the shadow PT.
2388 */
2389 switch (pPage->enmKind)
2390 {
2391 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2392 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2393 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2394 {
2395 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2396 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2397 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2398 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2399 {
2400 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2401 pPT->a[i].u = 0;
2402 cRefs--;
2403 if (!cRefs)
2404 return;
2405 }
2406#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2407 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2408 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2409 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2410 {
2411 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2412 pPT->a[i].u = 0;
2413 }
2414#endif
2415 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2416 break;
2417 }
2418
2419 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2420 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2421 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2422 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2423 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2424 {
2425 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2426 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2427 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2428 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2429 {
2430 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2431 pPT->a[i].u = 0;
2432 cRefs--;
2433 if (!cRefs)
2434 return;
2435 }
2436#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2437 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2438 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2439 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2440 {
2441 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2442 pPT->a[i].u = 0;
2443 }
2444#endif
2445 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2446 break;
2447 }
2448
2449 default:
2450 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2451 }
2452}
2453
2454
2455/**
2456 * Scans one shadow page table for mappings of a physical page.
2457 *
2458 * @param pVM The VM handle.
2459 * @param pPhysPage The guest page in question.
2460 * @param iShw The shadow page table.
2461 * @param cRefs The number of references made in that PT.
2462 */
2463void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2464{
2465 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool);
2466 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2467 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2468 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2469 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2470 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2471}
2472
2473
2474/**
2475 * Flushes a list of shadow page tables mapping the same physical page.
2476 *
2477 * @param pVM The VM handle.
2478 * @param pPhysPage The guest page in question.
2479 * @param iPhysExt The physical cross reference extent list to flush.
2480 */
2481void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2482{
2483 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2484 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2485    LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%#x\n", pPhysPage->HCPhys, iPhysExt));
2486
2487 const uint16_t iPhysExtStart = iPhysExt;
2488 PPGMPOOLPHYSEXT pPhysExt;
2489 do
2490 {
2491 Assert(iPhysExt < pPool->cMaxPhysExts);
2492 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2493 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2494 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2495 {
2496 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2497 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2498 }
2499
2500 /* next */
2501 iPhysExt = pPhysExt->iNext;
2502 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2503
2504 /* insert the list into the free list and clear the ram range entry. */
2505 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2506 pPool->iPhysExtFreeHead = iPhysExtStart;
2507 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2508
2509 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2510}
2511#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2512
2513
2514/**
2515 * Scans all shadow page tables for mappings of a physical page.
2516 *
2517 * This may be slow, but it's most likely more efficient than cleaning
2518 * out the entire page pool / cache.
2519 *
2520 * @returns VBox status code.
2521 * @retval VINF_SUCCESS if all references have been successfully cleared.
2522 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2523 * a page pool cleaning.
2524 *
2525 * @param pVM The VM handle.
2526 * @param pPhysPage The guest page in question.
2527 */
2528int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2529{
2530 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2531 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2532 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2533 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2534
2535#if 1
2536 /*
2537 * There is a limit to what makes sense.
2538 */
2539 if (pPool->cPresent > 1024)
2540 {
2541 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2542 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2543 return VINF_PGM_GCPHYS_ALIASED;
2544 }
2545#endif
2546
2547 /*
2548     * Iterate all the pages until we've encountered all that are in use.
2549     * This is a simple but not quite optimal solution.
2550 */
2551 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2552 const uint32_t u32 = u64;
2553 unsigned cLeft = pPool->cUsedPages;
2554 unsigned iPage = pPool->cCurPages;
2555 while (--iPage >= PGMPOOL_IDX_FIRST)
2556 {
2557 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2558 if (pPage->GCPhys != NIL_RTGCPHYS)
2559 {
2560 switch (pPage->enmKind)
2561 {
2562 /*
2563 * We only care about shadow page tables.
2564 */
2565 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2566 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2567 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2568 {
2569 unsigned cPresent = pPage->cPresent;
2570 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2571 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2572 if (pPT->a[i].n.u1Present)
2573 {
2574 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2575 {
2576 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2577 pPT->a[i].u = 0;
2578 }
2579 if (!--cPresent)
2580 break;
2581 }
2582 break;
2583 }
2584
2585 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2586 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2587 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2588 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2589 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2590 {
2591 unsigned cPresent = pPage->cPresent;
2592 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2593 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2594 if (pPT->a[i].n.u1Present)
2595 {
2596 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2597 {
2598 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2599 pPT->a[i].u = 0;
2600 }
2601 if (!--cPresent)
2602 break;
2603 }
2604 break;
2605 }
2606 }
2607 if (!--cLeft)
2608 break;
2609 }
2610 }
2611
2612 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2613 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2614 return VINF_SUCCESS;
2615}
2616
2617
2618/**
2619 * Clears the user entry in a user table.
2620 *
2621 * This is used to remove all references to a page when flushing it.
2622 */
2623static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2624{
2625 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2626 Assert(pUser->iUser < pPool->cCurPages);
2627
2628 /*
2629 * Map the user page.
2630 */
2631 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2632 union
2633 {
2634 uint64_t *pau64;
2635 uint32_t *pau32;
2636 } u;
2637 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pUserPage);
2638
2639 /* Safety precaution in case we change the paging for other modes too in the future. */
2640 Assert(PGMGetHyperCR3(CTXSUFF(pPool->pVM)) != pPage->Core.Key);
2641
2642#ifdef VBOX_STRICT
2643 /*
2644 * Some sanity checks.
2645 */
2646 switch (pUserPage->enmKind)
2647 {
2648 case PGMPOOLKIND_ROOT_32BIT_PD:
2649 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2650 Assert(!(u.pau32[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2651 break;
2652 case PGMPOOLKIND_ROOT_PAE_PD:
2653 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2654 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2655 break;
2656 case PGMPOOLKIND_ROOT_PDPT:
2657 Assert(pUser->iUserTable < 4);
2658 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2659 break;
2660 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2661 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2662 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2663 break;
2664 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2665 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2666 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2667 break;
2668 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2669 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2670 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2671 break;
2672 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2673 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2674 /* GCPhys >> PAGE_SHIFT is the index here */
2675 break;
2676 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2677 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2678 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2679 break;
2680
2681 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2682 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2683 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2684 break;
2685
2686 case PGMPOOLKIND_ROOT_NESTED:
2687 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2688 break;
2689
2690 default:
2691 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2692 break;
2693 }
2694#endif /* VBOX_STRICT */
2695
2696 /*
2697 * Clear the entry in the user page.
2698 */
2699 switch (pUserPage->enmKind)
2700 {
2701 /* 32-bit entries */
2702 case PGMPOOLKIND_ROOT_32BIT_PD:
2703 u.pau32[pUser->iUserTable] = 0;
2704 break;
2705
2706 /* 64-bit entries */
2707 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2708 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2709 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2710 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2711 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2712 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2713 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2714 case PGMPOOLKIND_ROOT_PAE_PD:
2715 case PGMPOOLKIND_ROOT_PDPT:
2716 case PGMPOOLKIND_ROOT_NESTED:
2717 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2718 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2719 u.pau64[pUser->iUserTable] = 0;
2720 break;
2721
2722 default:
2723 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2724 }
2725}
2726
2727
2728/**
2729 * Clears all users of a page.
2730 */
2731static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2732{
2733 /*
2734 * Free all the user records.
2735 */
2736 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2737 uint16_t i = pPage->iUserHead;
2738 while (i != NIL_PGMPOOL_USER_INDEX)
2739 {
2740        /* Clear entry in user table. */
2741 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2742
2743 /* Free it. */
2744 const uint16_t iNext = paUsers[i].iNext;
2745 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2746 paUsers[i].iNext = pPool->iUserFreeHead;
2747 pPool->iUserFreeHead = i;
2748
2749 /* Next. */
2750 i = iNext;
2751 }
2752 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2753}
2754
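/*
 * A hedged sketch of where the function above fits (hypothetical fragment;
 * the real sequence lives in the pool's page flushing code): when a shadow
 * page is torn down, its parents are detached first, then the references the
 * page itself holds are dropped, and finally the page is scrubbed.
 *
 * @code
 *  pgmPoolTrackClearPageUsers(pPool, pPage);  // zap the entries pointing at us in all parent tables
 *  pgmPoolTrackDeref(pPool, pPage);           // drop the GCPhys / child page references we hold
 *  ASMMemZeroPage(PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage)); // scrub the shadow page
 * @endcode
 */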
2755
2756#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2757/**
2758 * Allocates a new physical cross reference extent.
2759 *
2760 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2761 * @param pVM The VM handle.
2762 * @param piPhysExt Where to store the phys ext index.
2763 */
2764PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2765{
2766 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2767 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2768 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2769 {
2770 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2771 return NULL;
2772 }
2773 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2774 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2775 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2776 *piPhysExt = iPhysExt;
2777 return pPhysExt;
2778}
2779
2780
2781/**
2782 * Frees a physical cross reference extent.
2783 *
2784 * @param pVM The VM handle.
2785 * @param iPhysExt The extent to free.
2786 */
2787void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2788{
2789 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2790 Assert(iPhysExt < pPool->cMaxPhysExts);
2791 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2792 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2793 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2794 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2795 pPool->iPhysExtFreeHead = iPhysExt;
2796}
2797
2798
2799/**
2800 * Frees a list of physical cross reference extents.
2801 *
2802 * @param pVM         The VM handle.
2803 * @param iPhysExt    The index of the head of the extent list to free.
2804 */
2805void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2806{
2807 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2808
2809 const uint16_t iPhysExtStart = iPhysExt;
2810 PPGMPOOLPHYSEXT pPhysExt;
2811 do
2812 {
2813 Assert(iPhysExt < pPool->cMaxPhysExts);
2814 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2815 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2816 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2817
2818 /* next */
2819 iPhysExt = pPhysExt->iNext;
2820 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2821
2822 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2823 pPool->iPhysExtFreeHead = iPhysExtStart;
2824}
2825
2826/**
2827 * Insert a reference into a list of physical cross reference extents.
2828 *
2829 * @returns The new ram range flags (top 16-bits).
2830 *
2831 * @param pVM The VM handle.
2832 * @param iPhysExt The physical extent index of the list head.
2833 * @param iShwPT The shadow page table index.
2834 *
2835 */
2836static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2837{
2838 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2839 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2840
2841 /* special common case. */
2842 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2843 {
2844 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2845 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2846 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2847 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2848 }
2849
2850 /* general treatment. */
2851 const uint16_t iPhysExtStart = iPhysExt;
2852 unsigned cMax = 15;
2853 for (;;)
2854 {
2855 Assert(iPhysExt < pPool->cMaxPhysExts);
2856 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2857 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2858 {
2859 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2860 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2861 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2862 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2863 }
2864 if (!--cMax)
2865 {
2866 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2867 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2868 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2869 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2870        }

            /* advance to the next extent; stop at the end of the list so a new one can be appended below. */
            iPhysExt = paPhysExts[iPhysExt].iNext;
            if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
                break;
2871    }
2872
2873 /* add another extent to the list. */
2874 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2875 if (!pNew)
2876 {
2877 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2878 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2879 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2880 }
2881 pNew->iNext = iPhysExtStart;
2882 pNew->aidx[0] = iShwPT;
2883 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2884 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2885}
2886
2887
2888/**
2889 * Add a reference to guest physical page where extents are in use.
2890 *
2891 * @returns The new ram range flags (top 16-bits).
2892 *
2893 * @param pVM The VM handle.
2894 * @param u16 The ram range flags (top 16-bits).
2895 * @param iShwPT The shadow page table index.
2896 */
2897uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2898{
2899 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2900 {
2901 /*
2902 * Convert to extent list.
2903 */
2904 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2905 uint16_t iPhysExt;
2906 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2907 if (pPhysExt)
2908 {
2909 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2910 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2911 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2912 pPhysExt->aidx[1] = iShwPT;
2913 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2914 }
2915 else
2916 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2917 }
2918 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2919 {
2920 /*
2921 * Insert into the extent list.
2922 */
2923 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2924 }
2925 else
2926 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2927 return u16;
2928}
2929
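/*
 * A sketch of the 16-bit tracking word handled above (this only restates what
 * the code already relies on; the helper names are hypothetical): the bits
 * selected by MM_RAM_FLAGS_IDX_MASK hold either a single shadow page table
 * index or the head of a PGMPOOLPHYSEXT list, and the bits above them, at
 * (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT), hold the reference
 * count/kind: 1 means exactly one reference, MM_RAM_FLAGS_CREFS_PHYSEXT means
 * the index designates an extent list.
 *
 * @code
 *  DECLINLINE(uint16_t) myEncodeSingleRef(uint16_t iShwPT)
 *  {
 *      return iShwPT | (UINT16_C(1) << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
 *  }
 *
 *  DECLINLINE(bool) myIsPhysExtList(uint16_t u16)
 *  {
 *      return (u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == MM_RAM_FLAGS_CREFS_PHYSEXT;
 *  }
 *
 *  DECLINLINE(uint16_t) myGetTrackIdx(uint16_t u16)
 *  {
 *      return u16 & MM_RAM_FLAGS_IDX_MASK;
 *  }
 * @endcode
 */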
2930
2931/**
2932 * Clear references to guest physical memory.
2933 *
2934 * @param pPool The pool.
2935 * @param pPage The page.
2936 * @param pPhysPage Pointer to the aPages entry in the ram range.
2937 */
2938void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
2939{
2940 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
2941 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2942
2943 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2944 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2945 {
2946 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2947 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2948 do
2949 {
2950 Assert(iPhysExt < pPool->cMaxPhysExts);
2951
2952 /*
2953 * Look for the shadow page and check if it's all freed.
2954 */
2955 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2956 {
2957 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2958 {
2959 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2960
2961 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2962 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2963 {
2964 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2965 return;
2966 }
2967
2968 /* we can free the node. */
2969 PVM pVM = pPool->CTXSUFF(pVM);
2970 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2971 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2972 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2973 {
2974 /* lonely node */
2975 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2976 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
2977 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2978 }
2979 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2980 {
2981 /* head */
2982 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
2983 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
2984 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2985 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2986 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2987 }
2988 else
2989 {
2990 /* in list */
2991 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2992 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2993 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2994 }
2995 iPhysExt = iPhysExtNext;
2996 return;
2997 }
2998 }
2999
3000 /* next */
3001 iPhysExtPrev = iPhysExt;
3002 iPhysExt = paPhysExts[iPhysExt].iNext;
3003 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3004
3005 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3006 }
3007 else /* nothing to do */
3008 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
3009}
3010
3011
3012
3013/**
3014 * Clear references to guest physical memory.
3015 *
3016 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3017 * is assumed to be correct, so the linear search can be skipped and we can assert
3018 * at an earlier point.
3019 *
3020 * @param pPool The pool.
3021 * @param pPage The page.
3022 * @param HCPhys The host physical address corresponding to the guest page.
3023 * @param GCPhys The guest physical address corresponding to HCPhys.
3024 */
3025static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3026{
3027 /*
3028 * Walk range list.
3029 */
3030 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3031 while (pRam)
3032 {
3033 RTGCPHYS off = GCPhys - pRam->GCPhys;
3034 if (off < pRam->cb)
3035 {
3036 /* does it match? */
3037 const unsigned iPage = off >> PAGE_SHIFT;
3038 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3039            RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3040            Log(("pgmPoolTracDerefGCPhys %VHp vs %VHp\n", HCPhysPage, HCPhys));
3041 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3042 {
3043 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3044 return;
3045 }
3046 break;
3047 }
3048 pRam = CTXALLSUFF(pRam->pNext);
3049 }
3050 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
3051}
3052
3053
3054/**
3055 * Clear references to guest physical memory.
3056 *
3057 * @param pPool The pool.
3058 * @param pPage The page.
3059 * @param HCPhys The host physical address corresponding to the guest page.
3060 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3061 */
3062static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3063{
3064 /*
3065 * Walk range list.
3066 */
3067 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3068 while (pRam)
3069 {
3070 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3071 if (off < pRam->cb)
3072 {
3073 /* does it match? */
3074 const unsigned iPage = off >> PAGE_SHIFT;
3075 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3076 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3077 {
3078 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3079 return;
3080 }
3081 break;
3082 }
3083 pRam = CTXALLSUFF(pRam->pNext);
3084 }
3085
3086 /*
3087 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3088 */
3089 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3090 pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3091 while (pRam)
3092 {
3093 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3094 while (iPage-- > 0)
3095 {
3096 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3097 {
3098 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
3099 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3100 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3101 return;
3102 }
3103 }
3104 pRam = CTXALLSUFF(pRam->pNext);
3105 }
3106
3107 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
3108}
3109
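/*
 * The lookup strategy above deserves a stand-alone illustration: probe the
 * slot the hint points into first, and only if the physical address there
 * does not match fall back to an exhaustive scan of every page in every RAM
 * range.  A self-contained sketch with hypothetical types:
 *
 * @code
 *  #include <stddef.h>
 *  #include <stdint.h>
 *
 *  typedef struct MYRANGE { uint64_t uBase; uint64_t cItems; uint64_t *paKeys; } MYRANGE;
 *
 *  static uint64_t *myFindWithHint(MYRANGE *paRanges, size_t cRanges, uint64_t uHint, uint64_t uKey)
 *  {
 *      for (size_t iR = 0; iR < cRanges; iR++)             // phase 1: the hinted slot only
 *      {
 *          uint64_t off = uHint - paRanges[iR].uBase;
 *          if (off < paRanges[iR].cItems)
 *          {
 *              if (paRanges[iR].paKeys[off] == uKey)
 *                  return &paRanges[iR].paKeys[off];       // hint was right
 *              break;                                      // hint was wrong, go exhaustive
 *          }
 *      }
 *      for (size_t iR = 0; iR < cRanges; iR++)             // phase 2: exhaustive scan
 *          for (uint64_t i = 0; i < paRanges[iR].cItems; i++)
 *              if (paRanges[iR].paKeys[i] == uKey)
 *                  return &paRanges[iR].paKeys[i];
 *      return NULL;
 *  }
 * @endcode
 */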
3110
3111/**
3112 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3113 *
3114 * @param pPool The pool.
3115 * @param pPage The page.
3116 * @param pShwPT The shadow page table (mapping of the page).
3117 * @param pGstPT The guest page table.
3118 */
3119DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3120{
3121 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3122 if (pShwPT->a[i].n.u1Present)
3123 {
3124 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3125 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3126 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3127 if (!--pPage->cPresent)
3128 break;
3129 }
3130}
3131
3132
3133/**
3134 * Clear references to guest physical memory in a PAE / 32-bit page table.
3135 *
3136 * @param pPool The pool.
3137 * @param pPage The page.
3138 * @param pShwPT The shadow page table (mapping of the page).
3139 * @param pGstPT The guest page table (just a half one).
3140 */
3141DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3142{
3143 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3144 if (pShwPT->a[i].n.u1Present)
3145 {
3146            Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3147                  i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3148 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3149 }
3150}
3151
3152
3153/**
3154 * Clear references to guest physical memory in a PAE / PAE page table.
3155 *
3156 * @param pPool The pool.
3157 * @param pPage The page.
3158 * @param pShwPT The shadow page table (mapping of the page).
3159 * @param pGstPT The guest page table.
3160 */
3161DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3162{
3163 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3164 if (pShwPT->a[i].n.u1Present)
3165 {
3166            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3167                  i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3168 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3169 }
3170}
3171
3172
3173/**
3174 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3175 *
3176 * @param pPool The pool.
3177 * @param pPage The page.
3178 * @param pShwPT The shadow page table (mapping of the page).
3179 */
3180DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3181{
3182 RTGCPHYS GCPhys = pPage->GCPhys;
3183 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3184 if (pShwPT->a[i].n.u1Present)
3185 {
3186 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3187 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3188 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3189 }
3190}
3191
3192
3193/**
3194 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3195 *
3196 * @param pPool The pool.
3197 * @param pPage The page.
3198 * @param pShwPT The shadow page table (mapping of the page).
3199 */
3200DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3201{
3202 RTGCPHYS GCPhys = pPage->GCPhys;
3203 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3204 if (pShwPT->a[i].n.u1Present)
3205 {
3206 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%VGp\n",
3207 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3208 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3209 }
3210}
3211#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3212
3213
3214/**
3215 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
3216 *
3217 * @param pPool The pool.
3218 * @param pPage The page.
3219 * @param pShwPD The shadow page directory (mapping of the page).
3220 */
3221DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3222{
3223 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3224 {
3225 if (pShwPD->a[i].n.u1Present)
3226 {
3227 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3228 if (pSubPage)
3229 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3230 else
3231 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3232 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3233 }
3234 }
3235}
3236
3237
3238/**
3239 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3240 *
3241 * @param pPool The pool.
3242 * @param pPage The page.
3243 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3244 */
3245DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3246{
3247 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3248 {
3249 if (pShwPDPT->a[i].n.u1Present)
3250 {
3251 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3252 if (pSubPage)
3253 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3254 else
3255 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3256 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3257 }
3258 }
3259}
3260
3261/**
3262 * Clear references to shadowed pages in a 64-bit level 4 page table.
3263 *
3264 * @param pPool The pool.
3265 * @param pPage The page.
3266 * @param pShwPML4 The shadow page map level-4 table (mapping of the page).
3267 */
3268DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3269{
3270 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3271 {
3272 if (pShwPML4->a[i].n.u1Present)
3273 {
3274 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3275 if (pSubPage)
3276 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3277 else
3278 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3279 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3280 }
3281 }
3282}
3283
3284/**
3285 * Clear references to shadowed pages in an EPT page table.
3286 *
3287 * @param pPool The pool.
3288 * @param pPage The page.
3289 * @param pShwPT The shadow page table (mapping of the page).
3290 */
3291DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3292{
3293 RTGCPHYS GCPhys = pPage->GCPhys;
3294 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3295 if (pShwPT->a[i].n.u1Present)
3296 {
3297 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
3298                  i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3299 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3300 }
3301}
3302
3303/**
3304 * Clear references to shadowed pages in an EPT page directory.
3305 *
3306 * @param pPool The pool.
3307 * @param pPage The page.
3308 * @param pShwPD The shadow page directory (mapping of the page).
3309 */
3310DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3311{
3312 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3313 {
3314 if (pShwPD->a[i].n.u1Present)
3315 {
3316 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3317 if (pSubPage)
3318 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3319 else
3320 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3321 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3322 }
3323 }
3324}
3325
3326/**
3327 * Clear references to shadowed pages in an EPT page directory pointer table.
3328 *
3329 * @param pPool The pool.
3330 * @param pPage The page.
3331 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3332 */
3333DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3334{
3335 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3336 {
3337 if (pShwPDPT->a[i].n.u1Present)
3338 {
3339 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3340 if (pSubPage)
3341 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3342 else
3343 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3344 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3345 }
3346 }
3347}
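/*
 * Summary of the three EPT deref helpers above: the page table level
 * (pgmPoolTrackDerefPTEPT) drops the GC physical references one PTE at a
 * time via pgmPoolTracDerefGCPhys, whereas the page directory and PDPT
 * levels (pgmPoolTrackDerefPDEPT / pgmPoolTrackDerefPDPTEPT) resolve the
 * referenced shadow page through the pPool->HCPhysTree AVL tree and release
 * the corresponding user record with pgmPoolTrackFreeUser.
 */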
3348
3349/**
3350 * Clears all references made by this page.
3351 *
3352 * This includes other shadow pages and GC physical addresses.
3353 *
3354 * @param pPool The pool.
3355 * @param pPage The page.
3356 */
3357static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3358{
3359 /*
3360 * Map the shadow page and take action according to the page kind.
3361 */
3362 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
3363 switch (pPage->enmKind)
3364 {
3365#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3366 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3367 {
3368 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3369 void *pvGst;
3370 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3371 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3372 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3373 break;
3374 }
3375
3376 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3377 {
3378 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3379 void *pvGst;
3380 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3381 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3382 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3383 break;
3384 }
3385
3386 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3387 {
3388 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3389 void *pvGst;
3390 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3391 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3392 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3393 break;
3394 }
3395
3396 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3397 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3398 {
3399 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3400 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3401 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3402 break;
3403 }
3404
3405 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3406 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3407 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3408 {
3409 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3410 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3411 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3412 break;
3413 }
3414
3415#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3416 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3417 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3418 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3419 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3420 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3421 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3422 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3423 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3424 break;
3425#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3426
3427 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
3428 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3429 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3430 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3431 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3432 break;
3433
3434 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3435 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3436 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3437 break;
3438
3439 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
3440 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3441 break;
3442
3443 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3444 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
3445 break;
3446
3447 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3448 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
3449 break;
3450
3451 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3452 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
3453 break;
3454
3455 default:
3456 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3457 }
3458
3459    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3460 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3461 ASMMemZeroPage(pvShw);
3462 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3463 pPage->fZeroed = true;
3464}
3465#endif /* PGMPOOL_WITH_USER_TRACKING */
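/*
 * Note on pgmPoolTrackDeref: with PGMPOOL_WITH_GCPHYS_TRACKING the guest page
 * table is mapped as well (PGM_GCPHYS_2_PTR / PGM_GCPHYS_2_PTR_EX) so that
 * the per-PTE GC physical back references can be dropped; without it, the
 * page table kinds are simply skipped. Directory, PDPT, PML4 and EPT kinds
 * always go through the HCPhysTree based user tracking helpers above, and the
 * shadow page is zeroed again before the function returns.
 */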
3466
3467
3468/**
3469 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3470 *
3471 * @param pPool The pool.
3472 */
3473static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3474{
3475 /*
3476 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
3477 */
3478 Assert(NIL_PGMPOOL_IDX == 0);
3479 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3480 {
3481 /*
3482 * Get the page address.
3483 */
3484 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3485 union
3486 {
3487 uint64_t *pau64;
3488 uint32_t *pau32;
3489 } u;
3490 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
3491
3492 /*
3493 * Mark stuff not present.
3494 */
3495 switch (pPage->enmKind)
3496 {
3497 case PGMPOOLKIND_ROOT_32BIT_PD:
3498 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3499 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3500 u.pau32[iPage] = 0;
3501 break;
3502
3503 case PGMPOOLKIND_ROOT_PAE_PD:
3504 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * X86_PG_PAE_PDPE_ENTRIES; iPage++)
3505 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3506 u.pau64[iPage] = 0;
3507 break;
3508
3509 case PGMPOOLKIND_ROOT_PDPT:
3510                /* Not currently the root of shadowed pages; ignore it. */
3511 break;
3512
3513 case PGMPOOLKIND_ROOT_NESTED:
3514 ASMMemZero32(u.pau64, PAGE_SIZE);
3515 break;
3516 }
3517 }
3518
3519 /*
3520 * Paranoia (to be removed), flag a global CR3 sync.
3521 */
3522 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
3523}
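/*
 * The special root pages (32-bit PD, PAE PD, PDPT, nested root) live at the
 * fixed indexes below PGMPOOL_IDX_FIRST and are never put on the free list;
 * only their present, non-mapping entries are cleared here (the nested root
 * is zeroed completely), and a global CR3 sync is flagged as a precaution.
 */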
3524
3525
3526/**
3527 * Flushes the entire cache.
3528 *
3529 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3530 * and will execute the CR3 flush.
3531 *
3532 * @param pPool The pool.
3533 */
3534static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3535{
3536 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3537 LogFlow(("pgmPoolFlushAllInt:\n"));
3538
3539 /*
3540 * If there are no pages in the pool, there is nothing to do.
3541 */
3542 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3543 {
3544 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3545 return;
3546 }
3547
3548 /*
3549 * Nuke the free list and reinsert all pages into it.
3550 */
3551 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3552 {
3553 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3554
3555#ifdef IN_RING3
3556 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
3557#endif
3558#ifdef PGMPOOL_WITH_MONITORING
3559 if (pPage->fMonitored)
3560 pgmPoolMonitorFlush(pPool, pPage);
3561 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3562 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3563 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3564 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3565 pPage->cModifications = 0;
3566#endif
3567 pPage->GCPhys = NIL_RTGCPHYS;
3568 pPage->enmKind = PGMPOOLKIND_FREE;
3569 Assert(pPage->idx == i);
3570 pPage->iNext = i + 1;
3571 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3572 pPage->fSeenNonGlobal = false;
3573        pPage->fMonitored = false;
3574 pPage->fCached = false;
3575 pPage->fReusedFlushPending = false;
3576 pPage->fCR3Mix = false;
3577#ifdef PGMPOOL_WITH_USER_TRACKING
3578 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3579#endif
3580#ifdef PGMPOOL_WITH_CACHE
3581 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3582 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3583#endif
3584 }
3585 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3586 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3587 pPool->cUsedPages = 0;
3588
3589#ifdef PGMPOOL_WITH_USER_TRACKING
3590 /*
3591 * Zap and reinitialize the user records.
3592 */
3593 pPool->cPresent = 0;
3594 pPool->iUserFreeHead = 0;
3595 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
3596 const unsigned cMaxUsers = pPool->cMaxUsers;
3597 for (unsigned i = 0; i < cMaxUsers; i++)
3598 {
3599 paUsers[i].iNext = i + 1;
3600 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3601 paUsers[i].iUserTable = 0xfffffffe;
3602 }
3603 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3604#endif
3605
3606#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3607 /*
3608 * Clear all the GCPhys links and rebuild the phys ext free list.
3609 */
3610 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3611 pRam;
3612 pRam = CTXALLSUFF(pRam->pNext))
3613 {
3614 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3615 while (iPage-- > 0)
3616 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3617 }
3618
3619 pPool->iPhysExtFreeHead = 0;
3620 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
3621 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3622 for (unsigned i = 0; i < cMaxPhysExts; i++)
3623 {
3624 paPhysExts[i].iNext = i + 1;
3625 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3626 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3627 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3628 }
3629 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3630#endif
3631
3632#ifdef PGMPOOL_WITH_MONITORING
3633 /*
3634 * Just zap the modified list.
3635 */
3636 pPool->cModifiedPages = 0;
3637 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3638#endif
3639
3640#ifdef PGMPOOL_WITH_CACHE
3641 /*
3642 * Clear the GCPhys hash and the age list.
3643 */
3644 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
3645 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3646 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3647 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3648#endif
3649
3650 /*
3651 * Flush all the special root pages.
3652 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3653 */
3654 pgmPoolFlushAllSpecialRoots(pPool);
3655 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3656 {
3657 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3658 pPage->iNext = NIL_PGMPOOL_IDX;
3659#ifdef PGMPOOL_WITH_MONITORING
3660 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3661 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3662 pPage->cModifications = 0;
3663 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3664 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3665 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3666 if (pPage->fMonitored)
3667 {
3668 PVM pVM = pPool->CTXSUFF(pVM);
3669 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3670 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3671 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3672 pPool->pfnAccessHandlerGC, MMHyperCCToRC(pVM, pPage),
3673 pPool->pszAccessHandler);
3674 AssertFatalRCSuccess(rc);
3675# ifdef PGMPOOL_WITH_CACHE
3676 pgmPoolHashInsert(pPool, pPage);
3677# endif
3678 }
3679#endif
3680#ifdef PGMPOOL_WITH_USER_TRACKING
3681 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3682#endif
3683#ifdef PGMPOOL_WITH_CACHE
3684 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3685 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3686#endif
3687 }
3688
3689 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3690}
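/*
 * pgmPoolFlushAllInt thus resets the whole pool state in one pass: every
 * ordinary page goes back on the free list with its monitoring removed, the
 * user, phys-ext, modified-page and cache structures are reinitialized, and
 * finally the special root pages are scrubbed, with
 * PGMHandlerPhysicalChangeCallbacks refreshing the access handler callbacks
 * for any of them that were monitored.
 */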
3691
3692
3693/**
3694 * Flushes a pool page.
3695 *
3696 * This moves the page to the free list after removing all user references to it.
3697 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3698 *
3699 * @returns VBox status code.
3700 * @retval VINF_SUCCESS on success.
3701 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
3702 * @param pPool The pool.
3703 * @param pPage The shadow page.
3704 */
3705int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3706{
3707 int rc = VINF_SUCCESS;
3708 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3709 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3710 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3711
3712 /*
3713 * Quietly reject any attempts at flushing any of the special root pages.
3714 */
3715 if (pPage->idx < PGMPOOL_IDX_FIRST)
3716 {
3717 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3718 return VINF_SUCCESS;
3719 }
3720
3721 /*
3722 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
3723 */
3724 if (PGMGetHyperCR3(CTXSUFF(pPool->pVM)) == pPage->Core.Key)
3725 {
3726 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4, ("Can't free the shadow CR3! (%VGp vs %VGp kind=%d\n", PGMGetHyperCR3(CTXSUFF(pPool->pVM)), pPage->Core.Key, pPage->enmKind));
3727 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3728 return VINF_SUCCESS;
3729 }
3730
3731 /*
3732 * Mark the page as being in need of an ASMMemZeroPage().
3733 */
3734 pPage->fZeroed = false;
3735
3736#ifdef PGMPOOL_WITH_USER_TRACKING
3737 /*
3738 * Clear the page.
3739 */
3740 pgmPoolTrackClearPageUsers(pPool, pPage);
3741 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3742 pgmPoolTrackDeref(pPool, pPage);
3743 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3744#endif
3745
3746#ifdef PGMPOOL_WITH_CACHE
3747 /*
3748 * Flush it from the cache.
3749 */
3750 pgmPoolCacheFlushPage(pPool, pPage);
3751#endif /* PGMPOOL_WITH_CACHE */
3752
3753#ifdef PGMPOOL_WITH_MONITORING
3754 /*
3755 * Deregister the monitoring.
3756 */
3757 if (pPage->fMonitored)
3758 rc = pgmPoolMonitorFlush(pPool, pPage);
3759#endif
3760
3761 /*
3762 * Free the page.
3763 */
3764 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3765 pPage->iNext = pPool->iFreeHead;
3766 pPool->iFreeHead = pPage->idx;
3767 pPage->enmKind = PGMPOOLKIND_FREE;
3768 pPage->GCPhys = NIL_RTGCPHYS;
3769 pPage->fReusedFlushPending = false;
3770
3771 pPool->cUsedPages--;
3772 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3773 return rc;
3774}
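/*
 * The flush order above matters: user references are cleared before the
 * shadowed entries are dereferenced, the page is then dropped from the GCPhys
 * cache, and only afterwards is write monitoring removed; pgmPoolMonitorFlush
 * may return VERR_PGM_POOL_CLEARED, which is passed straight back to the
 * caller.
 */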
3775
3776
3777/**
3778 * Frees a usage of a pool page.
3779 *
3780 * The caller is responsible for updating the user table so that it no longer
3781 * references the shadow page.
3782 *
3783 * @param pPool The pool.
3784 * @param pPage The shadow page.
3785 * @param iUser The shadow page pool index of the user table.
3786 * @param iUserTable The index into the user table (shadowed).
3787 */
3788void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3789{
3790 STAM_PROFILE_START(&pPool->StatFree, a);
3791 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3792 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3793 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3794#ifdef PGMPOOL_WITH_USER_TRACKING
3795 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3796#endif
3797#ifdef PGMPOOL_WITH_CACHE
3798 if (!pPage->fCached)
3799#endif
3800 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3801 STAM_PROFILE_STOP(&pPool->StatFree, a);
3802}
3803
3804
3805/**
3806 * Makes sure one or more free pages are available by growing the pool or freeing a cached page.
3807 *
3808 * @returns VBox status code.
3809 * @retval VINF_SUCCESS on success.
3810 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3811 *
3812 * @param pPool The pool.
3813 * @param iUser The user of the page.
3814 */
3815static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3816{
3817 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3818
3819 /*
3820     * If the pool isn't fully grown yet, expand it.
3821 */
3822 if (pPool->cCurPages < pPool->cMaxPages)
3823 {
3824 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3825#ifdef IN_RING3
3826 int rc = PGMR3PoolGrow(pPool->pVMHC);
3827#else
3828 int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3829#endif
3830 if (VBOX_FAILURE(rc))
3831 return rc;
3832 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3833 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3834 return VINF_SUCCESS;
3835 }
3836
3837#ifdef PGMPOOL_WITH_CACHE
3838 /*
3839 * Free one cached page.
3840 */
3841 return pgmPoolCacheFreeOne(pPool, iUser);
3842#else
3843 /*
3844 * Flush the pool.
3845 * If we have tracking enabled, it should be possible to come up with
3846 * a cheap replacement strategy...
3847 */
3848    /** @todo incompatible with long mode paging (cr3 root will be flushed) */
3849    Assert(!CPUMIsGuestInLongMode(pPool->CTXSUFF(pVM)));
3850 pgmPoolFlushAllInt(pPool);
3851 return VERR_PGM_POOL_FLUSHED;
3852#endif
3853}
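/*
 * In other words, growing is preferred over eviction: ring-3 grows the pool
 * directly via PGMR3PoolGrow, the raw-mode and ring-0 contexts bounce to the
 * host with VMMCALLHOST_PGM_POOL_GROW, and only once the pool has reached
 * cMaxPages does pgmPoolCacheFreeOne (or, without the cache, a full
 * pgmPoolFlushAllInt) reclaim a page on behalf of iUser.
 */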
3854
3855
3856/**
3857 * Allocates a page from the pool.
3858 *
3859 * This page may actually be a cached page and not in need of any processing
3860 * on the caller's part.
3861 *
3862 * @returns VBox status code.
3863 * @retval VINF_SUCCESS if a NEW page was allocated.
3864 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3865 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3866 * @param pVM The VM handle.
3867 * @param GCPhys The GC physical address of the page we're gonna shadow.
3868 * For 4MB and 2MB PD entries, it's the first address the
3869 * shadow PT is covering.
3870 * @param enmKind The kind of mapping.
3871 * @param iUser The shadow page pool index of the user table.
3872 * @param iUserTable The index into the user table (shadowed).
3873 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3874 */
3875int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
3876{
3877 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3878 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3879 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3880 *ppPage = NULL;
3881
3882#ifdef PGMPOOL_WITH_CACHE
3883 if (pPool->fCacheEnabled)
3884 {
3885 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3886 if (VBOX_SUCCESS(rc2))
3887 {
3888 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3889 LogFlow(("pgmPoolAlloc: cached returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3890 return rc2;
3891 }
3892 }
3893#endif
3894
3895 /*
3896 * Allocate a new one.
3897 */
3898 int rc = VINF_SUCCESS;
3899 uint16_t iNew = pPool->iFreeHead;
3900 if (iNew == NIL_PGMPOOL_IDX)
3901 {
3902 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3903 if (VBOX_FAILURE(rc))
3904 {
3905 if (rc != VERR_PGM_POOL_CLEARED)
3906 {
3907 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3908 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3909 return rc;
3910 }
3911 Log(("pgmPoolMakeMoreFreePages failed with %Vrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
3912 rc = VERR_PGM_POOL_FLUSHED;
3913 }
3914 iNew = pPool->iFreeHead;
3915 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3916 }
3917
3918 /* unlink the free head */
3919 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3920 pPool->iFreeHead = pPage->iNext;
3921 pPage->iNext = NIL_PGMPOOL_IDX;
3922
3923 /*
3924 * Initialize it.
3925 */
3926 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3927 pPage->enmKind = enmKind;
3928 pPage->GCPhys = GCPhys;
3929 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3930 pPage->fMonitored = false;
3931 pPage->fCached = false;
3932 pPage->fReusedFlushPending = false;
3933 pPage->fCR3Mix = false;
3934#ifdef PGMPOOL_WITH_MONITORING
3935 pPage->cModifications = 0;
3936 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3937 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3938#endif
3939#ifdef PGMPOOL_WITH_USER_TRACKING
3940 pPage->cPresent = 0;
3941 pPage->iFirstPresent = ~0;
3942
3943 /*
3944 * Insert into the tracking and cache. If this fails, free the page.
3945 */
3946 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3947 if (VBOX_FAILURE(rc3))
3948 {
3949 if (rc3 != VERR_PGM_POOL_CLEARED)
3950 {
3951 pPool->cUsedPages--;
3952 pPage->enmKind = PGMPOOLKIND_FREE;
3953 pPage->GCPhys = NIL_RTGCPHYS;
3954 pPage->iNext = pPool->iFreeHead;
3955 pPool->iFreeHead = pPage->idx;
3956 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3957 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3958 return rc3;
3959 }
3960 Log(("pgmPoolTrackInsert failed with %Vrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
3961 rc = VERR_PGM_POOL_FLUSHED;
3962 }
3963#endif /* PGMPOOL_WITH_USER_TRACKING */
3964
3965 /*
3966 * Commit the allocation, clear the page and return.
3967 */
3968#ifdef VBOX_WITH_STATISTICS
3969 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
3970 pPool->cUsedPagesHigh = pPool->cUsedPages;
3971#endif
3972
3973 if (!pPage->fZeroed)
3974 {
3975 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3976 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
3977 ASMMemZeroPage(pv);
3978 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3979 }
3980
3981 *ppPage = pPage;
3982 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
3983 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
3984 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3985 return rc;
3986}
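/*
 * Illustrative caller sketch for pgmPoolAlloc (GCPhysGuestPT, idxShwPD and
 * iPdeIndex below are placeholder names, not identifiers from this file):
 *
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAlloc(pVM, GCPhysGuestPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                           idxShwPD, iPdeIndex, &pShwPage);
 *     if (rc == VINF_SUCCESS)
 *     {
 *         // New page: the shadow PT is zeroed and must be filled from the guest PT.
 *     }
 *     else if (rc == VINF_PGM_CACHED_PAGE)
 *     {
 *         // Cached page: the shadow PT content is already valid.
 *     }
 *     // Failures such as VERR_PGM_POOL_FLUSHED are propagated; the caller is
 *     // expected to cope with the forced CR3 sync.
 */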
3987
3988
3989/**
3990 * Frees a usage of a pool page.
3991 *
3992 * @param pVM The VM handle.
3993 * @param HCPhys The HC physical address of the shadow page.
3994 * @param iUser The shadow page pool index of the user table.
3995 * @param iUserTable The index into the user table (shadowed).
3996 */
3997void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
3998{
3999 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4000 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
4001 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4002}
4003
4004
4005/**
4006 * Gets an in-use page in the pool by its physical address.
4007 *
4008 * @returns Pointer to the page.
4009 * @param pVM The VM handle.
4010 * @param HCPhys The HC physical address of the shadow page.
4011 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4012 */
4013PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4014{
4015 /** @todo profile this! */
4016 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
4017 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4018 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
4019 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
4020 return pPage;
4021}
4022
4023
4024/**
4025 * Flushes the entire cache.
4026 *
4027 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4028 * and will execute the CR3 flush.
4029 *
4030 * @param pVM The VM handle.
4031 */
4032void pgmPoolFlushAll(PVM pVM)
4033{
4034 LogFlow(("pgmPoolFlushAll:\n"));
4035 pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
4036}
4037