VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@13193

Last change on this file since 13193 was 13146, checked in by vboxsync, 16 years ago

#1865: Renamed PGMPhysReadGCPhys -> PGMPhysSimpleReadGCPhys, PGMPhysWriteGCPhys -> PGMPhysSimpleWriteGCPhys, PGMPhysReadGCPtrSafe -> PGMPhysReadGCPtr and PGMPhysWriteGCPtrSafe -> PGMPhysWriteGCPtr. This puts PGMPhysRead/Write and PGMPhysRead/WriteGCPtr in the same group.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 146.0 KB
/* $Id: PGMAllPool.cpp 13146 2008-10-09 22:58:12Z vboxsync $ */
/** @file
 * PGM Shadow Page Pool.
 */

/*
 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
 * Clara, CA 95054 USA or visit http://www.sun.com if you need
 * additional information or have any questions.
 */


/*******************************************************************************
*   Header Files                                                               *
*******************************************************************************/
#define LOG_GROUP LOG_GROUP_PGM_POOL
#include <VBox/pgm.h>
#include <VBox/mm.h>
#include <VBox/em.h>
#include <VBox/cpum.h>
#ifdef IN_GC
# include <VBox/patm.h>
#endif
#include "PGMInternal.h"
#include <VBox/vm.h>
#include <VBox/disopcode.h>
#include <VBox/hwacc_vmx.h>

#include <VBox/log.h>
#include <VBox/err.h>
#include <iprt/asm.h>


/*******************************************************************************
*   Internal Functions                                                         *
*******************************************************************************/
__BEGIN_DECLS
static void pgmPoolFlushAllInt(PPGMPOOL pPool);
#ifdef PGMPOOL_WITH_USER_TRACKING
DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#endif
#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
#endif
#ifdef PGMPOOL_WITH_CACHE
static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
#endif
#ifdef PGMPOOL_WITH_MONITORING
static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#endif
#ifndef IN_RING3
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
#endif
__END_DECLS


/**
 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
 *
 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
 * @param   enmKind     The page kind.
 */
DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            return true;
        default:
            return false;
    }
}


#if defined(IN_GC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
/**
 * Maps a pool page into the current context.
 *
 * @returns Pointer to the mapping.
 * @param   pVM     The VM handle.
 * @param   pPage   The page to map.
 */
void *pgmPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
{
    /* general pages. */
    if (pPage->idx >= PGMPOOL_IDX_FIRST)
    {
        Assert(pPage->idx < pVM->pgm.s.CTX_SUFF(pPool)->cCurPages);
        void *pv;
        int rc = PGMDynMapHCPage(pVM, pPage->Core.Key, &pv);
        AssertReleaseRC(rc);
        return pv;
    }

    /* special pages. */
# ifdef IN_GC
    switch (pPage->idx)
    {
        case PGMPOOL_IDX_PD:
            return pVM->pgm.s.pGC32BitPD;
        case PGMPOOL_IDX_PAE_PD:
        case PGMPOOL_IDX_PAE_PD_0:
            return pVM->pgm.s.apGCPaePDs[0];
        case PGMPOOL_IDX_PAE_PD_1:
            return pVM->pgm.s.apGCPaePDs[1];
        case PGMPOOL_IDX_PAE_PD_2:
            return pVM->pgm.s.apGCPaePDs[2];
        case PGMPOOL_IDX_PAE_PD_3:
            return pVM->pgm.s.apGCPaePDs[3];
        case PGMPOOL_IDX_PDPT:
            return pVM->pgm.s.pGCPaePDPT;
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }

# else  /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
    RTHCPHYS HCPhys;
    switch (pPage->idx)
    {
        case PGMPOOL_IDX_PD:
            HCPhys = pVM->pgm.s.HCPhys32BitPD;
            break;
        case PGMPOOL_IDX_PAE_PD:
        case PGMPOOL_IDX_PAE_PD_0:
            HCPhys = pVM->pgm.s.aHCPhysPaePDs[0];
            break;
        case PGMPOOL_IDX_PAE_PD_1:
            HCPhys = pVM->pgm.s.aHCPhysPaePDs[1];
            break;
        case PGMPOOL_IDX_PAE_PD_2:
            HCPhys = pVM->pgm.s.aHCPhysPaePDs[2];
            break;
        case PGMPOOL_IDX_PAE_PD_3:
            HCPhys = pVM->pgm.s.aHCPhysPaePDs[3];
            break;
        case PGMPOOL_IDX_PDPT:
            HCPhys = pVM->pgm.s.HCPhysPaePDPT;
            break;
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }
    void *pv;
    int rc = PGMDynMapHCPage(pVM, HCPhys, &pv);
    AssertReleaseRC(rc);
    return pv;
# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
}
#endif /* IN_GC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */


#ifdef PGMPOOL_WITH_MONITORING
/**
 * Determine the size of a write instruction.
 * @returns number of bytes written.
 * @param   pDis    The disassembler state.
 */
static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
{
    /*
     * This is very crude and possibly wrong for some opcodes,
     * but since it's not really supposed to be called we can
     * probably live with that.
     */
    return DISGetParamSize(pDis, &pDis->param1);
}


/**
 * Flushes a chain of pages sharing the same access monitor.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pPool   The pool.
 * @param   pPage   A page in the chain.
 */
int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    LogFlow(("pgmPoolMonitorChainFlush: Flush page %VGp type=%d\n", pPage->GCPhys, pPage->enmKind));

    /*
     * Find the list head.
     */
    uint16_t idx = pPage->idx;
    if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
    {
        while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
        {
            idx = pPage->iMonitoredPrev;
            Assert(idx != pPage->idx);
            pPage = &pPool->aPages[idx];
        }
    }

    /*
     * Iterate the list flushing each shadow page.
     */
    int rc = VINF_SUCCESS;
    for (;;)
    {
        idx = pPage->iMonitoredNext;
        Assert(idx != pPage->idx);
        if (pPage->idx >= PGMPOOL_IDX_FIRST)
        {
            int rc2 = pgmPoolFlushPage(pPool, pPage);
            if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
                rc = VINF_PGM_SYNC_CR3;
        }
        /* next */
        if (idx == NIL_PGMPOOL_IDX)
            break;
        pPage = &pPool->aPages[idx];
    }
    return rc;
}


/**
 * Wrapper for getting the current context pointer to the entry being modified.
 *
 * @returns Pointer to the current context mapping of the entry.
 * @param   pPool       The pool.
 * @param   pvFault     The fault virtual address.
 * @param   GCPhysFault The fault physical address.
 * @param   cbEntry     The entry size.
 */
#ifdef IN_RING3
DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
#else
DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
#endif
{
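    /*
     * In GC the flat fault address can be used directly; in R0 the guest
     * physical page must be mapped first; in R3 pvFault is already a host
     * context pointer.
     */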
#ifdef IN_GC
    return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));

#elif defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
    void *pvRet;
    int rc = PGMDynMapGCPageOff(pPool->pVMR0, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
    AssertFatalRCSuccess(rc);
    return pvRet;

#elif defined(IN_RING0)
    void *pvRet;
    int rc = pgmRamGCPhys2HCPtr(&pPool->pVMR0->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
    AssertFatalRCSuccess(rc);
    return pvRet;

#elif defined(IN_RING3)
    return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
#else
# error "huh?"
#endif
}


/**
 * Process shadow entries before they are changed by the guest.
 *
 * For PT entries we will clear them. For PD entries, we'll simply check
 * for mapping conflicts and set the SyncCR3 FF if found.
 *
 * @param   pPool       The pool.
 * @param   pPage       The head page.
 * @param   GCPhysFault The guest physical fault address.
 * @param   uAddress    In R0 and GC this is the guest context fault address (flat).
 *                      In R3 this is the host context 'fault' address.
 * @param   pCpu        The disassembler state for figuring out the write size.
 *                      This need not be specified if the caller knows we won't do cross entry accesses.
 */
#ifdef IN_RING3
void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
#else
void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
#endif
{
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
    const unsigned off     = GCPhysFault & PAGE_OFFSET_MASK;
    const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;

    LogFlow(("pgmPoolMonitorChainChanging: %VGv phys=%VGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));

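    /*
     * Walk the chain of monitored pages, processing each shadow page that
     * mirrors the guest page being written.
     */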
    for (;;)
    {
        union
        {
            void       *pv;
            PX86PT      pPT;
            PX86PTPAE   pPTPae;
            PX86PD      pPD;
            PX86PDPAE   pPDPae;
            PX86PDPT    pPDPT;
            PX86PML4    pPML4;
        } uShw;
        uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);

        switch (pPage->enmKind)
        {
            case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
            {
                const unsigned iShw = off / sizeof(X86PTE);
                if (uShw.pPT->a[iShw].n.u1Present)
                {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                    PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                    Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                               pGstPte->u & X86_PTE_PG_MASK);
# endif
                    uShw.pPT->a[iShw].u = 0;
                }
                break;
            }

            /* page/2 sized */
            case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
                {
                    const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
                    if (uShw.pPTPae->a[iShw].n.u1Present)
                    {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                        PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                        Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
                        pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                                   uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                                   pGstPte->u & X86_PTE_PG_MASK);
# endif
                        uShw.pPTPae->a[iShw].u = 0;
                    }
                }
                break;

            case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
            {
                const unsigned iShw = off / sizeof(X86PTEPAE);
                if (uShw.pPTPae->a[iShw].n.u1Present)
                {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                    PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                    Log4(("pgmPoolMonitorChainChanging pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                               pGstPte->u & X86_PTE_PAE_PG_MASK);
# endif
                    uShw.pPTPae->a[iShw].u = 0;
                }

                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + cbWrite > sizeof(X86PTEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
                    AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));

                    if (uShw.pPTPae->a[iShw2].n.u1Present)
                    {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                        PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                        Log4(("pgmPoolMonitorChainChanging pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
                        pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                                   uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
                                                   pGstPte->u & X86_PTE_PAE_PG_MASK);
# endif
                        uShw.pPTPae->a[iShw2].u = 0;
                    }
                }

                break;
            }

            case PGMPOOLKIND_ROOT_32BIT_PD:
            {
                const unsigned iShw = off / sizeof(X86PTE);    // ASSUMING 32-bit guest paging!
                if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                }
                /* paranoia / a bit assumptive. */
                else if (   pCpu
                         && (off & 3)
                         && (off & 3) + cbWrite > sizeof(X86PTE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
                    if (    iShw2 != iShw
                        &&  iShw2 < RT_ELEMENTS(uShw.pPD->a)
                        &&  uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
                }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                if (    uShw.pPD->a[iShw].n.u1Present
                    &&  !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
# ifdef IN_GC       /* TLB load - we're pushing things a bit... */
                    ASMProbeReadByte(pvAddress);
# endif
                    pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
                    uShw.pPD->a[iShw].u = 0;
                }
#endif
                break;
            }

            case PGMPOOLKIND_ROOT_PAE_PD:
            {
                unsigned iShw = (off / sizeof(X86PTE)) * 2;    // ASSUMING 32-bit guest paging!
                for (unsigned i = 0; i < 2; i++, iShw++)
                {
                    if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                    }
                    /* paranoia / a bit assumptive. */
                    else if (   pCpu
                             && (off & 3)
                             && (off & 3) + cbWrite > 4)
                    {
                        const unsigned iShw2 = iShw + 2;
                        if (    iShw2 < RT_ELEMENTS(uShw.pPDPae->a)
                            &&  (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                        {
                            Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                            VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                        }
                    }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                    if (    uShw.pPDPae->a[iShw].n.u1Present
                        &&  !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
# ifdef IN_GC   /* TLB load - we're pushing things a bit... */
                        ASMProbeReadByte(pvAddress);
# endif
                        pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
#endif
                }
                break;
            }

            case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
            {
                const unsigned iShw = off / sizeof(X86PDEPAE);
                if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                }
#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
                /* Causes trouble when the guest uses a PDE to refer to the whole page table
                 * level structure (invalidate here; faults occur later on when the guest
                 * tries to change the page table entries).
                 * -> recheck; probably only applies to the GC case.
                 */
                else
                {
                    if (uShw.pPDPae->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
                                    /* Note: hardcoded PAE implementation dependency */
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
                }
#endif
                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + cbWrite > sizeof(X86PDEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
                    AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));

                    if (    iShw2 != iShw
                        &&  uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
                    else
                    if (uShw.pPDPae->a[iShw2].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
                                    /* Note: hardcoded PAE implementation dependency */
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
                        uShw.pPDPae->a[iShw2].u = 0;
                    }
#endif
                }
                break;
            }

            case PGMPOOLKIND_ROOT_PDPT:
            {
                /* Hopefully this doesn't happen very often:
                 * - touching unused parts of the page
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                const unsigned iShw = off / sizeof(X86PDPE);
                if (iShw < X86_PG_PAE_PDPE_ENTRIES)   /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
                {
                    if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                    }
                    /* paranoia / a bit assumptive. */
                    else if (   pCpu
                             && (off & 7)
                             && (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
                        if (    iShw2 != iShw
                            &&  iShw2 < X86_PG_PAE_PDPE_ENTRIES
                            &&  uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
                        {
                            Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                            VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                        }
                    }
                }
                break;
            }

#ifndef IN_GC
            case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
            {
                Assert(pPage->enmKind == PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD);

                const unsigned iShw = off / sizeof(X86PDEPAE);
                if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                }
                else
                {
                    if (uShw.pPDPae->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
                                    pPage->idx,
                                    iShw);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
                }
                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + cbWrite > sizeof(X86PDEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
                    AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));

                    if (    iShw2 != iShw
                        &&  uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
                    else
                    if (uShw.pPDPae->a[iShw2].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
                                    pPage->idx,
                                    iShw2);
                        uShw.pPDPae->a[iShw2].u = 0;
                    }
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
            {
                /* Hopefully this doesn't happen very often:
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    const unsigned iShw = off / sizeof(X86PDPE);
                    if (uShw.pPDPT->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
                        uShw.pPDPT->a[iShw].u = 0;
                    }
                    /* paranoia / a bit assumptive. */
                    if (    pCpu
                        &&  (off & 7)
                        &&  (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
                        if (uShw.pPDPT->a[iShw2].n.u1Present)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
                            pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
                            uShw.pPDPT->a[iShw2].u = 0;
                        }
                    }
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
            {
                /* Hopefully this doesn't happen very often:
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    const unsigned iShw = off / sizeof(X86PDPE);
                    if (uShw.pPML4->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
                        uShw.pPML4->a[iShw].u = 0;
                    }
                    /* paranoia / a bit assumptive. */
                    if (    pCpu
                        &&  (off & 7)
                        &&  (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
                        if (uShw.pPML4->a[iShw2].n.u1Present)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
                            pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
                            uShw.pPML4->a[iShw2].u = 0;
                        }
                    }
                }
                break;
            }
#endif /* !IN_GC */

            default:
                AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
        }

        /* next */
        if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
            return;
        pPage = &pPool->aPages[pPage->iMonitoredNext];
    }
}


# ifndef IN_RING3
/**
 * Checks if an access could be a fork operation in progress.
 *
 * Meaning that the guest is setting up the parent process for Copy-On-Write.
 *
 * @returns true if it's likely that we're forking, otherwise false.
 * @param   pPool       The pool.
 * @param   pCpu        The disassembled instruction.
 * @param   offFault    The access offset.
 */
DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
{
    /*
     * i386 linux is using btr to clear X86_PTE_RW.
     * The functions involved are (2.6.16 source inspection):
     *     clear_bit
     *     ptep_set_wrprotect
     *     copy_one_pte
     *     copy_pte_range
     *     copy_pmd_range
     *     copy_pud_range
     *     copy_page_range
     *     dup_mmap
     *     dup_mm
     *     copy_mm
     *     copy_process
     *     do_fork
     */
    if (    pCpu->pCurInstr->opcode == OP_BTR
        &&  !(offFault & 4)
        /** @todo Validate that the bit index is X86_PTE_RW. */
       )
    {
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
        return true;
    }
    return false;
}


/**
 * Determine whether the page is likely to have been reused.
 *
 * @returns true if we consider the page as being reused for a different purpose.
 * @returns false if we consider it to still be a paging page.
 * @param   pVM         VM Handle.
 * @param   pPage       The page in question.
 * @param   pRegFrame   Trap register frame.
 * @param   pCpu        The disassembly info for the faulting instruction.
 * @param   pvFault     The fault address.
 *
 * @remark  The REP prefix check is left to the caller because of STOSD/W.
 */
DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
{
#ifndef IN_GC
    if (    HWACCMHasPendingIrq(pVM)
        &&  (pRegFrame->rsp - pvFault) < 32)
    {
        /* Fault caused by stack writes while trying to inject an interrupt event. */
        Log(("pgmPoolMonitorIsReused: reused %VGv for interrupt stack (rsp=%VGv).\n", pvFault, pRegFrame->rsp));
        return true;
    }
#else
    NOREF(pVM);
#endif

    switch (pCpu->pCurInstr->opcode)
    {
        /* call implies the actual push of the return address faulted */
        case OP_CALL:
            Log4(("pgmPoolMonitorIsReused: CALL\n"));
            return true;
        case OP_PUSH:
            Log4(("pgmPoolMonitorIsReused: PUSH\n"));
            return true;
        case OP_PUSHF:
            Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
            return true;
        case OP_PUSHA:
            Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
            return true;
        case OP_FXSAVE:
            Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
            return true;
        case OP_MOVNTI:     /* solaris - block_zero_no_xmm */
            Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
            return true;
        case OP_MOVNTDQ:    /* solaris - hwblkclr & hwblkpagecopy */
            Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
            return true;
        case OP_MOVSWD:
        case OP_STOSWD:
            if (    pCpu->prefix == (PREFIX_REP|PREFIX_REX)
                &&  pRegFrame->rcx >= 0x40
               )
            {
                Assert(pCpu->mode == CPUMODE_64BIT);

                Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
                return true;
            }
            return false;
    }
    if (    (pCpu->param1.flags & USE_REG_GEN32)
        &&  (pCpu->param1.base.reg_gen == USE_REG_ESP))
    {
        Log4(("pgmPoolMonitorIsReused: ESP\n"));
        return true;
    }

    //if (pPage->fCR3Mix)
    //    return false;
    return false;
}


/**
 * Flushes the page being accessed.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                     PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * First, do the flushing.
     */
    int rc = pgmPoolMonitorChainFlush(pPool, pPage);

    /*
     * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
     */
    uint32_t cbWritten;
    int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
    if (VBOX_SUCCESS(rc2))
        pRegFrame->rip += pCpu->opsize;
    else if (rc2 == VERR_EM_INTERPRETER)
    {
#ifdef IN_GC
        if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
        {
            LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
                     pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
            rc = VINF_SUCCESS;
            STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
        }
        else
#endif
        {
            rc = VINF_EM_RAW_EMULATE_INSTR;
            STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
        }
    }
    else
        rc = rc2;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
    return rc;

}


/**
 * Handles the STOSD write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                          PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    Assert(pCpu->mode == CPUMODE_32BIT);

    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Execute REP STOSD.
     *
     * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
     * write situation, meaning that it's safe to write here.
     */
    RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
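    /* For each dword: invalidate the affected shadow entries first, then do
       the guest write and advance the pointers and STOSD registers. */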
    while (pRegFrame->ecx)
    {
        pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
#ifdef IN_GC
        *(uint32_t *)pu32 = pRegFrame->eax;
#else
        PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
#endif
        pu32           += 4;
        GCPhysFault    += 4;
        pRegFrame->edi += 4;
        pRegFrame->ecx--;
    }
    pRegFrame->rip += pCpu->opsize;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
    return VINF_SUCCESS;
}


/**
 * Handles the simple write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                           PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Clear all the pages. ASSUMES that pvFault is readable.
     */
    pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);

    /*
     * Interpret the instruction.
     */
    uint32_t cb;
    int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
    if (VBOX_SUCCESS(rc))
        pRegFrame->rip += pCpu->opsize;
    else if (rc == VERR_EM_INTERPRETER)
    {
        LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
                 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
        rc = VINF_EM_RAW_EMULATE_INSTR;
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
    }

    /*
     * Quick hack, with logging enabled we're getting stale
     * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
     * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
     * have to be fixed to support this. But that'll have to wait till next week.
     *
     * An alternative is to keep track of the changed PTEs together with the
     * GCPhys from the guest PT. This may prove expensive though.
     *
     * At the moment, it's VITAL that it's done AFTER the instruction interpreting
     * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
     */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
    return rc;
}


/**
 * \#PF Handler callback for PT write accesses.
 *
 * @returns VBox status code (appropriate for GC return).
 * @param   pVM         VM Handle.
 * @param   uErrorCode  CPU Error code.
 * @param   pRegFrame   Trap register frame.
 *                      NULL on DMA and other non CPU access.
 * @param   pvFault     The fault address (cr2).
 * @param   GCPhysFault The GC physical address corresponding to pvFault.
 * @param   pvUser      User argument.
 */
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
{
    STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
    PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
    PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
    LogFlow(("pgmPoolAccessHandler: pvFault=%VGv pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));

    /*
     * We should ALWAYS have the list head as user parameter. This
     * is because we use that page to record the changes.
     */
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);

    /*
     * Disassemble the faulting instruction.
     */
    DISCPUSTATE Cpu;
    int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
    AssertRCReturn(rc, rc);

    /*
     * Check if it's worth dealing with.
     */
    bool fReused = false;
    if (    (   pPage->cModifications < 48   /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
             || pPage->fCR3Mix)
        &&  !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
        &&  !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
    {
        /*
         * Simple instructions, no REP prefix.
         */
        if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
        {
            rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
            return rc;
        }

        /*
         * Windows is frequently doing small memset() operations (netio test 4k+).
         * We have to deal with these or we'll kill the cache and performance.
         */
        if (    Cpu.pCurInstr->opcode == OP_STOSWD
            &&  CPUMGetGuestCPL(pVM, pRegFrame) == 0
            &&  pRegFrame->ecx <= 0x20
            &&  pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
            &&  !((uintptr_t)pvFault & 3)
            &&  (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
            &&  Cpu.mode == CPUMODE_32BIT
            &&  Cpu.opmode == CPUMODE_32BIT
            &&  Cpu.addrmode == CPUMODE_32BIT
            &&  Cpu.prefix == PREFIX_REP
            &&  !pRegFrame->eflags.Bits.u1DF
           )
        {
            rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
            return rc;
        }

        /* REP prefix, don't bother. */
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
        Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%VGv opcode=%d prefix=%#x\n",
              pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
    }

    /*
     * Not worth it, so flush it.
     *
     * If we considered it to be reused, don't go back to ring-3
     * to emulate failed instructions since we usually cannot
     * interpret them. This may be a bit risky, in which case
     * the reuse detection must be fixed.
     */
    rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
    if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
        rc = VINF_SUCCESS;
    STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
    return rc;
}

# endif /* !IN_RING3 */
#endif /* PGMPOOL_WITH_MONITORING */



#ifdef PGMPOOL_WITH_CACHE
/**
 * Inserts a page into the GCPhys hash table.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolHashInsert: %VGp\n", pPage->GCPhys));
    Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
    uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
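    /* Push the page onto the head of the bucket's singly linked list. */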
    pPage->iNext = pPool->aiHash[iHash];
    pPool->aiHash[iHash] = pPage->idx;
}


/**
 * Removes a page from the GCPhys hash table.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolHashRemove: %VGp\n", pPage->GCPhys));
    uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
    if (pPool->aiHash[iHash] == pPage->idx)
        pPool->aiHash[iHash] = pPage->iNext;
    else
    {
        uint16_t iPrev = pPool->aiHash[iHash];
        for (;;)
        {
            const int16_t i = pPool->aPages[iPrev].iNext;
            if (i == pPage->idx)
            {
                pPool->aPages[iPrev].iNext = pPage->iNext;
                break;
            }
            if (i == NIL_PGMPOOL_IDX)
            {
                AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
                break;
            }
            iPrev = i;
        }
    }
    pPage->iNext = NIL_PGMPOOL_IDX;
}


/**
 * Frees up one cache page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
 * @param   pPool       The pool.
 * @param   iUser       The user index.
 */
static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
{
#ifndef IN_GC
    const PVM pVM = pPool->CTX_SUFF(pVM);
#endif
    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
    STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);

    /*
     * Select one page from the tail of the age list.
     */
    uint16_t iToFree = pPool->iAgeTail;
    if (iToFree == iUser)
        iToFree = pPool->aPages[iToFree].iAgePrev;
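    /* Never recycle the page the caller (iUser) is about to reference; pick
       the next-oldest entry instead. */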
/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
    if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
    {
        uint16_t i = pPool->aPages[iToFree].iAgePrev;
        for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
        {
            if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
                continue;
            iToFree = i;
            break;
        }
    }
*/
    Assert(iToFree != iUser);
    AssertRelease(iToFree != NIL_PGMPOOL_IDX);

    int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
    if (rc == VINF_SUCCESS)
        PGM_INVL_GUEST_TLBS(); /* see PT handler. */
    return rc;
}


/**
 * Checks if a kind mismatch is really a page being reused
 * or if it's just normal remappings.
 *
 * @returns true if reused and the cached page (enmKind1) should be flushed
 * @returns false if not reused.
 * @param   enmKind1    The kind of the cached page.
 * @param   enmKind2    The kind of the requested page.
 */
static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
{
    switch (enmKind1)
    {
        /*
         * Never reuse them. There is no remapping in non-paging mode.
         */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
            return true;

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         */
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
                case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
                case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         */
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * These cannot be flushed, and it's common to reuse the PDs as PTs.
         */
        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
        case PGMPOOLKIND_ROOT_PDPT:
        case PGMPOOLKIND_ROOT_NESTED:
            return false;

        default:
            AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
    }
}


/**
 * Attempts to satisfy a pgmPoolAlloc request from the cache.
 *
 * @returns VBox status code.
 * @retval  VINF_PGM_CACHED_PAGE on success.
 * @retval  VERR_FILE_NOT_FOUND if not found.
 * @param   pPool       The pool.
 * @param   GCPhys      The GC physical address of the page we're gonna shadow.
 * @param   enmKind     The kind of mapping.
 * @param   iUser       The shadow page pool index of the user table.
 * @param   iUserTable  The index into the user table (shadowed).
 * @param   ppPage      Where to store the pointer to the page.
 */
static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
{
#ifndef IN_GC
    const PVM pVM = pPool->CTX_SUFF(pVM);
#endif
    /*
     * Look up the GCPhys in the hash.
     */
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    Log3(("pgmPoolCacheAlloc: %VGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
    if (i != NIL_PGMPOOL_IDX)
    {
        do
        {
            PPGMPOOLPAGE pPage = &pPool->aPages[i];
            Log3(("pgmPoolCacheAlloc: slot %d found page %VGp\n", i, pPage->GCPhys));
            if (pPage->GCPhys == GCPhys)
            {
                if ((PGMPOOLKIND)pPage->enmKind == enmKind)
                {
                    int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
                    if (VBOX_SUCCESS(rc))
                    {
                        *ppPage = pPage;
                        STAM_COUNTER_INC(&pPool->StatCacheHits);
                        return VINF_PGM_CACHED_PAGE;
                    }
                    return rc;
                }

                /*
                 * The kind is different. In some cases we should now flush the page
                 * as it has been reused, but in most cases this is normal remapping
                 * of PDs as PT or big pages using the GCPhys field in a slightly
                 * different way than the other kinds.
                 */
                if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
                {
                    STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
                    pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
                    PGM_INVL_GUEST_TLBS(); /* see PT handler. */
                    break;
                }
            }

            /* next */
            i = pPage->iNext;
        } while (i != NIL_PGMPOOL_IDX);
    }

    Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
    STAM_COUNTER_INC(&pPool->StatCacheMisses);
    return VERR_FILE_NOT_FOUND;
}


/**
 * Inserts a page into the cache.
 *
 * @param   pPool           The pool.
 * @param   pPage           The cached page.
 * @param   fCanBeCached    Set if the page is fit for caching from the caller's point of view.
 */
static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
{
    /*
     * Insert into the GCPhys hash if the page is fit for that.
     */
    Assert(!pPage->fCached);
    if (fCanBeCached)
    {
        pPage->fCached = true;
        pgmPoolHashInsert(pPool, pPage);
        Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
              pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
        STAM_COUNTER_INC(&pPool->StatCacheCacheable);
    }
    else
    {
        Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
              pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
        STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
    }

    /*
     * Insert at the head of the age list.
     */
    pPage->iAgePrev = NIL_PGMPOOL_IDX;
    pPage->iAgeNext = pPool->iAgeHead;
    if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
        pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
    else
        pPool->iAgeTail = pPage->idx;
    pPool->iAgeHead = pPage->idx;
}


/**
 * Flushes a cached page.
 *
 * @param   pPool       The pool.
 * @param   pPage       The cached page.
 */
static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolCacheFlushPage: %VGp\n", pPage->GCPhys));

    /*
     * Remove the page from the hash.
     */
    if (pPage->fCached)
    {
        pPage->fCached = false;
        pgmPoolHashRemove(pPool, pPage);
    }
    else
        Assert(pPage->iNext == NIL_PGMPOOL_IDX);

    /*
     * Remove it from the age list.
     */
    if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
    else
        pPool->iAgeTail = pPage->iAgePrev;
    if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
    else
        pPool->iAgeHead = pPage->iAgeNext;
    pPage->iAgeNext = NIL_PGMPOOL_IDX;
    pPage->iAgePrev = NIL_PGMPOOL_IDX;
}
#endif /* PGMPOOL_WITH_CACHE */


#ifdef PGMPOOL_WITH_MONITORING
/**
 * Looks for pages sharing the monitor.
 *
 * @returns Pointer to the head page.
 * @returns NULL if not found.
 * @param   pPool       The Pool
 * @param   pNewPage    The page which is going to be monitored.
 */
static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
{
#ifdef PGMPOOL_WITH_CACHE
    /*
     * Look up the GCPhys in the hash.
     */
    RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
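    /* Any pool page whose GCPhys falls within the same guest page shares the monitor. */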
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    if (i == NIL_PGMPOOL_IDX)
        return NULL;
    do
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[i];
        if (    pPage->GCPhys - GCPhys < PAGE_SIZE
            &&  pPage != pNewPage)
        {
            switch (pPage->enmKind)
            {
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
                case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
                case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
                case PGMPOOLKIND_ROOT_32BIT_PD:
                case PGMPOOLKIND_ROOT_PAE_PD:
                case PGMPOOLKIND_ROOT_PDPT:
                {
                    /* find the head */
                    while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
                    {
                        Assert(pPage->iMonitoredPrev != pPage->idx);
                        pPage = &pPool->aPages[pPage->iMonitoredPrev];
                    }
                    return pPage;
                }

                /* ignore, no monitoring. */
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                case PGMPOOLKIND_ROOT_NESTED:
                    break;
                default:
                    AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
            }
        }

        /* next */
        i = pPage->iNext;
    } while (i != NIL_PGMPOOL_IDX);
#endif
    return NULL;
}

/**
 * Enables write monitoring of a guest page.
1494 *
1495 * @returns VBox status code.
1496 * @retval VINF_SUCCESS on success.
1497 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1498 * @param pPool The pool.
1499 * @param pPage The cached page.
1500 */
1501static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1502{
1503 LogFlow(("pgmPoolMonitorInsert %VGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1504
1505 /*
1506 * Filter out the relevant kinds.
1507 */
1508 switch (pPage->enmKind)
1509 {
1510 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1511 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1512 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1513 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1514 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1515 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1516 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1517 case PGMPOOLKIND_ROOT_PDPT:
1518 break;
1519
1520 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1521 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1522 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1523 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1524 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1525 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1526 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1527 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1528 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1529 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1530 case PGMPOOLKIND_ROOT_NESTED:
1531 /* Nothing to monitor here. */
1532 return VINF_SUCCESS;
1533
1534 case PGMPOOLKIND_ROOT_32BIT_PD:
1535 case PGMPOOLKIND_ROOT_PAE_PD:
1536#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1537 break;
1538#endif
1539 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1540 default:
1541 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1542 }
1543
1544 /*
1545 * Install handler.
1546 */
1547 int rc;
1548 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1549 if (pPageHead)
1550 {
1551 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1552 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1553 pPage->iMonitoredPrev = pPageHead->idx;
1554 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1555 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1556 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1557 pPageHead->iMonitoredNext = pPage->idx;
1558 rc = VINF_SUCCESS;
1559 }
1560 else
1561 {
1562 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1563 PVM pVM = pPool->CTX_SUFF(pVM);
1564 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1565 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1566 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1567 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1568 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1569 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1570 pPool->pszAccessHandler);
1571 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1572 * the heap size should suffice. */
1573 AssertFatalRC(rc);
1574 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1575 rc = VERR_PGM_POOL_CLEARED;
1576 }
1577 pPage->fMonitored = true;
1578 return rc;
1579}
1580
1581
1582/**
1583 * Disables write monitoring of a guest page.
1584 *
1585 * @returns VBox status code.
1586 * @retval VINF_SUCCESS on success.
1587 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1588 * @param pPool The pool.
1589 * @param pPage The cached page.
1590 */
1591static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1592{
1593 /*
1594 * Filter out the relevant kinds.
1595 */
1596 switch (pPage->enmKind)
1597 {
1598 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1599 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1600 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1601 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1602 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1603 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1604 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1605 case PGMPOOLKIND_ROOT_PDPT:
1606 break;
1607
1608 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1609 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1610 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1611 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1612 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1613 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1614 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1615 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1616 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1617 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1618 case PGMPOOLKIND_ROOT_NESTED:
1619 /* Nothing to monitor here. */
1620 return VINF_SUCCESS;
1621
1622 case PGMPOOLKIND_ROOT_32BIT_PD:
1623 case PGMPOOLKIND_ROOT_PAE_PD:
1624#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1625 break;
1626#endif
1627 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1628 default:
1629 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1630 }
1631
1632 /*
1633 * Remove the page from the monitored list or uninstall it if last.
1634 */
1635 const PVM pVM = pPool->CTX_SUFF(pVM);
1636 int rc;
1637 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1638 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1639 {
1640 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1641 {
1642 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1643 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1644 pNewHead->fCR3Mix = pPage->fCR3Mix;
1645 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1646 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1647 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1648 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
1649 pPool->pszAccessHandler);
1650 AssertFatalRCSuccess(rc);
1651 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1652 }
1653 else
1654 {
1655 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1656 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1657 {
1658 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1659 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1660 }
1661 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1662 rc = VINF_SUCCESS;
1663 }
1664 }
1665 else
1666 {
1667 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1668 AssertFatalRC(rc);
1669 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1670 rc = VERR_PGM_POOL_CLEARED;
1671 }
1672 pPage->fMonitored = false;
1673
1674 /*
1675 * Remove it from the list of modified pages (if in it).
1676 */
1677 pgmPoolMonitorModifiedRemove(pPool, pPage);
1678
1679 return rc;
1680}
1681
1682
1683#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1684/**
1685 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1686 *
1687 * @param pPool The Pool.
1688 * @param pPage A page in the chain.
1689 * @param fCR3Mix The new fCR3Mix value.
1690 */
1691static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1692{
1693 /* current */
1694 pPage->fCR3Mix = fCR3Mix;
1695
1696 /* before */
1697 int16_t idx = pPage->iMonitoredPrev;
1698 while (idx != NIL_PGMPOOL_IDX)
1699 {
1700 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1701 idx = pPool->aPages[idx].iMonitoredPrev;
1702 }
1703
1704 /* after */
1705 idx = pPage->iMonitoredNext;
1706 while (idx != NIL_PGMPOOL_IDX)
1707 {
1708 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1709 idx = pPool->aPages[idx].iMonitoredNext;
1710 }
1711}
1712
1713
1714/**
1715 * Installs or modifies monitoring of a CR3 page (special).
1716 *
1717 * We're pretending the CR3 page is shadowed by the pool so we can use the
1718 * generic mechanisms for detecting chained monitoring. (This also gives us a
1719 * taste of what code changes are required to really pool CR3 shadow pages.)
1720 *
1721 * @returns VBox status code.
1722 * @param pPool The pool.
1723 * @param idxRoot The CR3 (root) page index.
1724 * @param GCPhysCR3 The (new) CR3 value.
1725 */
1726int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1727{
1728 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1729 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1730 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
1731 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1732
1733 /*
1734 * The unlikely case where it already matches.
1735 */
1736 if (pPage->GCPhys == GCPhysCR3)
1737 {
1738 Assert(pPage->fMonitored);
1739 return VINF_SUCCESS;
1740 }
1741
1742 /*
1743 * Flush the current monitoring and remove it from the hash.
1744 */
1745 int rc = VINF_SUCCESS;
1746 if (pPage->fMonitored)
1747 {
1748 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1749 rc = pgmPoolMonitorFlush(pPool, pPage);
1750 if (rc == VERR_PGM_POOL_CLEARED)
1751 rc = VINF_SUCCESS;
1752 else
1753 AssertFatalRC(rc);
1754 pgmPoolHashRemove(pPool, pPage);
1755 }
1756
1757 /*
1758 * Monitor the page at the new location and insert it into the hash.
1759 */
1760 pPage->GCPhys = GCPhysCR3;
1761 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1762 if (rc2 != VERR_PGM_POOL_CLEARED)
1763 {
1764 AssertFatalRC(rc2);
1765 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1766 rc = rc2;
1767 }
1768 pgmPoolHashInsert(pPool, pPage);
1769 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1770 return rc;
1771}
1772
1773
1774/**
1775 * Removes the monitoring of a CR3 page (special).
1776 *
1777 * @returns VBox status code.
1778 * @param pPool The pool.
1779 * @param idxRoot The CR3 (root) page index.
1780 */
1781int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1782{
1783 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1784 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1785 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1786 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1787
1788 if (!pPage->fMonitored)
1789 return VINF_SUCCESS;
1790
1791 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1792 int rc = pgmPoolMonitorFlush(pPool, pPage);
1793 if (rc != VERR_PGM_POOL_CLEARED)
1794 AssertFatalRC(rc);
1795 else
1796 rc = VINF_SUCCESS;
1797 pgmPoolHashRemove(pPool, pPage);
1798 Assert(!pPage->fMonitored);
1799 pPage->GCPhys = NIL_RTGCPHYS;
1800 return rc;
1801}
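
/*
 * Illustrative usage sketch (not part of the original source; the call site is
 * hypothetical): under PGMPOOL_WITH_MIXED_PT_CR3 a guest CR3 change would
 * re-point the root monitoring roughly like this, with idxRoot one of the
 * fixed root indexes below PGMPOOL_IDX_FIRST:
 *
 *     int rc = pgmPoolMonitorMonitorCR3(pPool, idxRoot, GCPhysNewCR3);
 *     ...
 *     rc = pgmPoolMonitorUnmonitorCR3(pPool, idxRoot);
 *
 * Both functions tolerate redundant calls: monitoring an already matching
 * GCPhys and unmonitoring an unmonitored page both return VINF_SUCCESS.
 */
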
1802#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1803
1804
1805/**
1806 * Inserts the page into the list of modified pages.
1807 *
1808 * @param pPool The pool.
1809 * @param pPage The page.
1810 */
1811void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1812{
1813 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1814 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1815 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1816 && pPool->iModifiedHead != pPage->idx,
1817 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1818 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1819 pPool->iModifiedHead, pPool->cModifiedPages));
1820
1821 pPage->iModifiedNext = pPool->iModifiedHead;
1822 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1823 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1824 pPool->iModifiedHead = pPage->idx;
1825 pPool->cModifiedPages++;
1826#ifdef VBOX_WITH_STATISTICS
1827 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1828 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1829#endif
1830}
1831
1832
1833/**
1834 * Removes the page from the list of modified pages and resets the
1835 * modification counter.
1836 *
1837 * @param pPool The pool.
1838 * @param pPage The page which is believed to be in the list of modified pages.
1839 */
1840static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1841{
1842 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1843 if (pPool->iModifiedHead == pPage->idx)
1844 {
1845 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1846 pPool->iModifiedHead = pPage->iModifiedNext;
1847 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1848 {
1849 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1850 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1851 }
1852 pPool->cModifiedPages--;
1853 }
1854 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1855 {
1856 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1857 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1858 {
1859 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1860 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1861 }
1862 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1863 pPool->cModifiedPages--;
1864 }
1865 else
1866 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1867 pPage->cModifications = 0;
1868}
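
/*
 * Illustrative note (not part of the original source): together with
 * pgmPoolMonitorModifiedInsert above, this keeps the invariant that a page is
 * linked if and only if one of the following holds, allowing an O(1)
 * membership test:
 *
 *     bool fInModifiedList = pPage->iModifiedNext != NIL_PGMPOOL_IDX
 *                         || pPage->iModifiedPrev != NIL_PGMPOOL_IDX
 *                         || pPool->iModifiedHead == pPage->idx;
 */
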
1869
1870
1871/**
1872 * Zaps the list of modified pages, resetting their modification counters in the process.
1873 *
1874 * @param pVM The VM handle.
1875 */
1876void pgmPoolMonitorModifiedClearAll(PVM pVM)
1877{
1878 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1879 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1880
1881 unsigned cPages = 0; NOREF(cPages);
1882 uint16_t idx = pPool->iModifiedHead;
1883 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1884 while (idx != NIL_PGMPOOL_IDX)
1885 {
1886 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1887 idx = pPage->iModifiedNext;
1888 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1889 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1890 pPage->cModifications = 0;
1891 Assert(++cPages);
1892 }
1893 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1894 pPool->cModifiedPages = 0;
1895}
1896
1897
1898/**
1899 * Clear all shadow pages and clear all modification counters.
1900 *
1901 * @param pVM The VM handle.
1902 * @remark Should only be used when monitoring is available, thus placed in
1903 * the PGMPOOL_WITH_MONITORING #ifdef.
1904 */
1905void pgmPoolClearAll(PVM pVM)
1906{
1907 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1908 STAM_PROFILE_START(&pPool->StatClearAll, c);
1909 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1910
1911 /*
1912 * Iterate all the pages until we've encountered all those in use.
1913 * This is a simple but not quite optimal solution.
1914 */
1915 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1916 unsigned cLeft = pPool->cUsedPages;
1917 unsigned iPage = pPool->cCurPages;
1918 while (--iPage >= PGMPOOL_IDX_FIRST)
1919 {
1920 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1921 if (pPage->GCPhys != NIL_RTGCPHYS)
1922 {
1923 switch (pPage->enmKind)
1924 {
1925 /*
1926 * We only care about shadow page tables.
1927 */
1928 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1929 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1930 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1931 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1932 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1933 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1934 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1935 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1936 {
1937#ifdef PGMPOOL_WITH_USER_TRACKING
1938 if (pPage->cPresent)
1939#endif
1940 {
1941 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1942 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1943 ASMMemZeroPage(pvShw);
1944 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1945#ifdef PGMPOOL_WITH_USER_TRACKING
1946 pPage->cPresent = 0;
1947 pPage->iFirstPresent = ~0;
1948#endif
1949 }
1950 }
1951 /* fall thru */
1952
1953 default:
1954 Assert(!pPage->cModifications || ++cModifiedPages);
1955 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1956 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1957 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1958 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1959 pPage->cModifications = 0;
1960 break;
1961
1962 }
1963 if (!--cLeft)
1964 break;
1965 }
1966 }
1967
1968 /* sweep the special pages too. */
1969 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1970 {
1971 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1972 if (pPage->GCPhys != NIL_RTGCPHYS)
1973 {
1974 Assert(!pPage->cModifications || ++cModifiedPages);
1975 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1976 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1977 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1978 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1979 pPage->cModifications = 0;
1980 }
1981 }
1982
1983#ifndef DEBUG_michael
1984 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1985#endif
1986 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1987 pPool->cModifiedPages = 0;
1988
1989#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1990 /*
1991 * Clear all the GCPhys links and rebuild the phys ext free list.
1992 */
1993 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
1994 pRam;
1995 pRam = pRam->CTX_SUFF(pNext))
1996 {
1997 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1998 while (iPage-- > 0)
1999 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2000 }
2001
2002 pPool->iPhysExtFreeHead = 0;
2003 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2004 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2005 for (unsigned i = 0; i < cMaxPhysExts; i++)
2006 {
2007 paPhysExts[i].iNext = i + 1;
2008 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2009 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2010 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2011 }
2012 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2013#endif
2014
2015
2016 pPool->cPresent = 0;
2017 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2018}
2019
2020/**
2021 * Handle SyncCR3 pool tasks
2022 *
2023 * @returns VBox status code.
2024 * @retval VINF_SUCCESS on success.
2025 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2026 * @param pVM The VM handle.
2027 * @remark Should only be used when monitoring is available, thus placed in
2028 * the PGMPOOL_WITH_MONITORING #ifdef.
2029 */
2030int pgmPoolSyncCR3(PVM pVM)
2031{
2032 /*
2033 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2034 * Occasionally we will have to clear all the shadow page tables because we wanted
2035 * to monitor a page which was mapped by too many shadowed page tables. This operation
2036 * is sometimes referred to as a 'lightweight flush'.
2037 */
2038 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2039 pgmPoolMonitorModifiedClearAll(pVM);
2040 else
2041 {
2042# ifndef IN_GC
2043 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2044 pgmPoolClearAll(pVM);
2045# else
2046 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2047 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2048 return VINF_PGM_SYNC_CR3;
2049# endif
2050 }
2051 return VINF_SUCCESS;
2052}
2053#endif /* PGMPOOL_WITH_MONITORING */
2054
2055#ifdef PGMPOOL_WITH_USER_TRACKING
2056/**
2057 * Frees up at least one user entry.
2058 *
2059 * @returns VBox status code.
2060 * @retval VINF_SUCCESS if at least one user entry was freed.
2061 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2062 * @param pPool The pool.
2063 * @param iUser The user index.
2064 */
2065static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2066{
2067 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2068#ifdef PGMPOOL_WITH_CACHE
2069 /*
2070 * Just free cached pages in a braindead fashion.
2071 */
2072 /** @todo walk the age list backwards and free the first with usage. */
2073 int rc = VINF_SUCCESS;
2074 do
2075 {
2076 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2077 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
2078 rc = rc2;
2079 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2080 return rc;
2081#else
2082 /*
2083 * Lazy approach.
2084 */
2085 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
2086 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
2087 pgmPoolFlushAllInt(pPool);
2088 return VERR_PGM_POOL_FLUSHED;
2089#endif
2090}
2091
2092
2093/**
2094 * Inserts a page into the cache.
2095 *
2096 * This will create a user node for the page, insert it into the GCPhys
2097 * hash, and insert it into the age list.
2098 *
2099 * @returns VBox status code.
2100 * @retval VINF_SUCCESS if successfully added.
2101 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2102 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
2103 * @param pPool The pool.
2104 * @param pPage The cached page.
2105 * @param GCPhys The GC physical address of the page we're going to shadow.
2106 * @param iUser The user index.
2107 * @param iUserTable The user table index.
2108 */
2109DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2110{
2111 int rc = VINF_SUCCESS;
2112 PPGMPOOLUSER pUser = pPool->CTX_SUFF(paUsers);
2113
2114 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
2115
2116 /*
2117 * Find a free user node.
2118 */
2119 uint16_t i = pPool->iUserFreeHead;
2120 if (i == NIL_PGMPOOL_USER_INDEX)
2121 {
2122 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2123 if (VBOX_FAILURE(rc))
2124 return rc;
2125 i = pPool->iUserFreeHead;
2126 }
2127
2128 /*
2129 * Unlink the user node from the free list,
2130 * initialize and insert it into the user list.
2131 */
2132 pPool->iUserFreeHead = pUser[i].iNext;
2133 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
2134 pUser[i].iUser = iUser;
2135 pUser[i].iUserTable = iUserTable;
2136 pPage->iUserHead = i;
2137
2138 /*
2139 * Insert into cache and enable monitoring of the guest page if enabled.
2140 *
2141 * Until we implement caching of all levels, including the CR3 one, we'll
2142 * have to make sure we don't try monitor & cache any recursive reuse of
2143 * a monitored CR3 page. Because all windows versions are doing this we'll
2144 * have to be able to do combined access monitoring, CR3 + PT and
2145 * PD + PT (guest PAE).
2146 *
2147 * Update:
2148 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2149 */
2150#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2151# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2152 const bool fCanBeMonitored = true;
2153# else
2154 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2155 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2156 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2157# endif
2158# ifdef PGMPOOL_WITH_CACHE
2159 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2160# endif
2161 if (fCanBeMonitored)
2162 {
2163# ifdef PGMPOOL_WITH_MONITORING
2164 rc = pgmPoolMonitorInsert(pPool, pPage);
2165 if (rc == VERR_PGM_POOL_CLEARED)
2166 {
2167 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2168# ifndef PGMPOOL_WITH_CACHE
2169 pgmPoolMonitorFlush(pPool, pPage);
2170 rc = VERR_PGM_POOL_FLUSHED;
2171# endif
2172 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2173 pUser[i].iNext = pPool->iUserFreeHead;
2174 pUser[i].iUser = NIL_PGMPOOL_IDX;
2175 pPool->iUserFreeHead = i;
2176 }
2177# endif
2178 }
2179#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2180 return rc;
2181}
2182
2183
2184# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2185/**
2186 * Adds a user reference to a page.
2187 *
2188 * This will allocate and insert a user record for the page, and tell the
2189 * cache to update its replacement stats (moving the page to the head of the age list).
2190 *
2191 * @returns VBox status code.
2192 * @retval VINF_SUCCESS if successfully added.
2193 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2194 * @param pPool The pool.
2195 * @param pPage The cached page.
2196 * @param iUser The user index.
2197 * @param iUserTable The user table.
2198 */
2199static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2200{
2201 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2202
2203 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
2204# ifdef VBOX_STRICT
2205 /*
2206 * Check that the entry doesn't already exist.
2207 */
2208 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2209 {
2210 uint16_t i = pPage->iUserHead;
2211 do
2212 {
2213 Assert(i < pPool->cMaxUsers);
2214 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2215 i = paUsers[i].iNext;
2216 } while (i != NIL_PGMPOOL_USER_INDEX);
2217 }
2218# endif
2219
2220 /*
2221 * Allocate a user node.
2222 */
2223 uint16_t i = pPool->iUserFreeHead;
2224 if (i == NIL_PGMPOOL_USER_INDEX)
2225 {
2226 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2227 if (VBOX_FAILURE(rc))
2228 return rc;
2229 i = pPool->iUserFreeHead;
2230 }
2231 pPool->iUserFreeHead = paUsers[i].iNext;
2232
2233 /*
2234 * Initialize the user node and insert it.
2235 */
2236 paUsers[i].iNext = pPage->iUserHead;
2237 paUsers[i].iUser = iUser;
2238 paUsers[i].iUserTable = iUserTable;
2239 pPage->iUserHead = i;
2240
2241# ifdef PGMPOOL_WITH_CACHE
2242 /*
2243 * Tell the cache to update its replacement stats for this page.
2244 */
2245 pgmPoolCacheUsed(pPool, pPage);
2246# endif
2247 return VINF_SUCCESS;
2248}
2249# endif /* PGMPOOL_WITH_CACHE */
2250
2251
2252/**
2253 * Frees a user record associated with a page.
2254 *
2255 * This does not clear the entry in the user table, it simply returns the
2256 * user record to the chain of free records.
2257 *
2258 * @param pPool The pool.
2259 * @param pPage The shadow page.
2260 * @param iUser The shadow page pool index of the user table.
2261 * @param iUserTable The index into the user table (shadowed).
2262 */
2263static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2264{
2265 /*
2266 * Unlink and free the specified user entry.
2267 */
2268 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2269
2270 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2271 uint16_t i = pPage->iUserHead;
2272 if ( i != NIL_PGMPOOL_USER_INDEX
2273 && paUsers[i].iUser == iUser
2274 && paUsers[i].iUserTable == iUserTable)
2275 {
2276 pPage->iUserHead = paUsers[i].iNext;
2277
2278 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2279 paUsers[i].iNext = pPool->iUserFreeHead;
2280 pPool->iUserFreeHead = i;
2281 return;
2282 }
2283
2284 /* General: Linear search. */
2285 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2286 while (i != NIL_PGMPOOL_USER_INDEX)
2287 {
2288 if ( paUsers[i].iUser == iUser
2289 && paUsers[i].iUserTable == iUserTable)
2290 {
2291 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2292 paUsers[iPrev].iNext = paUsers[i].iNext;
2293 else
2294 pPage->iUserHead = paUsers[i].iNext;
2295
2296 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2297 paUsers[i].iNext = pPool->iUserFreeHead;
2298 pPool->iUserFreeHead = i;
2299 return;
2300 }
2301 iPrev = i;
2302 i = paUsers[i].iNext;
2303 }
2304
2305 /* Fatal: didn't find it */
2306 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
2307 iUser, iUserTable, pPage->GCPhys));
2308}
2309
2310
2311/**
2312 * Gets the entry size of a shadow table.
2313 *
2314 * @param enmKind The kind of page.
2315 *
2316 * @returns The size of the entry in bytes. That is, 4 or 8.
2317 * @returns If the kind is not for a table, an assertion is raised and 0 is
2318 * returned.
2319 */
2320DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2321{
2322 switch (enmKind)
2323 {
2324 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2325 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2326 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2327 case PGMPOOLKIND_ROOT_32BIT_PD:
2328 return 4;
2329
2330 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2331 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2332 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2333 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2334 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2335 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2336 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2337 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2338 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2339 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2340 case PGMPOOLKIND_ROOT_PAE_PD:
2341 case PGMPOOLKIND_ROOT_PDPT:
2342 case PGMPOOLKIND_ROOT_NESTED:
2343 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2344 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2345 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2346 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2347 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2348 return 8;
2349
2350 default:
2351 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2352 }
2353}
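
/*
 * Illustrative sketch (not part of the original source): the entry size is
 * what a caller would use to derive the number of entries in a shadow table:
 *
 *     unsigned const cbEntry  = pgmPoolTrackGetShadowEntrySize(pPage->enmKind);
 *     unsigned const cEntries = PAGE_SIZE / cbEntry;
 *
 * That is 1024 entries for the 4 byte kinds and 512 for the 8 byte ones.
 */
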
2354
2355
2356/**
2357 * Gets the entry size of a guest table.
2358 *
2359 * @param enmKind The kind of page.
2360 *
2361 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2362 * @returns If the kind is not for a table, an assertion is raised and 0 is
2363 * returned.
2364 */
2365DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2366{
2367 switch (enmKind)
2368 {
2369 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2370 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2371 case PGMPOOLKIND_ROOT_32BIT_PD:
2372 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2373 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2374 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2375 return 4;
2376
2377 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2378 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2379 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2380 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2381 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2382 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2383 case PGMPOOLKIND_ROOT_PAE_PD:
2384 case PGMPOOLKIND_ROOT_PDPT:
2385 return 8;
2386
2387 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2388 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2389 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2390 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2391 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2392 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2393 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2394 case PGMPOOLKIND_ROOT_NESTED:
2395 /** @todo can we return 0? (nobody is calling this...) */
2396 AssertFailed();
2397 return 0;
2398
2399 default:
2400 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2401 }
2402}
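
/*
 * Illustrative note (not part of the original source): the shadow and guest
 * entry sizes differ for the mixed kinds; e.g. a PAE shadow of a 32-bit guest
 * page table uses 8 byte shadow entries to back 4 byte guest entries:
 *
 *     pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND_PAE_PT_FOR_32BIT_PT) == 8
 *     pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)  == 4
 */
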
2403
2404
2405#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2406/**
2407 * Scans one shadow page table for mappings of a physical page.
2408 *
2409 * @param pVM The VM handle.
2410 * @param pPhysPage The guest page in question.
2411 * @param iShw The shadow page table.
2412 * @param cRefs The number of references made in that PT.
2413 */
2414static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2415{
2416 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2417 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2418
2419 /*
2420 * Assert sanity.
2421 */
2422 Assert(cRefs == 1);
2423 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2424 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2425
2426 /*
2427 * Then, clear the actual mappings to the page in the shadow PT.
2428 */
2429 switch (pPage->enmKind)
2430 {
2431 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2432 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2433 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2434 {
2435 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2436 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2437 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2438 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2439 {
2440 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2441 pPT->a[i].u = 0;
2442 cRefs--;
2443 if (!cRefs)
2444 return;
2445 }
2446#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2447 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2448 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2449 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2450 {
2451 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2452 pPT->a[i].u = 0;
2453 }
2454#endif
2455 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2456 break;
2457 }
2458
2459 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2460 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2461 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2462 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2463 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2464 {
2465 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2466 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2467 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2468 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2469 {
2470 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2471 pPT->a[i].u = 0;
2472 cRefs--;
2473 if (!cRefs)
2474 return;
2475 }
2476#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2477 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2478 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2479 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2480 {
2481 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2482 pPT->a[i].u = 0;
2483 }
2484#endif
2485 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2486 break;
2487 }
2488
2489 default:
2490 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2491 }
2492}
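
/*
 * Illustrative note (not part of the original source): the match tests in the
 * loops above fold the present check and the address comparison into a single
 * compare by pre-computing the expected raw entry value, e.g. for PAE:
 *
 *     uint64_t const u64  = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
 *     bool     const fHit = (pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64;
 */
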
2493
2494
2495/**
2496 * Scans one shadow page table for mappings of a physical page.
2497 *
2498 * @param pVM The VM handle.
2499 * @param pPhysPage The guest page in question.
2500 * @param iShw The shadow page table.
2501 * @param cRefs The number of references made in that PT.
2502 */
2503void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2504{
2505 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2506 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2507 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2508 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2509 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2510 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2511}
2512
2513
2514/**
2515 * Flushes a list of shadow page tables mapping the same physical page.
2516 *
2517 * @param pVM The VM handle.
2518 * @param pPhysPage The guest page in question.
2519 * @param iPhysExt The physical cross reference extent list to flush.
2520 */
2521void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2522{
2523 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2524 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2525 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt\n", pPhysPage->HCPhys, iPhysExt));
2526
2527 const uint16_t iPhysExtStart = iPhysExt;
2528 PPGMPOOLPHYSEXT pPhysExt;
2529 do
2530 {
2531 Assert(iPhysExt < pPool->cMaxPhysExts);
2532 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2533 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2534 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2535 {
2536 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2537 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2538 }
2539
2540 /* next */
2541 iPhysExt = pPhysExt->iNext;
2542 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2543
2544 /* insert the list into the free list and clear the ram range entry. */
2545 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2546 pPool->iPhysExtFreeHead = iPhysExtStart;
2547 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2548
2549 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2550}
2551#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2552
2553
2554/**
2555 * Scans all shadow page tables for mappings of a physical page.
2556 *
2557 * This may be slow, but it's most likely more efficient than cleaning
2558 * out the entire page pool / cache.
2559 *
2560 * @returns VBox status code.
2561 * @retval VINF_SUCCESS if all references have been successfully cleared.
2562 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2563 * a page pool cleaning.
2564 *
2565 * @param pVM The VM handle.
2566 * @param pPhysPage The guest page in question.
2567 */
2568int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2569{
2570 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2571 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2572 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2573 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2574
2575#if 1
2576 /*
2577 * There is a limit to what makes sense.
2578 */
2579 if (pPool->cPresent > 1024)
2580 {
2581 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2582 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2583 return VINF_PGM_GCPHYS_ALIASED;
2584 }
2585#endif
2586
2587 /*
2588 * Iterate all the pages until we've encountered all those in use.
2589 * This is a simple but not quite optimal solution.
2590 */
2591 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2592 const uint32_t u32 = u64;
2593 unsigned cLeft = pPool->cUsedPages;
2594 unsigned iPage = pPool->cCurPages;
2595 while (--iPage >= PGMPOOL_IDX_FIRST)
2596 {
2597 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2598 if (pPage->GCPhys != NIL_RTGCPHYS)
2599 {
2600 switch (pPage->enmKind)
2601 {
2602 /*
2603 * We only care about shadow page tables.
2604 */
2605 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2606 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2607 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2608 {
2609 unsigned cPresent = pPage->cPresent;
2610 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2611 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2612 if (pPT->a[i].n.u1Present)
2613 {
2614 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2615 {
2616 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2617 pPT->a[i].u = 0;
2618 }
2619 if (!--cPresent)
2620 break;
2621 }
2622 break;
2623 }
2624
2625 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2626 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2627 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2628 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2629 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2630 {
2631 unsigned cPresent = pPage->cPresent;
2632 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2633 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2634 if (pPT->a[i].n.u1Present)
2635 {
2636 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2637 {
2638 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2639 pPT->a[i].u = 0;
2640 }
2641 if (!--cPresent)
2642 break;
2643 }
2644 break;
2645 }
2646 }
2647 if (!--cLeft)
2648 break;
2649 }
2650 }
2651
2652 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2653 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2654 return VINF_SUCCESS;
2655}
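
/*
 * Hypothetical caller sketch (not part of the original source): on
 * VINF_PGM_GCPHYS_ALIASED the caller is expected to fall back to a lightweight
 * pool clear rather than zapping entries one by one:
 *
 *     int rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
 *     if (rc == VINF_PGM_GCPHYS_ALIASED)
 *     {
 *         pVM->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
 *         VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
 *     }
 */
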
2656
2657
2658/**
2659 * Clears the user entry in a user table.
2660 *
2661 * This is used to remove all references to a page when flushing it.
2662 */
2663static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2664{
2665 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2666 Assert(pUser->iUser < pPool->cCurPages);
2667
2668 /*
2669 * Map the user page.
2670 */
2671 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2672 union
2673 {
2674 uint64_t *pau64;
2675 uint32_t *pau32;
2676 } u;
2677 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
2678
2679 /* Safety precaution in case we change the paging for other modes too in the future. */
2680 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
2681
2682#ifdef VBOX_STRICT
2683 /*
2684 * Some sanity checks.
2685 */
2686 switch (pUserPage->enmKind)
2687 {
2688 case PGMPOOLKIND_ROOT_32BIT_PD:
2689 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2690 Assert(!(u.pau32[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2691 break;
2692 case PGMPOOLKIND_ROOT_PAE_PD:
2693 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2694 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2695 break;
2696 case PGMPOOLKIND_ROOT_PDPT:
2697 Assert(pUser->iUserTable < 4);
2698 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2699 break;
2700 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2701 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2702 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2703 break;
2704 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2705 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2706 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2707 break;
2708 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2709 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2710 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2711 break;
2712 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2713 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2714 /* GCPhys >> PAGE_SHIFT is the index here */
2715 break;
2716 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2717 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2718 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2719 break;
2720
2721 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2722 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2723 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2724 break;
2725
2726 case PGMPOOLKIND_ROOT_NESTED:
2727 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2728 break;
2729
2730 default:
2731 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2732 break;
2733 }
2734#endif /* VBOX_STRICT */
2735
2736 /*
2737 * Clear the entry in the user page.
2738 */
2739 switch (pUserPage->enmKind)
2740 {
2741 /* 32-bit entries */
2742 case PGMPOOLKIND_ROOT_32BIT_PD:
2743 u.pau32[pUser->iUserTable] = 0;
2744 break;
2745
2746 /* 64-bit entries */
2747 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2748 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2749 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2750 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2751 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2752 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2753 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2754 case PGMPOOLKIND_ROOT_PAE_PD:
2755 case PGMPOOLKIND_ROOT_PDPT:
2756 case PGMPOOLKIND_ROOT_NESTED:
2757 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2758 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2759 u.pau64[pUser->iUserTable] = 0;
2760 break;
2761
2762 default:
2763 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2764 }
2765}
2766
2767
2768/**
2769 * Clears all users of a page.
2770 */
2771static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2772{
2773 /*
2774 * Free all the user records.
2775 */
2776 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2777 uint16_t i = pPage->iUserHead;
2778 while (i != NIL_PGMPOOL_USER_INDEX)
2779 {
2780 /* Clear the entry in the user table. */
2781 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2782
2783 /* Free it. */
2784 const uint16_t iNext = paUsers[i].iNext;
2785 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2786 paUsers[i].iNext = pPool->iUserFreeHead;
2787 pPool->iUserFreeHead = i;
2788
2789 /* Next. */
2790 i = iNext;
2791 }
2792 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2793}
2794
2795
2796#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2797/**
2798 * Allocates a new physical cross reference extent.
2799 *
2800 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2801 * @param pVM The VM handle.
2802 * @param piPhysExt Where to store the phys ext index.
2803 */
2804PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2805{
2806 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2807 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2808 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2809 {
2810 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2811 return NULL;
2812 }
2813 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2814 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2815 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2816 *piPhysExt = iPhysExt;
2817 return pPhysExt;
2818}
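
/*
 * Illustrative usage sketch (not part of the original source; iShwPT1 and
 * iShwPT2 are hypothetical): pgmPoolTrackPhysExtAddref below pairs the
 * allocation with initialization along these lines, falling back to the
 * MM_RAM_FLAGS_IDX_OVERFLOWED marker when the extents are exhausted:
 *
 *     uint16_t iPhysExt;
 *     PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
 *     if (pPhysExt)
 *     {
 *         pPhysExt->aidx[0] = iShwPT1;
 *         pPhysExt->aidx[1] = iShwPT2;
 *     }
 */
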
2819
2820
2821/**
2822 * Frees a physical cross reference extent.
2823 *
2824 * @param pVM The VM handle.
2825 * @param iPhysExt The extent to free.
2826 */
2827void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2828{
2829 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2830 Assert(iPhysExt < pPool->cMaxPhysExts);
2831 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2832 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2833 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2834 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2835 pPool->iPhysExtFreeHead = iPhysExt;
2836}
2837
2838
2839/**
2840 * Frees a chain of physical cross reference extents.
2841 *
2842 * @param pVM The VM handle.
2843 * @param iPhysExt The index of the head of the extent list to free.
2844 */
2845void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2846{
2847 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2848
2849 const uint16_t iPhysExtStart = iPhysExt;
2850 PPGMPOOLPHYSEXT pPhysExt;
2851 do
2852 {
2853 Assert(iPhysExt < pPool->cMaxPhysExts);
2854 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2855 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2856 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2857
2858 /* next */
2859 iPhysExt = pPhysExt->iNext;
2860 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2861
2862 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2863 pPool->iPhysExtFreeHead = iPhysExtStart;
2864}
2865
2866/**
2867 * Insert a reference into a list of physical cross reference extents.
2868 *
2869 * @returns The new ram range flags (top 16-bits).
2870 *
2871 * @param pVM The VM handle.
2872 * @param iPhysExt The physical extent index of the list head.
2873 * @param iShwPT The shadow page table index.
2874 *
2875 */
2876static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2877{
2878 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2879 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2880
2881 /* special common case. */
2882 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2883 {
2884 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2885 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2886 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2887 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2888 }
2889
2890 /* general treatment. */
2891 const uint16_t iPhysExtStart = iPhysExt;
2892 unsigned cMax = 15;
2893 for (;;)
2894 {
2895 Assert(iPhysExt < pPool->cMaxPhysExts);
2896 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2897 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2898 {
2899 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2900 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2901 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2902 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2903 }
2904 if (!--cMax)
2905 {
2906 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2907 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2908 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2909 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2910 }
 iPhysExt = paPhysExts[iPhysExt].iNext; /* advance to the next extent in the chain */
 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
 break; /* end of chain reached: fall through and add a new extent below */
2911 }
2912
2913 /* add another extent to the list. */
2914 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2915 if (!pNew)
2916 {
2917 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2918 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2919 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2920 }
2921 pNew->iNext = iPhysExtStart;
2922 pNew->aidx[0] = iShwPT;
2923 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2924 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2925}
2926
2927
2928/**
2929 * Add a reference to guest physical page where extents are in use.
2930 *
2931 * @returns The new ram range flags (top 16-bits).
2932 *
2933 * @param pVM The VM handle.
2934 * @param u16 The ram range flags (top 16-bits).
2935 * @param iShwPT The shadow page table index.
2936 */
2937uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2938{
2939 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2940 {
2941 /*
2942 * Convert to extent list.
2943 */
2944 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2945 uint16_t iPhysExt;
2946 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2947 if (pPhysExt)
2948 {
2949 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2950 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2951 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2952 pPhysExt->aidx[1] = iShwPT;
2953 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2954 }
2955 else
2956 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2957 }
2958 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2959 {
2960 /*
2961 * Insert into the extent list.
2962 */
2963 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2964 }
2965 else
2966 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2967 return u16;
2968}
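
/*
 * Illustrative decode sketch (not part of the original source): the tracking
 * word handled above packs a reference count ('crefs') and an index:
 *
 *     uint16_t const uCRefs = u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT);
 *     uint16_t const uIdx   = u16 & MM_RAM_FLAGS_IDX_MASK;
 *
 * When uCRefs equals MM_RAM_FLAGS_CREFS_PHYSEXT, uIdx is the head of a
 * physical extent list (or MM_RAM_FLAGS_IDX_OVERFLOWED); otherwise uIdx is a
 * single shadow page table index and uCRefs its reference count.
 */
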
2969
2970
2971/**
2972 * Clear references to guest physical memory.
2973 *
2974 * @param pPool The pool.
2975 * @param pPage The page.
2976 * @param pPhysPage Pointer to the aPages entry in the ram range.
2977 */
2978void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
2979{
2980 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
2981 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2982
2983 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2984 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2985 {
2986 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2987 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2988 do
2989 {
2990 Assert(iPhysExt < pPool->cMaxPhysExts);
2991
2992 /*
2993 * Look for the shadow page and check if it's all freed.
2994 */
2995 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2996 {
2997 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2998 {
2999 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3000
3001 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3002 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3003 {
3004 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3005 return;
3006 }
3007
3008 /* we can free the node. */
3009 PVM pVM = pPool->CTX_SUFF(pVM);
3010 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3011 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3012 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3013 {
3014 /* lonely node */
3015 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3016 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
3017 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3018 }
3019 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3020 {
3021 /* head */
3022 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
3023 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
3024 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
3025 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
3026 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3027 }
3028 else
3029 {
3030 /* in list */
3031 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3032 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3033 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3034 }
3036 return;
3037 }
3038 }
3039
3040 /* next */
3041 iPhysExtPrev = iPhysExt;
3042 iPhysExt = paPhysExts[iPhysExt].iNext;
3043 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3044
3045 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3046 }
3047 else /* nothing to do */
3048 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
3049}
3050
3051
3052
3053/**
3054 * Clear references to guest physical memory.
3055 *
3056 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3057 * is assumed to be correct, so the linear search can be skipped and we can assert
3058 * at an earlier point.
3059 *
3060 * @param pPool The pool.
3061 * @param pPage The page.
3062 * @param HCPhys The host physical address corresponding to the guest page.
3063 * @param GCPhys The guest physical address corresponding to HCPhys.
3064 */
3065static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3066{
3067 /*
3068 * Walk range list.
3069 */
3070 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3071 while (pRam)
3072 {
3073 RTGCPHYS off = GCPhys - pRam->GCPhys;
3074 if (off < pRam->cb)
3075 {
3076 /* does it match? */
3077 const unsigned iPage = off >> PAGE_SHIFT;
3078 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3079 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3080 Log(("pgmPoolTracDerefGCPhys: %VHp vs %VHp\n", HCPhysPage, HCPhys));
3081 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3082 {
3083 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3084 return;
3085 }
3086 break;
3087 }
3088 pRam = pRam->CTX_SUFF(pNext);
3089 }
3090 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
3091}
3092
3093
3094/**
3095 * Clear references to guest physical memory.
3096 *
3097 * @param pPool The pool.
3098 * @param pPage The page.
3099 * @param HCPhys The host physical address corresponding to the guest page.
3100 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3101 */
3102static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3103{
3104 /*
3105 * Walk range list.
3106 */
3107 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3108 while (pRam)
3109 {
3110 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3111 if (off < pRam->cb)
3112 {
3113 /* does it match? */
3114 const unsigned iPage = off >> PAGE_SHIFT;
3115 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3116 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3117 {
3118 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3119 return;
3120 }
3121 break;
3122 }
3123 pRam = pRam->CTX_SUFF(pNext);
3124 }
3125
3126 /*
3127 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3128 */
3129 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3130 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3131 while (pRam)
3132 {
3133 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3134 while (iPage-- > 0)
3135 {
3136 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3137 {
3138 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
3139 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3140 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3141 return;
3142 }
3143 }
3144 pRam = pRam->CTX_SUFF(pNext);
3145 }
3146
3147 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
3148}
3149
3150
3151/**
3152 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3153 *
3154 * @param pPool The pool.
3155 * @param pPage The page.
3156 * @param pShwPT The shadow page table (mapping of the page).
3157 * @param pGstPT The guest page table.
3158 */
3159DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3160{
3161 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3162 if (pShwPT->a[i].n.u1Present)
3163 {
3164 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3165 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3166 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3167 if (!--pPage->cPresent)
3168 break;
3169 }
3170}
3171
3172
3173/**
3174 * Clear references to guest physical memory in a PAE / 32-bit page table.
3175 *
3176 * @param pPool The pool.
3177 * @param pPage The page.
3178 * @param pShwPT The shadow page table (mapping of the page).
3179 * @param pGstPT The guest page table (just a half one).
3180 */
3181DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3182{
3183 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3184 if (pShwPT->a[i].n.u1Present)
3185 {
3186 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
3187 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3188 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3189 }
3190}
3191
3192
3193/**
3194 * Clear references to guest physical memory in a PAE / PAE page table.
3195 *
3196 * @param pPool The pool.
3197 * @param pPage The page.
3198 * @param pShwPT The shadow page table (mapping of the page).
3199 * @param pGstPT The guest page table.
3200 */
3201DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3202{
3203 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3204 if (pShwPT->a[i].n.u1Present)
3205 {
3206 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
3207 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3208 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3209 }
3210}
3211
3212
3213/**
3214 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3215 *
3216 * @param pPool The pool.
3217 * @param pPage The page.
3218 * @param pShwPT The shadow page table (mapping of the page).
3219 */
3220DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3221{
3222 RTGCPHYS GCPhys = pPage->GCPhys;
3223 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3224 if (pShwPT->a[i].n.u1Present)
3225 {
3226 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3227 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3228 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3229 }
3230}
3231
3232
3233/**
3234 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3235 *
3236 * @param pPool The pool.
3237 * @param pPage The page.
3238 * @param pShwPT The shadow page table (mapping of the page).
3239 */
3240DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3241{
3242 RTGCPHYS GCPhys = pPage->GCPhys;
3243 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3244 if (pShwPT->a[i].n.u1Present)
3245 {
3246 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%VGp\n",
3247 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3248 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3249 }
3250}
3251#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3252
3253
3254/**
3255 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
3256 *
3257 * @param pPool The pool.
3258 * @param pPage The page.
3259 * @param pShwPD The shadow page directory (mapping of the page).
3260 */
3261DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3262{
3263 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3264 {
3265 if (pShwPD->a[i].n.u1Present)
3266 {
3267 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3268 if (pSubPage)
3269 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3270 else
3271 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3272 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3273 }
3274 }
3275}
3276
3277
3278/**
3279 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3280 *
3281 * @param pPool The pool.
3282 * @param pPage The page.
3283 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3284 */
3285DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3286{
3287 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3288 {
3289 if (pShwPDPT->a[i].n.u1Present)
3290 {
3291 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3292 if (pSubPage)
3293 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3294 else
3295 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3296 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3297 }
3298 }
3299}
3300
3301/**
3302 * Clear references to shadowed pages in a 64-bit level 4 page table.
3303 *
3304 * @param pPool The pool.
3305 * @param pPage The page.
3306 * @param pShwPML4 The shadow PML4 table (mapping of the page).
3307 */
3308DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3309{
3310 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3311 {
3312 if (pShwPML4->a[i].n.u1Present)
3313 {
3314 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3315 if (pSubPage)
3316 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3317 else
3318 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3319 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3320 }
3321 }
3322}
3323
3324/**
3325 * Clear references to guest physical memory in an EPT page table.
3326 *
3327 * @param pPool The pool.
3328 * @param pPage The page.
3329 * @param pShwPT The shadow page table (mapping of the page).
3330 */
3331DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3332{
3333 RTGCPHYS GCPhys = pPage->GCPhys;
3334 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3335 if (pShwPT->a[i].n.u1Present)
3336 {
3337 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
3338 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3339 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3340 }
3341}
3342
3343/**
3344 * Clear references to shadowed pages in an EPT page directory.
3345 *
3346 * @param pPool The pool.
3347 * @param pPage The page.
3348 * @param pShwPD The shadow page directory (mapping of the page).
3349 */
3350DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3351{
3352 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3353 {
3354 if (pShwPD->a[i].n.u1Present)
3355 {
3356 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3357 if (pSubPage)
3358 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3359 else
3360 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3361 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3362 }
3363 }
3364}
3365
3366/**
3367 * Clear references to shadowed pages in an EPT page directory pointer table.
3368 *
3369 * @param pPool The pool.
3370 * @param pPage The page.
3371 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3372 */
3373DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3374{
3375 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3376 {
3377 if (pShwPDPT->a[i].n.u1Present)
3378 {
3379 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3380 if (pSubPage)
3381 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3382 else
3383 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3384 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3385 }
3386 }
3387}
3388
3389/**
3390 * Clears all references made by this page.
3391 *
3392 * This includes other shadow pages and GC physical addresses.
3393 *
3394 * @param pPool The pool.
3395 * @param pPage The page.
3396 */
3397static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3398{
3399 /*
3400 * Map the shadow page and take action according to the page kind.
3401 */
3402 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3403 switch (pPage->enmKind)
3404 {
3405#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3406 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3407 {
3408 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3409 void *pvGst;
3410 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3411 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3412 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3413 break;
3414 }
3415
3416 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3417 {
3418 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3419 void *pvGst;
3420 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3421 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3422 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3423 break;
3424 }
3425
3426 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3427 {
3428 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3429 void *pvGst;
3430 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3431 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3432 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3433 break;
3434 }
3435
3436 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3437 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3438 {
3439 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3440 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3441 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3442 break;
3443 }
3444
3445 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3446 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3447 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3448 {
3449 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3450 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3451 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3452 break;
3453 }
3454
3455#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3456 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3457 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3458 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3459 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3460 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3461 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3462 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3463 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3464 break;
3465#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3466
3467 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
3468 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3469 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3470 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3471 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3472 break;
3473
3474 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3475 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3476 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3477 break;
3478
3479 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
3480 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3481 break;
3482
3483 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3484 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
3485 break;
3486
3487 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3488 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
3489 break;
3490
3491 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3492 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
3493 break;
3494
3495 default:
3496 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3497 }
3498
3499 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3500 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3501 ASMMemZeroPage(pvShw);
3502 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3503 pPage->fZeroed = true;
3504}
3505#endif /* PGMPOOL_WITH_USER_TRACKING */
3506
3507
3508/**
3509 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3510 *
3511 * @param pPool The pool.
3512 */
3513static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3514{
3515 /*
3516 * These special pages are all mapped into the indexes 1 .. PGMPOOL_IDX_FIRST - 1.
3517 */
3518 Assert(NIL_PGMPOOL_IDX == 0);
3519 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3520 {
3521 /*
3522 * Get the page address.
3523 */
3524 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3525 union
3526 {
3527 uint64_t *pau64;
3528 uint32_t *pau32;
3529 } u;
3530 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3531
3532 /*
3533 * Mark stuff not present.
3534 */
3535 switch (pPage->enmKind)
3536 {
3537 case PGMPOOLKIND_ROOT_32BIT_PD:
3538 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3539 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3540 u.pau32[iPage] = 0;
3541 break;
3542
3543 case PGMPOOLKIND_ROOT_PAE_PD:
3544 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * X86_PG_PAE_PDPE_ENTRIES; iPage++)
3545 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3546 u.pau64[iPage] = 0;
3547 break;
3548
3549 case PGMPOOLKIND_ROOT_PDPT:
3550 /* Currently not the root of any shadowed pages, so ignore it. */
3551 break;
3552
3553 case PGMPOOLKIND_ROOT_NESTED:
3554 ASMMemZero32(u.pau64, PAGE_SIZE);
3555 break;
3556 }
3557 }
3558
3559 /*
3560 * Paranoia (to be removed), flag a global CR3 sync.
3561 */
3562 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
3563}
3564
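/*
 * Editor's note, not part of the original file: the PD cases above only zap
 * entries the guest owns; a PDE carrying PGM_PDFLAGS_MAPPING backs a
 * hypervisor mapping and must survive the flush. A minimal sketch of that
 * predicate on a hypothetical 32-bit PDE value:
 */
#if 0 /* illustrative sketch only */
    uint32_t u32Pde = u.pau32[iPage];
    bool fZap = (u32Pde & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P;
    /* fZap: present and not a hypervisor mapping -> safe to clear. */
#endif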
3565
3566/**
3567 * Flushes the entire cache.
3568 *
3569 * It asserts a global CR3 flush (FF) and assumes the caller is aware of this
3570 * and will execute the CR3 flush.
3571 *
3572 * @param pPool The pool.
3573 */
3574static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3575{
3576 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3577 LogFlow(("pgmPoolFlushAllInt:\n"));
3578
3579 /*
3580 * If there are no pages in the pool, there is nothing to do.
3581 */
3582 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3583 {
3584 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3585 return;
3586 }
3587
3588 /*
3589 * Nuke the free list and reinsert all pages into it.
3590 */
3591 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3592 {
3593 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3594
3595#ifdef IN_RING3
3596 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMR3, pPage->pvPageR3));
3597#endif
3598#ifdef PGMPOOL_WITH_MONITORING
3599 if (pPage->fMonitored)
3600 pgmPoolMonitorFlush(pPool, pPage);
3601 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3602 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3603 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3604 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3605 pPage->cModifications = 0;
3606#endif
3607 pPage->GCPhys = NIL_RTGCPHYS;
3608 pPage->enmKind = PGMPOOLKIND_FREE;
3609 Assert(pPage->idx == i);
3610 pPage->iNext = i + 1;
3611 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3612 pPage->fSeenNonGlobal = false;
3613 pPage->fMonitored = false;
3614 pPage->fCached = false;
3615 pPage->fReusedFlushPending = false;
3616 pPage->fCR3Mix = false;
3617#ifdef PGMPOOL_WITH_USER_TRACKING
3618 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3619#endif
3620#ifdef PGMPOOL_WITH_CACHE
3621 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3622 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3623#endif
3624 }
3625 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3626 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3627 pPool->cUsedPages = 0;
3628
3629#ifdef PGMPOOL_WITH_USER_TRACKING
3630 /*
3631 * Zap and reinitialize the user records.
3632 */
3633 pPool->cPresent = 0;
3634 pPool->iUserFreeHead = 0;
3635 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3636 const unsigned cMaxUsers = pPool->cMaxUsers;
3637 for (unsigned i = 0; i < cMaxUsers; i++)
3638 {
3639 paUsers[i].iNext = i + 1;
3640 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3641 paUsers[i].iUserTable = 0xfffffffe;
3642 }
3643 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3644#endif
3645
3646#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3647 /*
3648 * Clear all the GCPhys links and rebuild the phys ext free list.
3649 */
3650 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3651 pRam;
3652 pRam = pRam->CTX_SUFF(pNext))
3653 {
3654 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3655 while (iPage-- > 0)
3656 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3657 }
3658
3659 pPool->iPhysExtFreeHead = 0;
3660 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3661 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3662 for (unsigned i = 0; i < cMaxPhysExts; i++)
3663 {
3664 paPhysExts[i].iNext = i + 1;
3665 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3666 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3667 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3668 }
3669 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3670#endif
3671
3672#ifdef PGMPOOL_WITH_MONITORING
3673 /*
3674 * Just zap the modified list.
3675 */
3676 pPool->cModifiedPages = 0;
3677 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3678#endif
3679
3680#ifdef PGMPOOL_WITH_CACHE
3681 /*
3682 * Clear the GCPhys hash and the age list.
3683 */
3684 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
3685 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3686 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3687 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3688#endif
3689
3690 /*
3691 * Flush all the special root pages.
3692 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3693 */
3694 pgmPoolFlushAllSpecialRoots(pPool);
3695 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3696 {
3697 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3698 pPage->iNext = NIL_PGMPOOL_IDX;
3699#ifdef PGMPOOL_WITH_MONITORING
3700 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3701 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3702 pPage->cModifications = 0;
3703 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3704 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3705 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3706 if (pPage->fMonitored)
3707 {
3708 PVM pVM = pPool->CTX_SUFF(pVM);
3709 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3710 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3711 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3712 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
3713 pPool->pszAccessHandler);
3714 AssertFatalRCSuccess(rc);
3715# ifdef PGMPOOL_WITH_CACHE
3716 pgmPoolHashInsert(pPool, pPage);
3717# endif
3718 }
3719#endif
3720#ifdef PGMPOOL_WITH_USER_TRACKING
3721 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3722#endif
3723#ifdef PGMPOOL_WITH_CACHE
3724 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3725 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3726#endif
3727 }
3728
3729 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3730}
3731
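/*
 * Editor's note, not part of the original file: pgmPoolFlushAllInt() leaves
 * VM_FF_PGM_SYNC_CR3 set, so a caller is expected to service that forced
 * action before resuming guest execution. A minimal sketch of the contract,
 * with the surrounding context hypothetical:
 */
#if 0 /* illustrative sketch only */
    pgmPoolFlushAll(pVM);
    Assert(VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
    /* ... the FF is later serviced with a global CR3 sync, e.g. via PGMSyncCR3(). */
#endif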
3732
3733/**
3734 * Flushes a pool page.
3735 *
3736 * This moves the page to the free list after removing all user references to it.
3737 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3738 *
3739 * @returns VBox status code.
3740 * @retval VINF_SUCCESS on success.
3741 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
3742 * @param pPool The pool.
3743 * @param pPage The shadow page.
3744 */
3745int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3746{
3747 int rc = VINF_SUCCESS;
3748 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3749 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3750 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3751
3752 /*
3753 * Quietly reject any attempts at flushing any of the special root pages.
3754 */
3755 if (pPage->idx < PGMPOOL_IDX_FIRST)
3756 {
3757 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3758 return VINF_SUCCESS;
3759 }
3760
3761 /*
3762 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
3763 */
3764 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
3765 {
3766 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4, ("Can't free the shadow CR3! (%VGp vs %VGp kind=%d)\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
3767 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3768 return VINF_SUCCESS;
3769 }
3770
3771 /*
3772 * Mark the page as being in need of an ASMMemZeroPage().
3773 */
3774 pPage->fZeroed = false;
3775
3776#ifdef PGMPOOL_WITH_USER_TRACKING
3777 /*
3778 * Clear the page.
3779 */
3780 pgmPoolTrackClearPageUsers(pPool, pPage);
3781 STAM_PROFILE_START(&pPool->StatTrackDeref, a);
3782 pgmPoolTrackDeref(pPool, pPage);
3783 STAM_PROFILE_STOP(&pPool->StatTrackDeref, a);
3784#endif
3785
3786#ifdef PGMPOOL_WITH_CACHE
3787 /*
3788 * Flush it from the cache.
3789 */
3790 pgmPoolCacheFlushPage(pPool, pPage);
3791#endif /* PGMPOOL_WITH_CACHE */
3792
3793#ifdef PGMPOOL_WITH_MONITORING
3794 /*
3795 * Deregister the monitoring.
3796 */
3797 if (pPage->fMonitored)
3798 rc = pgmPoolMonitorFlush(pPool, pPage);
3799#endif
3800
3801 /*
3802 * Free the page.
3803 */
3804 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3805 pPage->iNext = pPool->iFreeHead;
3806 pPool->iFreeHead = pPage->idx;
3807 pPage->enmKind = PGMPOOLKIND_FREE;
3808 pPage->GCPhys = NIL_RTGCPHYS;
3809 pPage->fReusedFlushPending = false;
3810
3811 pPool->cUsedPages--;
3812 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3813 return rc;
3814}
3815
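/*
 * Editor's note, not part of the original file: a sketch of how a caller
 * might treat the VERR_PGM_POOL_CLEARED status documented above. The
 * context (pPool, pPage) is hypothetical:
 */
#if 0 /* illustrative sketch only */
    int rc = pgmPoolFlushPage(pPool, pPage);
    if (rc == VERR_PGM_POOL_CLEARED)
    {
        /* The handler deregistration caused a lightweight pool flush; the
           global CR3 sync FF is set and must be serviced by the caller. */
        rc = VINF_SUCCESS;
    }
    AssertRC(rc);
#endif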
3816
3817/**
3818 * Frees a usage of a pool page.
3819 *
3820 * The caller is responsible for updating the user table so that it no longer
3821 * references the shadow page.
3822 *
3823 * @param pPool The pool.
3824 * @param pPage The shadow page.
3825 * @param iUser The shadow page pool index of the user table.
3826 * @param iUserTable The index into the user table (shadowed).
3827 */
3828void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3829{
3830 STAM_PROFILE_START(&pPool->StatFree, a);
3831 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3832 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3833 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3834#ifdef PGMPOOL_WITH_USER_TRACKING
3835 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3836#endif
3837#ifdef PGMPOOL_WITH_CACHE
3838 if (!pPage->fCached)
3839#endif
3840 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3841 STAM_PROFILE_STOP(&pPool->StatFree, a);
3842}
3843
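/*
 * Editor's note, not part of the original file: the doc comment above makes
 * the caller responsible for clearing its table entry first. A minimal
 * sketch with a hypothetical PAE page directory user (pShwPD, iPd,
 * pUserPage are stand-ins):
 */
#if 0 /* illustrative sketch only */
    PPGMPOOLPAGE pSubPage = pgmPoolGetPageByHCPhys(pVM, pShwPD->a[iPd].u & X86_PDE_PAE_PG_MASK);
    pShwPD->a[iPd].u = 0;                                   /* drop the reference from the user table... */
    pgmPoolFreeByPage(pPool, pSubPage, pUserPage->idx, iPd); /* ...then release the usage. */
#endif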
3844
3845/**
3846 * Makes one or more free pages available.
3847 *
3848 * @returns VBox status code.
3849 * @retval VINF_SUCCESS on success.
3850 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3851 *
3852 * @param pPool The pool.
3853 * @param iUser The user of the page.
3854 */
3855static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3856{
3857 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3858
3859 /*
3860 * If the pool isn't fully grown yet, expand it.
3861 */
3862 if (pPool->cCurPages < pPool->cMaxPages)
3863 {
3864 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3865#ifdef IN_RING3
3866 int rc = PGMR3PoolGrow(pPool->pVMR3);
3867#else
3868 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3869#endif
3870 if (VBOX_FAILURE(rc))
3871 return rc;
3872 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3873 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3874 return VINF_SUCCESS;
3875 }
3876
3877#ifdef PGMPOOL_WITH_CACHE
3878 /*
3879 * Free one cached page.
3880 */
3881 return pgmPoolCacheFreeOne(pPool, iUser);
3882#else
3883 /*
3884 * Flush the pool.
3885 * If we have tracking enabled, it should be possible to come up with
3886 * a cheap replacement strategy...
3887 */
3888 /** @todo incompatible with long mode paging (cr3 root will be flushed). */
3889 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
3890 pgmPoolFlushAllInt(pPool);
3891 return VERR_PGM_POOL_FLUSHED;
3892#endif
3893}
3894
3895
3896/**
3897 * Allocates a page from the pool.
3898 *
3899 * This page may actually be a cached page and not in need of any processing
3900 * on the caller's part.
3901 *
3902 * @returns VBox status code.
3903 * @retval VINF_SUCCESS if a NEW page was allocated.
3904 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3905 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3906 * @param pVM The VM handle.
3907 * @param GCPhys The GC physical address of the page we're going to shadow.
3908 * For 4MB and 2MB PD entries, it's the first address the
3909 * shadow PT is covering.
3910 * @param enmKind The kind of mapping.
3911 * @param iUser The shadow page pool index of the user table.
3912 * @param iUserTable The index into the user table (shadowed).
3913 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3914 */
3915int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
3916{
3917 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3918 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3919 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3920 *ppPage = NULL;
3921
3922#ifdef PGMPOOL_WITH_CACHE
3923 if (pPool->fCacheEnabled)
3924 {
3925 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3926 if (VBOX_SUCCESS(rc2))
3927 {
3928 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3929 LogFlow(("pgmPoolAlloc: cached returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3930 return rc2;
3931 }
3932 }
3933#endif
3934
3935 /*
3936 * Allocate a new one.
3937 */
3938 int rc = VINF_SUCCESS;
3939 uint16_t iNew = pPool->iFreeHead;
3940 if (iNew == NIL_PGMPOOL_IDX)
3941 {
3942 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3943 if (VBOX_FAILURE(rc))
3944 {
3945 if (rc != VERR_PGM_POOL_CLEARED)
3946 {
3947 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3948 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3949 return rc;
3950 }
3951 Log(("pgmPoolMakeMoreFreePages failed with %Vrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
3952 rc = VERR_PGM_POOL_FLUSHED;
3953 }
3954 iNew = pPool->iFreeHead;
3955 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3956 }
3957
3958 /* unlink the free head */
3959 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3960 pPool->iFreeHead = pPage->iNext;
3961 pPage->iNext = NIL_PGMPOOL_IDX;
3962
3963 /*
3964 * Initialize it.
3965 */
3966 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3967 pPage->enmKind = enmKind;
3968 pPage->GCPhys = GCPhys;
3969 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3970 pPage->fMonitored = false;
3971 pPage->fCached = false;
3972 pPage->fReusedFlushPending = false;
3973 pPage->fCR3Mix = false;
3974#ifdef PGMPOOL_WITH_MONITORING
3975 pPage->cModifications = 0;
3976 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3977 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3978#endif
3979#ifdef PGMPOOL_WITH_USER_TRACKING
3980 pPage->cPresent = 0;
3981 pPage->iFirstPresent = ~0;
3982
3983 /*
3984 * Insert into the tracking and cache. If this fails, free the page.
3985 */
3986 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3987 if (VBOX_FAILURE(rc3))
3988 {
3989 if (rc3 != VERR_PGM_POOL_CLEARED)
3990 {
3991 pPool->cUsedPages--;
3992 pPage->enmKind = PGMPOOLKIND_FREE;
3993 pPage->GCPhys = NIL_RTGCPHYS;
3994 pPage->iNext = pPool->iFreeHead;
3995 pPool->iFreeHead = pPage->idx;
3996 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3997 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3998 return rc3;
3999 }
4000 Log(("pgmPoolTrackInsert failed with %Vrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4001 rc = VERR_PGM_POOL_FLUSHED;
4002 }
4003#endif /* PGMPOOL_WITH_USER_TRACKING */
4004
4005 /*
4006 * Commit the allocation, clear the page and return.
4007 */
4008#ifdef VBOX_WITH_STATISTICS
4009 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4010 pPool->cUsedPagesHigh = pPool->cUsedPages;
4011#endif
4012
4013 if (!pPage->fZeroed)
4014 {
4015 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4016 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4017 ASMMemZeroPage(pv);
4018 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4019 }
4020
4021 *ppPage = pPage;
4022 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4023 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4024 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4025 return rc;
4026}
4027
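/*
 * Editor's note, not part of the original file: a sketch of the calling
 * convention described above. GCPhysGuestPT, iUser and iUserTable are
 * hypothetical values:
 */
#if 0 /* illustrative sketch only */
    PPGMPOOLPAGE pShwPage;
    int rc = pgmPoolAlloc(pVM, GCPhysGuestPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, iUser, iUserTable, &pShwPage);
    if (rc == VINF_PGM_CACHED_PAGE)
    {
        /* The shadow PT already exists and is fully initialized; just link it. */
    }
    else if (VBOX_SUCCESS(rc))
    {
        /* A new, zeroed page; the caller fills in the shadow entries. */
    }
#endif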
4028
4029/**
4030 * Frees a usage of a pool page.
4031 *
4032 * @param pVM The VM handle.
4033 * @param HCPhys The HC physical address of the shadow page.
4034 * @param iUser The shadow page pool index of the user table.
4035 * @param iUserTable The index into the user table (shadowed).
4036 */
4037void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4038{
4039 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4040 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4041 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4042}
4043
4044
4045/**
4046 * Gets an in-use page in the pool by its physical address.
4047 *
4048 * @returns Pointer to the page.
4049 * @param pVM The VM handle.
4050 * @param HCPhys The HC physical address of the shadow page.
4051 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4052 */
4053PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4054{
4055 /** @todo profile this! */
4056 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4057 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4058 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
4059 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
4060 return pPage;
4061}
4062
4063
4064/**
4065 * Flushes the entire cache.
4066 *
4067 * It asserts a global CR3 flush (FF) and assumes the caller is aware of this
4068 * and will execute the CR3 flush.
4069 *
4070 * @param pVM The VM handle.
4071 */
4072void pgmPoolFlushAll(PVM pVM)
4073{
4074 LogFlow(("pgmPoolFlushAll:\n"));
4075 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4076}
4077