VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@13543

Last change on this file since 13543 was 13543, checked in by vboxsync, 16 years ago

#1865: PGMPool.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 147.4 KB
1/* $Id: PGMAllPool.cpp 13543 2008-10-23 16:49:58Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_GC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42
43
44/*******************************************************************************
45* Internal Functions *
46*******************************************************************************/
47__BEGIN_DECLS
48static void pgmPoolFlushAllInt(PPGMPOOL pPool);
49#ifdef PGMPOOL_WITH_USER_TRACKING
50DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
51DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#endif
54#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
55static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
56#endif
57#ifdef PGMPOOL_WITH_CACHE
58static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
59#endif
60#ifdef PGMPOOL_WITH_MONITORING
61static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
62#endif
63#ifndef IN_RING3
64DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
65#endif
66__END_DECLS
67
68
69/**
70 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
71 *
72 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
73 * @param enmKind The page kind.
74 */
75DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
76{
77 switch (enmKind)
78 {
79 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
82 return true;
83 default:
84 return false;
85 }
86}
87
88
89#if defined(IN_GC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
90/**
91 * Maps a pool page into the current context.
92 *
93 * @returns Pointer to the mapping.
94 * @param pVM The VM handle.
95 * @param pPage The page to map.
96 */
97void *pgmPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
98{
99 /* general pages. */
100 if (pPage->idx >= PGMPOOL_IDX_FIRST)
101 {
102 Assert(pPage->idx < pVM->pgm.s.CTX_SUFF(pPool)->cCurPages);
103 void *pv;
104 int rc = PGMDynMapHCPage(pVM, pPage->Core.Key, &pv);
105 AssertReleaseRC(rc);
106 return pv;
107 }
108
109 /* special pages. */
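 /* These are the fixed root pages (32-bit PD, PAE PDs, PDPT). In GC they
  * already have permanent hypervisor mappings, while in the R0 split
  * address space case they are mapped on demand through the dynamic
  * mapping cache using their host physical address. */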
110# ifdef IN_GC
111 switch (pPage->idx)
112 {
113 case PGMPOOL_IDX_PD:
114 return pVM->pgm.s.pGC32BitPD;
115 case PGMPOOL_IDX_PAE_PD:
116 case PGMPOOL_IDX_PAE_PD_0:
117 return pVM->pgm.s.apGCPaePDs[0];
118 case PGMPOOL_IDX_PAE_PD_1:
119 return pVM->pgm.s.apGCPaePDs[1];
120 case PGMPOOL_IDX_PAE_PD_2:
121 return pVM->pgm.s.apGCPaePDs[2];
122 case PGMPOOL_IDX_PAE_PD_3:
123 return pVM->pgm.s.apGCPaePDs[3];
124 case PGMPOOL_IDX_PDPT:
125 return pVM->pgm.s.pGCPaePDPT;
126 default:
127 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
128 return NULL;
129 }
130
131# else /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
132 RTHCPHYS HCPhys;
133 switch (pPage->idx)
134 {
135 case PGMPOOL_IDX_PD:
136 HCPhys = pVM->pgm.s.HCPhys32BitPD;
137 break;
138 case PGMPOOL_IDX_PAE_PD:
139 case PGMPOOL_IDX_PAE_PD_0:
140 HCPhys = pVM->pgm.s.aHCPhysPaePDs[0];
141 break;
142 case PGMPOOL_IDX_PAE_PD_1:
143 HCPhys = pVM->pgm.s.aHCPhysPaePDs[1];
144 break;
145 case PGMPOOL_IDX_PAE_PD_2:
146 HCPhys = pVM->pgm.s.aHCPhysPaePDs[2];
147 break;
148 case PGMPOOL_IDX_PAE_PD_3:
149 HCPhys = pVM->pgm.s.aHCPhysPaePDs[3];
150 break;
151 case PGMPOOL_IDX_PDPT:
152 HCPhys = pVM->pgm.s.HCPhysPaePDPT;
153 break;
154 default:
155 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
156 return NULL;
157 }
158 void *pv;
159 int rc = PGMDynMapHCPage(pVM, pPage->Core.Key, &pv);
160 AssertReleaseRC(rc);
161 return pv;
162# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
163}
164#endif /* IN_GC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
165
166
167#ifdef PGMPOOL_WITH_MONITORING
168/**
169 * Determine the size of a write instruction.
170 * @returns number of bytes written.
171 * @param pDis The disassembler state.
172 */
173static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
174{
175 /*
176 * This is very crude and possibly wrong for some opcodes,
177 * but since it's not really supposed to be called we can
178 * probably live with that.
179 */
180 return DISGetParamSize(pDis, &pDis->param1);
181}
182
183
184/**
185 * Flushes a chain of pages sharing the same access monitor.
186 *
187 * @returns VBox status code suitable for scheduling.
188 * @param pPool The pool.
189 * @param pPage A page in the chain.
190 */
191int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
192{
193 LogFlow(("pgmPoolMonitorChainFlush: Flush page %VGp type=%d\n", pPage->GCPhys, pPage->enmKind));
194
195 /*
196 * Find the list head.
197 */
198 uint16_t idx = pPage->idx;
199 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
200 {
201 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
202 {
203 idx = pPage->iMonitoredPrev;
204 Assert(idx != pPage->idx);
205 pPage = &pPool->aPages[idx];
206 }
207 }
208
209 /*
210 * Iterate the list flushing each shadow page.
211 */
212 int rc = VINF_SUCCESS;
213 for (;;)
214 {
215 idx = pPage->iMonitoredNext;
216 Assert(idx != pPage->idx);
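 /* Only real pool pages can be flushed; the fixed root pages
  * (idx < PGMPOOL_IDX_FIRST) are skipped. If a flush reports
  * VERR_PGM_POOL_CLEARED the pool was wiped, so tell the caller
  * to resync CR3 instead. */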
217 if (pPage->idx >= PGMPOOL_IDX_FIRST)
218 {
219 int rc2 = pgmPoolFlushPage(pPool, pPage);
220 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
221 rc = VINF_PGM_SYNC_CR3;
222 }
223 /* next */
224 if (idx == NIL_PGMPOOL_IDX)
225 break;
226 pPage = &pPool->aPages[idx];
227 }
228 return rc;
229}
230
231
232/**
233 * Wrapper for getting the current context pointer to the entry being modified.
234 *
235 * @returns Pointer to the current context mapping of the entry.
236 * @param pPool The pool.
237 * @param pvFault The fault virtual address.
238 * @param GCPhysFault The fault physical address.
239 * @param cbEntry The entry size.
240 */
241#ifdef IN_RING3
242DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
243#else
244DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
245#endif
246{
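 /* Round the access down to the start of the (power-of-two sized) entry,
  * e.g. with cbEntry == 8 the low three address bits are masked off, and
  * return a pointer that is valid in the current context. */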
247#ifdef IN_GC
248 return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
249
250#elif defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
251 void *pvRet;
252 int rc = PGMDynMapGCPageOff(pPool->pVMR0, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
253 AssertFatalRCSuccess(rc);
254 return pvRet;
255
256#elif defined(IN_RING0)
257 void *pvRet;
258 int rc = pgmRamGCPhys2HCPtr(&pPool->pVMR0->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
259 AssertFatalRCSuccess(rc);
260 return pvRet;
261
262#elif defined(IN_RING3)
263 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
264#else
265# error "huh?"
266#endif
267}
268
269
270/**
271 * Process shadow entries before they are changed by the guest.
272 *
273 * For PT entries we will clear them. For PD entries, we'll simply check
274 * for mapping conflicts and set the SyncCR3 FF if found.
275 *
276 * @param pPool The pool.
277 * @param pPage The head page.
278 * @param GCPhysFault The guest physical fault address.
279 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
280 * In R3 this is the host context 'fault' address.
281 * @param pCpu The disassembler state for figuring out the write size.
282 * This need not be specified if the caller knows we won't do cross entry accesses.
283 */
284#ifdef IN_RING3
285void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
286#else
287void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
288#endif
289{
290 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
291 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
292 const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
293
294 LogFlow(("pgmPoolMonitorChainChanging: %VGv phys=%VGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));
295
296 for (;;)
297 {
298 union
299 {
300 void *pv;
301 PX86PT pPT;
302 PX86PTPAE pPTPae;
303 PX86PD pPD;
304 PX86PDPAE pPDPae;
305 PX86PDPT pPDPT;
306 PX86PML4 pPML4;
307 } uShw;
308 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
309
310 switch (pPage->enmKind)
311 {
312 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
313 {
314 const unsigned iShw = off / sizeof(X86PTE);
315 if (uShw.pPT->a[iShw].n.u1Present)
316 {
317# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
318 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
319 Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
320 pgmPoolTracDerefGCPhysHint(pPool, pPage,
321 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
322 pGstPte->u & X86_PTE_PG_MASK);
323# endif
324 uShw.pPT->a[iShw].u = 0;
325 }
326 break;
327 }
328
329 /* page/2 sized */
330 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
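 /* A 32-bit guest PT has 1024 entries but a PAE shadow PT only 512,
  * so each shadow page covers one half of the guest page. GCPhys
  * records which half; only act if the write lands in it. */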
331 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
332 {
333 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
334 if (uShw.pPTPae->a[iShw].n.u1Present)
335 {
336# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
337 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
338 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
339 pgmPoolTracDerefGCPhysHint(pPool, pPage,
340 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
341 pGstPte->u & X86_PTE_PG_MASK);
342# endif
343 uShw.pPTPae->a[iShw].u = 0;
344 }
345 }
346 break;
347
348 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
349 {
350 const unsigned iShw = off / sizeof(X86PTEPAE);
351 if (uShw.pPTPae->a[iShw].n.u1Present)
352 {
353# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
354 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
355 Log4(("pgmPoolMonitorChainChanging pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
356 pgmPoolTracDerefGCPhysHint(pPool, pPage,
357 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
358 pGstPte->u & X86_PTE_PAE_PG_MASK);
359# endif
360 uShw.pPTPae->a[iShw].u = 0;
361 }
362
363 /* paranoia / a bit assumptive. */
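 /* (A misaligned write may straddle two entries; iShw2 below is the
  * entry containing the last byte written, which may also need
  * clearing.) */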
364 if ( pCpu
365 && (off & 7)
366 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
367 {
368 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
369 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
370
371 if (uShw.pPTPae->a[iShw2].n.u1Present)
372 {
373# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
374 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
375 Log4(("pgmPoolMonitorChainChanging pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
376 pgmPoolTracDerefGCPhysHint(pPool, pPage,
377 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
378 pGstPte->u & X86_PTE_PAE_PG_MASK);
379# endif
380 uShw.pPTPae->a[iShw2].u = 0;
381 }
382 }
383
384 break;
385 }
386
387 case PGMPOOLKIND_ROOT_32BIT_PD:
388 {
389 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
390 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
391 {
392 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
393 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
394 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
395 }
396 /* paranoia / a bit assumptive. */
397 else if ( pCpu
398 && (off & 3)
399 && (off & 3) + cbWrite > sizeof(X86PTE))
400 {
401 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
402 if ( iShw2 != iShw
403 && iShw2 < RT_ELEMENTS(uShw.pPD->a)
404 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
405 {
406 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
407 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
408 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
409 }
410 }
411#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
412 if ( uShw.pPD->a[iShw].n.u1Present
413 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
414 {
415 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
416# ifdef IN_GC /* TLB load - we're pushing things a bit... */
417 ASMProbeReadByte(pvAddress);
418# endif
419 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
420 uShw.pPD->a[iShw].u = 0;
421 }
422#endif
423 break;
424 }
425
426 case PGMPOOLKIND_ROOT_PAE_PD:
427 {
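 /* One 32-bit guest PDE covers 4 MB, i.e. two 2 MB PAE PDEs, hence
  * the doubled index and the two-iteration loop below. */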
428 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
429 for (unsigned i = 0; i < 2; i++, iShw++)
430 {
431 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
432 {
433 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
434 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
435 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
436 }
437 /* paranoia / a bit assumptive. */
438 else if ( pCpu
439 && (off & 3)
440 && (off & 3) + cbWrite > 4)
441 {
442 const unsigned iShw2 = iShw + 2;
443 if ( iShw2 < RT_ELEMENTS(uShw.pPDPae->a)
444 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
445 {
446 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
447 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
448 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
449 }
450 }
451#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
452 if ( uShw.pPDPae->a[iShw].n.u1Present
453 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
454 {
455 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
456# ifdef IN_GC /* TLB load - we're pushing things a bit... */
457 ASMProbeReadByte(pvAddress);
458# endif
459 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
460 uShw.pPDPae->a[iShw].u = 0;
461 }
462#endif
463 }
464 break;
465 }
466
467 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
468 {
469 const unsigned iShw = off / sizeof(X86PDEPAE);
470 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
471 {
472 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
473 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
474 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
475 }
476#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
477 /*
478 * Causes trouble when the guest uses a PDE to refer to the whole page table level
479 * structure. (Invalidate here; faults later on when it tries to change the page
480 * table entries -> recheck; probably only applies to the RC case.)
481 */
482 else
483 {
484 if (uShw.pPDPae->a[iShw].n.u1Present)
485 {
486 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
487 pgmPoolFree(pPool->CTX_SUFF(pVM),
488 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
489 /* Note: hardcoded PAE implementation dependency */
490 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
491 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
492 uShw.pPDPae->a[iShw].u = 0;
493 }
494 }
495#endif
496 /* paranoia / a bit assumptive. */
497 if ( pCpu
498 && (off & 7)
499 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
500 {
501 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
502 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
503
504 if ( iShw2 != iShw
505 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
506 {
507 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
508 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
509 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
510 }
511#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
512 else if (uShw.pPDPae->a[iShw2].n.u1Present)
513 {
514 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
515 pgmPoolFree(pPool->CTX_SUFF(pVM),
516 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
517 /* Note: hardcoded PAE implementation dependency */
518 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
519 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
520 uShw.pPDPae->a[iShw2].u = 0;
521 }
522#endif
523 }
524 break;
525 }
526
527 case PGMPOOLKIND_ROOT_PDPT:
528 {
529 /*
530 * Hopefully this doesn't happen very often:
531 * - touching unused parts of the page
532 * - messing with the bits of pd pointers without changing the physical address
533 */
534 const unsigned iShw = off / sizeof(X86PDPE);
535 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
536 {
537 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
538 {
539 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
540 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
541 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
542 }
543 /* paranoia / a bit assumptive. */
544 else if ( pCpu
545 && (off & 7)
546 && (off & 7) + cbWrite > sizeof(X86PDPE))
547 {
548 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
549 if ( iShw2 != iShw
550 && iShw2 < X86_PG_PAE_PDPE_ENTRIES
551 && uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
552 {
553 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
554 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
555 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
556 }
557 }
558 }
559 break;
560 }
561
562#ifndef IN_GC
563 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
564 {
565 Assert(pPage->enmKind == PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD);
566
567 const unsigned iShw = off / sizeof(X86PDEPAE);
568 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
569 {
570 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
571 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
572 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
573 }
574 else
575 {
576 if (uShw.pPDPae->a[iShw].n.u1Present)
577 {
578 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
579 pgmPoolFree(pPool->CTX_SUFF(pVM),
580 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
581 pPage->idx,
582 iShw);
583 uShw.pPDPae->a[iShw].u = 0;
584 }
585 }
586 /* paranoia / a bit assumptive. */
587 if ( pCpu
588 && (off & 7)
589 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
590 {
591 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
592 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
593
594 if ( iShw2 != iShw
595 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
596 {
597 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
598 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
599 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
600 }
601 else
602 if (uShw.pPDPae->a[iShw2].n.u1Present)
603 {
604 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
605 pgmPoolFree(pPool->CTX_SUFF(pVM),
606 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
607 pPage->idx,
608 iShw2);
609 uShw.pPDPae->a[iShw2].u = 0;
610 }
611 }
612 break;
613 }
614
615 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
616 {
617 /*
618 * Hopefully this doesn't happen very often:
619 * - messing with the bits of pd pointers without changing the physical address
620 */
621 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
622 {
623 const unsigned iShw = off / sizeof(X86PDPE);
624 if (uShw.pPDPT->a[iShw].n.u1Present)
625 {
626 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
627 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
628 uShw.pPDPT->a[iShw].u = 0;
629 }
630 /* paranoia / a bit assumptive. */
631 if ( pCpu
632 && (off & 7)
633 && (off & 7) + cbWrite > sizeof(X86PDPE))
634 {
635 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
636 if (uShw.pPDPT->a[iShw2].n.u1Present)
637 {
638 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
639 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
640 uShw.pPDPT->a[iShw2].u = 0;
641 }
642 }
643 }
644 break;
645 }
646
647 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
648 {
649 /*
650 * Hopefully this doesn't happen very often:
651 * - messing with the bits of pd pointers without changing the physical address
652 */
653 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
654 {
655 const unsigned iShw = off / sizeof(X86PDPE);
656 if (uShw.pPML4->a[iShw].n.u1Present)
657 {
658 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
659 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
660 uShw.pPML4->a[iShw].u = 0;
661 }
662 /* paranoia / a bit assumptive. */
663 if ( pCpu
664 && (off & 7)
665 && (off & 7) + cbWrite > sizeof(X86PDPE))
666 {
667 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
668 if (uShw.pPML4->a[iShw2].n.u1Present)
669 {
670 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
671 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
672 uShw.pPML4->a[iShw2].u = 0;
673 }
674 }
675 }
676 break;
677 }
678#endif /* !IN_GC */
679
680 default:
681 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
682 }
683
684 /* next */
685 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
686 return;
687 pPage = &pPool->aPages[pPage->iMonitoredNext];
688 }
689}
690
691
692# ifndef IN_RING3
693/**
694 * Checks if an access could be a fork operation in progress.
695 *
696 * Meaning that the guest is setting up the parent process for Copy-On-Write.
697 *
698 * @returns true if it's likely that we're forking, otherwise false.
699 * @param pPool The pool.
700 * @param pCpu The disassembled instruction.
701 * @param offFault The access offset.
702 */
703DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
704{
705 /*
706 * i386 linux is using btr to clear X86_PTE_RW.
707 * The functions involved are (2.6.16 source inspection):
708 * clear_bit
709 * ptep_set_wrprotect
710 * copy_one_pte
711 * copy_pte_range
712 * copy_pmd_range
713 * copy_pud_range
714 * copy_page_range
715 * dup_mmap
716 * dup_mm
717 * copy_mm
718 * copy_process
719 * do_fork
720 */
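 /* The check below is deliberately loose: any BTR write whose offset has
  * bit 2 clear is taken as the copy-on-write pattern above; as the todo
  * notes, the bit index is not validated against X86_PTE_RW. */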
721 if ( pCpu->pCurInstr->opcode == OP_BTR
722 && !(offFault & 4)
723 /** @todo Validate that the bit index is X86_PTE_RW. */
724 )
725 {
726 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
727 return true;
728 }
729 return false;
730}
731
732
733/**
734 * Determine whether the page is likely to have been reused.
735 *
736 * @returns true if we consider the page as being reused for a different purpose.
737 * @returns false if we consider it to still be a paging page.
738 * @param pVM VM Handle.
739 * @param pPage The page in question.
740 * @param pRegFrame Trap register frame.
741 * @param pCpu The disassembly info for the faulting instruction.
742 * @param pvFault The fault address.
743 *
744 * @remark The REP prefix check is left to the caller because of STOSD/W.
745 */
746DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
747{
748#ifndef IN_GC
749 /** @todo could make this general, faulting close to rsp should be safe reuse heuristic. */
750 if ( HWACCMHasPendingIrq(pVM)
751 && (pRegFrame->rsp - pvFault) < 32)
752 {
753 /* Fault caused by stack writes while trying to inject an interrupt event. */
754 Log(("pgmPoolMonitorIsReused: reused %VGv for interrupt stack (rsp=%VGv).\n", pvFault, pRegFrame->rsp));
755 return true;
756 }
757#else
758 NOREF(pVM); NOREF(pvFault);
759#endif
760
761 switch (pCpu->pCurInstr->opcode)
762 {
763 /* call implies the actual push of the return address faulted */
764 case OP_CALL:
765 Log4(("pgmPoolMonitorIsReused: CALL\n"));
766 return true;
767 case OP_PUSH:
768 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
769 return true;
770 case OP_PUSHF:
771 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
772 return true;
773 case OP_PUSHA:
774 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
775 return true;
776 case OP_FXSAVE:
777 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
778 return true;
779 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
780 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
781 return true;
782 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
783 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
784 return true;
785 case OP_MOVSWD:
786 case OP_STOSWD:
787 if ( pCpu->prefix == (PREFIX_REP|PREFIX_REX)
788 && pRegFrame->rcx >= 0x40
789 )
790 {
791 Assert(pCpu->mode == CPUMODE_64BIT);
792
793 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
794 return true;
795 }
796 return false;
797 }
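 /* A write addressed via ESP almost certainly means the page now backs a
  * stack rather than a page table. */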
798 if ( (pCpu->param1.flags & USE_REG_GEN32)
799 && (pCpu->param1.base.reg_gen == USE_REG_ESP))
800 {
801 Log4(("pgmPoolMonitorIsReused: ESP\n"));
802 return true;
803 }
804
805 //if (pPage->fCR3Mix)
806 // return false;
807 return false;
808}
809
810
811/**
812 * Flushes the page being accessed.
813 *
814 * @returns VBox status code suitable for scheduling.
815 * @param pVM The VM handle.
816 * @param pPool The pool.
817 * @param pPage The pool page (head).
818 * @param pCpu The disassembly of the write instruction.
819 * @param pRegFrame The trap register frame.
820 * @param GCPhysFault The fault address as guest physical address.
821 * @param pvFault The fault address.
822 */
823static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
824 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
825{
826 /*
827 * First, do the flushing.
828 */
829 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
830
831 /*
832 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
833 */
834 uint32_t cbWritten;
835 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
836 if (VBOX_SUCCESS(rc2))
837 pRegFrame->rip += pCpu->opsize;
838 else if (rc2 == VERR_EM_INTERPRETER)
839 {
840#ifdef IN_GC
841 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
842 {
843 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
844 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
845 rc = VINF_SUCCESS;
846 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
847 }
848 else
849#endif
850 {
851 rc = VINF_EM_RAW_EMULATE_INSTR;
852 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
853 }
854 }
855 else
856 rc = rc2;
857
858 /* See use in pgmPoolAccessHandlerSimple(). */
859 PGM_INVL_GUEST_TLBS();
860
861 LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
862 return rc;
863
864}
865
866
867/**
868 * Handles the STOSD write accesses.
869 *
870 * @returns VBox status code suitable for scheduling.
871 * @param pVM The VM handle.
872 * @param pPool The pool.
873 * @param pPage The pool page (head).
874 * @param pCpu The disassembly of the write instruction.
875 * @param pRegFrame The trap register frame.
876 * @param GCPhysFault The fault address as guest physical address.
877 * @param pvFault The fault address.
878 */
879DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
880 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
881{
882 Assert(pCpu->mode == CPUMODE_32BIT);
883
884 /*
885 * Increment the modification counter and insert it into the list
886 * of modified pages the first time.
887 */
888 if (!pPage->cModifications++)
889 pgmPoolMonitorModifiedInsert(pPool, pPage);
890
891 /*
892 * Execute REP STOSD.
893 *
894 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
895 * write situation, meaning that it's safe to write here.
896 */
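 /* Each iteration first lets the monitor invalidate the affected shadow
  * entry, then performs the guest's 4-byte store (directly in GC, via
  * PGMPhysSimpleWriteGCPhys otherwise) and advances edi/ecx as STOSD
  * would have done. */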
897 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
898 while (pRegFrame->ecx)
899 {
900 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
901#ifdef IN_GC
902 *(uint32_t *)pu32 = pRegFrame->eax;
903#else
904 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
905#endif
906 pu32 += 4;
907 GCPhysFault += 4;
908 pRegFrame->edi += 4;
909 pRegFrame->ecx--;
910 }
911 pRegFrame->rip += pCpu->opsize;
912
913 /* See use in pgmPoolAccessHandlerSimple(). */
914 PGM_INVL_GUEST_TLBS();
915
916 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
917 return VINF_SUCCESS;
918}
919
920
921/**
922 * Handles the simple write accesses.
923 *
924 * @returns VBox status code suitable for scheduling.
925 * @param pVM The VM handle.
926 * @param pPool The pool.
927 * @param pPage The pool page (head).
928 * @param pCpu The disassembly of the write instruction.
929 * @param pRegFrame The trap register frame.
930 * @param GCPhysFault The fault address as guest physical address.
931 * @param pvFault The fault address.
932 */
933DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
934 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
935{
936 /*
937 * Increment the modification counter and insert it into the list
938 * of modified pages the first time.
939 */
940 if (!pPage->cModifications++)
941 pgmPoolMonitorModifiedInsert(pPool, pPage);
942
943 /*
944 * Clear all the pages. ASSUMES that pvFault is readable.
945 */
946 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
947
948 /*
949 * Interpret the instruction.
950 */
951 uint32_t cb;
952 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
953 if (VBOX_SUCCESS(rc))
954 pRegFrame->rip += pCpu->opsize;
955 else if (rc == VERR_EM_INTERPRETER)
956 {
957 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
958 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
959 rc = VINF_EM_RAW_EMULATE_INSTR;
960 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
961 }
962
963 /*
964 * Quick hack, with logging enabled we're getting stale
965 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
966 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
967 * have to be fixed to support this. But that'll have to wait till next week.
968 *
969 * An alternative is to keep track of the changed PTEs together with the
970 * GCPhys from the guest PT. This may prove expensive though.
971 *
972 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
973 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
974 */
975 PGM_INVL_GUEST_TLBS();
976
977 LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
978 return rc;
979}
980
981
982/**
983 * \#PF Handler callback for PT write accesses.
984 *
985 * @returns VBox status code (appropriate for GC return).
986 * @param pVM VM Handle.
987 * @param uErrorCode CPU Error code.
988 * @param pRegFrame Trap register frame.
989 * NULL on DMA and other non CPU access.
990 * @param pvFault The fault address (cr2).
991 * @param GCPhysFault The GC physical address corresponding to pvFault.
992 * @param pvUser User argument.
993 */
994DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
995{
996 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
997 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
998 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
999 LogFlow(("pgmPoolAccessHandler: pvFault=%VGv pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1000
1001 /*
1002 * We should ALWAYS have the list head as user parameter. This
1003 * is because we use that page to record the changes.
1004 */
1005 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1006
1007 /*
1008 * Disassemble the faulting instruction.
1009 */
1010 DISCPUSTATE Cpu;
1011 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
1012 AssertRCReturn(rc, rc);
1013
1014 /*
1015 * Check if it's worth dealing with.
1016 */
1017 bool fReused = false;
1018 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1019 || pPage->fCR3Mix)
1020 && !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
1021 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
1022 {
1023 /*
1024 * Simple instructions, no REP prefix.
1025 */
1026 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
1027 {
1028 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1029 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1030 return rc;
1031 }
1032
1033 /*
1034 * Windows is frequently doing small memset() operations (netio test 4k+).
1035 * We have to deal with these or we'll kill the cache and performance.
1036 */
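 /* Only the simple forward case is handled: ring 0, pure 32-bit REP STOSD,
  * dword aligned, staying within this page, and storing one of the two
  * fill values observed in practice. */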
1037 if ( Cpu.pCurInstr->opcode == OP_STOSWD
1038 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
1039 && pRegFrame->ecx <= 0x20
1040 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1041 && !((uintptr_t)pvFault & 3)
1042 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1043 && Cpu.mode == CPUMODE_32BIT
1044 && Cpu.opmode == CPUMODE_32BIT
1045 && Cpu.addrmode == CPUMODE_32BIT
1046 && Cpu.prefix == PREFIX_REP
1047 && !pRegFrame->eflags.Bits.u1DF
1048 )
1049 {
1050 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1051 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1052 return rc;
1053 }
1054
1055 /* REP prefix, don't bother. */
1056 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1057 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%VGv opcode=%d prefix=%#x\n",
1058 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
1059 }
1060
1061 /*
1062 * Not worth it, so flush it.
1063 *
1064 * If we considered it to be reused, don't go back to ring-3
1065 * to emulate failed instructions since we usually cannot
1066 * interpret them. This may be a bit risky, in which case
1067 * the reuse detection must be fixed.
1068 */
1069 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1070 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1071 rc = VINF_SUCCESS;
1072 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1073 return rc;
1074}
1075
1076# endif /* !IN_RING3 */
1077#endif /* PGMPOOL_WITH_MONITORING */
1078
1079#ifdef PGMPOOL_WITH_CACHE
1080
1081/**
1082 * Inserts a page into the GCPhys hash table.
1083 *
1084 * @param pPool The pool.
1085 * @param pPage The page.
1086 */
1087DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1088{
1089 Log3(("pgmPoolHashInsert: %VGp\n", pPage->GCPhys));
1090 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1091 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1092 pPage->iNext = pPool->aiHash[iHash];
1093 pPool->aiHash[iHash] = pPage->idx;
1094}
1095
1096
1097/**
1098 * Removes a page from the GCPhys hash table.
1099 *
1100 * @param pPool The pool.
1101 * @param pPage The page.
1102 */
1103DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1104{
1105 Log3(("pgmPoolHashRemove: %VGp\n", pPage->GCPhys));
1106 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1107 if (pPool->aiHash[iHash] == pPage->idx)
1108 pPool->aiHash[iHash] = pPage->iNext;
1109 else
1110 {
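 /* Not the head; walk the collision chain to find the predecessor and
  * unlink the page from it. */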
1111 uint16_t iPrev = pPool->aiHash[iHash];
1112 for (;;)
1113 {
1114 const int16_t i = pPool->aPages[iPrev].iNext;
1115 if (i == pPage->idx)
1116 {
1117 pPool->aPages[iPrev].iNext = pPage->iNext;
1118 break;
1119 }
1120 if (i == NIL_PGMPOOL_IDX)
1121 {
1122 AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1123 break;
1124 }
1125 iPrev = i;
1126 }
1127 }
1128 pPage->iNext = NIL_PGMPOOL_IDX;
1129}
1130
1131
1132/**
1133 * Frees up one cache page.
1134 *
1135 * @returns VBox status code.
1136 * @retval VINF_SUCCESS on success.
1137 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
1138 * @param pPool The pool.
1139 * @param iUser The user index.
1140 */
1141static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1142{
1143#ifndef IN_GC
1144 const PVM pVM = pPool->CTX_SUFF(pVM);
1145#endif
1146 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1147 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1148
1149 /*
1150 * Select one page from the tail of the age list.
1151 */
1152 uint16_t iToFree = pPool->iAgeTail;
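 /* Never evict the page owning the user table we're allocating for; if
  * the tail happens to be that page, take the next oldest entry. */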
1153 if (iToFree == iUser)
1154 iToFree = pPool->aPages[iToFree].iAgePrev;
1155/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1156 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1157 {
1158 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1159 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1160 {
1161 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1162 continue;
1163 iToFree = i;
1164 break;
1165 }
1166 }
1167*/
1168 Assert(iToFree != iUser);
1169 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1170
1171 int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
1172 if (rc == VINF_SUCCESS)
1173 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1174 return rc;
1175}
1176
1177
1178/**
1179 * Checks if a kind mismatch is really a page being reused
1180 * or if it's just normal remappings.
1181 *
1182 * @returns true if reused and the cached page (enmKind1) should be flushed
1183 * @returns false if not reused.
1184 * @param enmKind1 The kind of the cached page.
1185 * @param enmKind2 The kind of the requested page.
1186 */
1187static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1188{
1189 switch (enmKind1)
1190 {
1191 /*
1192 * Never reuse them. There is no remapping in non-paging mode.
1193 */
1194 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1195 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1196 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1197 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1198 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1199 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1200 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1201 return true;
1202
1203 /*
1204 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1205 */
1206 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1207 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1208 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1209 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1210 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1211 switch (enmKind2)
1212 {
1213 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1214 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1215 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1216 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1217 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1218 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1219 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1220 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1221 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1222 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1223 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1224 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1225 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1226 return true;
1227 default:
1228 return false;
1229 }
1230
1231 /*
1232 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1233 */
1234 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1235 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1236 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1237 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1238 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1239 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1240 switch (enmKind2)
1241 {
1242 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1243 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1244 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1245 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1246 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1247 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1248 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1249 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1250 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1251 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1252 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1253 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1254 return true;
1255 default:
1256 return false;
1257 }
1258
1259 /*
1260 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1261 */
1262 case PGMPOOLKIND_ROOT_32BIT_PD:
1263 case PGMPOOLKIND_ROOT_PAE_PD:
1264 case PGMPOOLKIND_ROOT_PDPT:
1265 case PGMPOOLKIND_ROOT_NESTED:
1266 return false;
1267
1268 default:
1269 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1270 }
1271}
1272
1273
1274/**
1275 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1276 *
1277 * @returns VBox status code.
1278 * @retval VINF_PGM_CACHED_PAGE on success.
1279 * @retval VERR_FILE_NOT_FOUND if not found.
1280 * @param pPool The pool.
1281 * @param GCPhys The GC physical address of the page we're gonna shadow.
1282 * @param enmKind The kind of mapping.
1283 * @param iUser The shadow page pool index of the user table.
1284 * @param iUserTable The index into the user table (shadowed).
1285 * @param ppPage Where to store the pointer to the page.
1286 */
1287static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1288{
1289#ifndef IN_GC
1290 const PVM pVM = pPool->CTX_SUFF(pVM);
1291#endif
1292 /*
1293 * Look up the GCPhys in the hash.
1294 */
1295 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1296 Log3(("pgmPoolCacheAlloc: %VGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
1297 if (i != NIL_PGMPOOL_IDX)
1298 {
1299 do
1300 {
1301 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1302 Log3(("pgmPoolCacheAlloc: slot %d found page %VGp\n", i, pPage->GCPhys));
1303 if (pPage->GCPhys == GCPhys)
1304 {
1305 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
1306 {
1307 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1308 if (VBOX_SUCCESS(rc))
1309 {
1310 *ppPage = pPage;
1311 STAM_COUNTER_INC(&pPool->StatCacheHits);
1312 return VINF_PGM_CACHED_PAGE;
1313 }
1314 return rc;
1315 }
1316
1317 /*
1318 * The kind is different. In some cases we should now flush the page
1319 * as it has been reused, but in most cases this is normal remapping
1320 * of PDs as PT or big pages using the GCPhys field in a slightly
1321 * different way than the other kinds.
1322 */
1323 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1324 {
1325 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1326 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
1327 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1328 break;
1329 }
1330 }
1331
1332 /* next */
1333 i = pPage->iNext;
1334 } while (i != NIL_PGMPOOL_IDX);
1335 }
1336
1337 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
1338 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1339 return VERR_FILE_NOT_FOUND;
1340}
1341
1342
1343/**
1344 * Inserts a page into the cache.
1345 *
1346 * @param pPool The pool.
1347 * @param pPage The cached page.
1348 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1349 */
1350static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1351{
1352 /*
1353 * Insert into the GCPhys hash if the page is fit for that.
1354 */
1355 Assert(!pPage->fCached);
1356 if (fCanBeCached)
1357 {
1358 pPage->fCached = true;
1359 pgmPoolHashInsert(pPool, pPage);
1360 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1361 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1362 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1363 }
1364 else
1365 {
1366 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1367 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1368 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1369 }
1370
1371 /*
1372 * Insert at the head of the age list.
1373 */
1374 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1375 pPage->iAgeNext = pPool->iAgeHead;
1376 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1377 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1378 else
1379 pPool->iAgeTail = pPage->idx;
1380 pPool->iAgeHead = pPage->idx;
1381}
1382
1383
1384/**
1385 * Flushes a cached page.
1386 *
1387 * @param pPool The pool.
1388 * @param pPage The cached page.
1389 */
1390static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1391{
1392 Log3(("pgmPoolCacheFlushPage: %VGp\n", pPage->GCPhys));
1393
1394 /*
1395 * Remove the page from the hash.
1396 */
1397 if (pPage->fCached)
1398 {
1399 pPage->fCached = false;
1400 pgmPoolHashRemove(pPool, pPage);
1401 }
1402 else
1403 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1404
1405 /*
1406 * Remove it from the age list.
1407 */
1408 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1409 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1410 else
1411 pPool->iAgeTail = pPage->iAgePrev;
1412 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1413 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1414 else
1415 pPool->iAgeHead = pPage->iAgeNext;
1416 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1417 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1418}
1419
1420#endif /* PGMPOOL_WITH_CACHE */
1421#ifdef PGMPOOL_WITH_MONITORING
1422
1423/**
1424 * Looks for pages sharing the monitor.
1425 *
1426 * @returns Pointer to the head page.
1427 * @returns NULL if not found.
1428 * @param pPool The Pool
1429 * @param pNewPage The page which is going to be monitored.
1430 */
1431static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1432{
1433#ifdef PGMPOOL_WITH_CACHE
1434 /*
1435 * Look up the GCPhys in the hash.
1436 */
1437 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1438 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1439 if (i == NIL_PGMPOOL_IDX)
1440 return NULL;
1441 do
1442 {
1443 PPGMPOOLPAGE pPage = &pPool->aPages[i];
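 /* GCPhys was rounded down to the page boundary above, so this unsigned
  * compare matches any pool page whose GCPhys lies within the same guest
  * page (some kinds keep a sub-page offset in GCPhys). */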
1444 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1445 && pPage != pNewPage)
1446 {
1447 switch (pPage->enmKind)
1448 {
1449 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1450 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1451 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1452 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1453 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1454 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1455 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1456 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1457 case PGMPOOLKIND_ROOT_32BIT_PD:
1458 case PGMPOOLKIND_ROOT_PAE_PD:
1459 case PGMPOOLKIND_ROOT_PDPT:
1460 {
1461 /* find the head */
1462 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1463 {
1464 Assert(pPage->iMonitoredPrev != pPage->idx);
1465 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1466 }
1467 return pPage;
1468 }
1469
1470 /* ignore, no monitoring. */
1471 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1472 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1473 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1474 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1475 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1476 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1477 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1478 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1479 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1480 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1481 case PGMPOOLKIND_ROOT_NESTED:
1482 break;
1483 default:
1484 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1485 }
1486 }
1487
1488 /* next */
1489 i = pPage->iNext;
1490 } while (i != NIL_PGMPOOL_IDX);
1491#endif
1492 return NULL;
1493}
1494
1495
1496/**
1497 * Enables write monitoring of a guest page.
1498 *
1499 * @returns VBox status code.
1500 * @retval VINF_SUCCESS on success.
1501 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1502 * @param pPool The pool.
1503 * @param pPage The cached page.
1504 */
1505static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1506{
1507 LogFlow(("pgmPoolMonitorInsert %VGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1508
1509 /*
1510 * Filter out the relevant kinds.
1511 */
1512 switch (pPage->enmKind)
1513 {
1514 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1515 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1516 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1517 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1518 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1519 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1520 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1521 case PGMPOOLKIND_ROOT_PDPT:
1522 break;
1523
1524 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1525 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1526 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1527 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1528 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1529 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1530 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1531 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1532 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1533 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1534 case PGMPOOLKIND_ROOT_NESTED:
1535 /* Nothing to monitor here. */
1536 return VINF_SUCCESS;
1537
1538 case PGMPOOLKIND_ROOT_32BIT_PD:
1539 case PGMPOOLKIND_ROOT_PAE_PD:
1540#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1541 break;
1542#endif
1543 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1544 default:
1545 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1546 }
1547
1548 /*
1549 * Install handler.
1550 */
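 /* If another pool page already monitors the same guest page, just link
  * this one into its chain; only the chain head owns the registered
  * physical access handler. */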
1551 int rc;
1552 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1553 if (pPageHead)
1554 {
1555 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1556 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1557 pPage->iMonitoredPrev = pPageHead->idx;
1558 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1559 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1560 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1561 pPageHead->iMonitoredNext = pPage->idx;
1562 rc = VINF_SUCCESS;
1563 }
1564 else
1565 {
1566 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1567 PVM pVM = pPool->CTX_SUFF(pVM);
1568 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1569 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1570 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1571 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1572 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1573 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1574 pPool->pszAccessHandler);
1575 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1576 * the heap size should suffice. */
1577 AssertFatalRC(rc);
1578 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1579 rc = VERR_PGM_POOL_CLEARED;
1580 }
1581 pPage->fMonitored = true;
1582 return rc;
1583}
1584
1585
1586/**
1587 * Disables write monitoring of a guest page.
1588 *
1589 * @returns VBox status code.
1590 * @retval VINF_SUCCESS on success.
1591 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1592 * @param pPool The pool.
1593 * @param pPage The cached page.
1594 */
1595static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1596{
1597 /*
1598 * Filter out the relevant kinds.
1599 */
1600 switch (pPage->enmKind)
1601 {
1602 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1603 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1604 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1605 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1606 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1607 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1608 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1609 case PGMPOOLKIND_ROOT_PDPT:
1610 break;
1611
1612 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1613 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1614 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1615 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1616 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1617 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1618 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1619 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1620 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1621 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1622 case PGMPOOLKIND_ROOT_NESTED:
1623 /* Nothing to monitor here. */
1624 return VINF_SUCCESS;
1625
1626 case PGMPOOLKIND_ROOT_32BIT_PD:
1627 case PGMPOOLKIND_ROOT_PAE_PD:
1628#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1629 break;
1630#endif
1631 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1632 default:
1633 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1634 }
1635
1636 /*
1637 * Remove the page from the monitored list or uninstall it if last.
1638 */
1639 const PVM pVM = pPool->CTX_SUFF(pVM);
1640 int rc;
1641 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1642 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1643 {
1644 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1645 {
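 /* We are the chain head: promote the next page and repoint the physical
  * handler's user argument at it before unlinking ourselves. */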
1646 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1647 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1648 pNewHead->fCR3Mix = pPage->fCR3Mix;
1649 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1650 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1651 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1652 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
1653 pPool->pszAccessHandler);
1654 AssertFatalRCSuccess(rc);
1655 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1656 }
1657 else
1658 {
1659 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1660 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1661 {
1662 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1663 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1664 }
1665 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1666 rc = VINF_SUCCESS;
1667 }
1668 }
1669 else
1670 {
1671 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1672 AssertFatalRC(rc);
1673 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1674 rc = VERR_PGM_POOL_CLEARED;
1675 }
1676 pPage->fMonitored = false;
1677
1678 /*
1679 * Remove it from the list of modified pages (if in it).
1680 */
1681 pgmPoolMonitorModifiedRemove(pPool, pPage);
1682
1683 return rc;
1684}
1685
1686# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1687
1688/**
1689 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1690 *
1691 * @param pPool The Pool.
1692 * @param pPage A page in the chain.
1693 * @param fCR3Mix The new fCR3Mix value.
1694 */
1695static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1696{
1697 /* current */
1698 pPage->fCR3Mix = fCR3Mix;
1699
1700 /* before */
1701 int16_t idx = pPage->iMonitoredPrev;
1702 while (idx != NIL_PGMPOOL_IDX)
1703 {
1704 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1705 idx = pPool->aPages[idx].iMonitoredPrev;
1706 }
1707
1708 /* after */
1709 idx = pPage->iMonitoredNext;
1710 while (idx != NIL_PGMPOOL_IDX)
1711 {
1712 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1713 idx = pPool->aPages[idx].iMonitoredNext;
1714 }
1715}
1716
1717
1718/**
1719 * Installs or modifies monitoring of a CR3 page (special).
1720 *
1721 * We're pretending the CR3 page is shadowed by the pool so we can use the
1722 * generic mechanisms for detecting chained monitoring. (This also gives us a
1723 * taste of what code changes are required to really pool CR3 shadow pages.)
1724 *
1725 * @returns VBox status code.
1726 * @param pPool The pool.
1727 * @param idxRoot The CR3 (root) page index.
1728 * @param GCPhysCR3 The (new) CR3 value.
1729 */
1730int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1731{
1732 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1733 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1734 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
1735 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1736
1737 /*
1738 * The unlikely case where it already matches.
1739 */
1740 if (pPage->GCPhys == GCPhysCR3)
1741 {
1742 Assert(pPage->fMonitored);
1743 return VINF_SUCCESS;
1744 }
1745
1746 /*
1747 * Flush the current monitoring and remove it from the hash.
1748 */
1749 int rc = VINF_SUCCESS;
1750 if (pPage->fMonitored)
1751 {
1752 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1753 rc = pgmPoolMonitorFlush(pPool, pPage);
1754 if (rc == VERR_PGM_POOL_CLEARED)
1755 rc = VINF_SUCCESS;
1756 else
1757 AssertFatalRC(rc);
1758 pgmPoolHashRemove(pPool, pPage);
1759 }
1760
1761 /*
1762 * Monitor the page at the new location and insert it into the hash.
1763 */
1764 pPage->GCPhys = GCPhysCR3;
1765 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1766 if (rc2 != VERR_PGM_POOL_CLEARED)
1767 {
1768 AssertFatalRC(rc2);
1769 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1770 rc = rc2;
1771 }
1772 pgmPoolHashInsert(pPool, pPage);
1773 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1774 return rc;
1775}
1776
1777
1778/**
1779 * Removes the monitoring of a CR3 page (special).
1780 *
1781 * @returns VBox status code.
1782 * @param pPool The pool.
1783 * @param idxRoot The CR3 (root) page index.
1784 */
1785int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1786{
1787 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1788 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1789 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1790 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1791
1792 if (!pPage->fMonitored)
1793 return VINF_SUCCESS;
1794
1795 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1796 int rc = pgmPoolMonitorFlush(pPool, pPage);
1797 if (rc != VERR_PGM_POOL_CLEARED)
1798 AssertFatalRC(rc);
1799 else
1800 rc = VINF_SUCCESS;
1801 pgmPoolHashRemove(pPool, pPage);
1802 Assert(!pPage->fMonitored);
1803 pPage->GCPhys = NIL_RTGCPHYS;
1804 return rc;
1805}
1806
1807# endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1808
1809/**
1810 * Inserts the page into the list of modified pages.
1811 *
1812 * @param pPool The pool.
1813 * @param pPage The page.
1814 */
1815void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1816{
1817 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1818 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1819 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1820 && pPool->iModifiedHead != pPage->idx,
1821 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1822 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1823 pPool->iModifiedHead, pPool->cModifiedPages));
1824
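    /* Link the page in at the head of the modified list; the list is doubly linked through the pool page indices. */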
1825 pPage->iModifiedNext = pPool->iModifiedHead;
1826 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1827 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1828 pPool->iModifiedHead = pPage->idx;
1829 pPool->cModifiedPages++;
1830#ifdef VBOX_WITH_STATISTICS
1831 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1832 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1833#endif
1834}
1835
1836
1837/**
1838 * Removes the page from the list of modified pages and resets the
1839 * modification counter.
1840 *
1841 * @param pPool The pool.
1842 * @param pPage The page which is believed to be in the list of modified pages.
1843 */
1844static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1845{
1846 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1847 if (pPool->iModifiedHead == pPage->idx)
1848 {
1849 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1850 pPool->iModifiedHead = pPage->iModifiedNext;
1851 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1852 {
1853 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1854 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1855 }
1856 pPool->cModifiedPages--;
1857 }
1858 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1859 {
1860 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1861 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1862 {
1863 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1864 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1865 }
1866 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1867 pPool->cModifiedPages--;
1868 }
1869 else
1870 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1871 pPage->cModifications = 0;
1872}
1873
1874
1875/**
1876 * Zaps the list of modified pages, resetting their modification counters in the process.
1877 *
1878 * @param pVM The VM handle.
1879 */
1880void pgmPoolMonitorModifiedClearAll(PVM pVM)
1881{
1882 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1883 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1884
1885 unsigned cPages = 0; NOREF(cPages);
1886 uint16_t idx = pPool->iModifiedHead;
1887 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1888 while (idx != NIL_PGMPOOL_IDX)
1889 {
1890 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1891 idx = pPage->iModifiedNext;
1892 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1893 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1894 pPage->cModifications = 0;
1895 Assert(++cPages);
1896 }
1897 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1898 pPool->cModifiedPages = 0;
1899}
1900
1901
1902/**
1903 * Clear all shadow pages and clear all modification counters.
1904 *
1905 * @param pVM The VM handle.
1906 * @remark Should only be used when monitoring is available, thus placed in
1907 * the PGMPOOL_WITH_MONITORING #ifdef.
1908 */
1909void pgmPoolClearAll(PVM pVM)
1910{
1911 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1912 STAM_PROFILE_START(&pPool->StatClearAll, c);
1913 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1914
1915 /*
1916 * Iterate all the pages until we've encountered all that are in use.
1917 * This is a simple but not quite optimal solution.
1918 */
1919 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1920 unsigned cLeft = pPool->cUsedPages;
1921 unsigned iPage = pPool->cCurPages;
1922 while (--iPage >= PGMPOOL_IDX_FIRST)
1923 {
1924 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1925 if (pPage->GCPhys != NIL_RTGCPHYS)
1926 {
1927 switch (pPage->enmKind)
1928 {
1929 /*
1930 * We only care about shadow page tables.
1931 */
1932 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1933 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1934 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1935 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1936 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1937 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1938 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1939 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1940 {
1941#ifdef PGMPOOL_WITH_USER_TRACKING
1942 if (pPage->cPresent)
1943#endif
1944 {
1945 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1946 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1947 ASMMemZeroPage(pvShw);
1948 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1949#ifdef PGMPOOL_WITH_USER_TRACKING
1950 pPage->cPresent = 0;
1951 pPage->iFirstPresent = ~0;
1952#endif
1953 }
1954 }
1955 /* fall thru */
1956
1957 default:
1958 Assert(!pPage->cModifications || ++cModifiedPages);
1959 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1960 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1961 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1962 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1963 pPage->cModifications = 0;
1964 break;
1965
1966 }
1967 if (!--cLeft)
1968 break;
1969 }
1970 }
1971
1972 /* sweep the special pages too. */
1973 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1974 {
1975 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1976 if (pPage->GCPhys != NIL_RTGCPHYS)
1977 {
1978 Assert(!pPage->cModifications || ++cModifiedPages);
1979 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1980 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1981 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1982 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1983 pPage->cModifications = 0;
1984 }
1985 }
1986
1987#ifndef DEBUG_michael
1988 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1989#endif
1990 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1991 pPool->cModifiedPages = 0;
1992
1993#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1994 /*
1995 * Clear all the GCPhys links and rebuild the phys ext free list.
1996 */
1997 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
1998 pRam;
1999 pRam = pRam->CTX_SUFF(pNext))
2000 {
2001 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2002 while (iPage-- > 0)
2003 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2004 }
2005
2006 pPool->iPhysExtFreeHead = 0;
2007 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2008 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2009 for (unsigned i = 0; i < cMaxPhysExts; i++)
2010 {
2011 paPhysExts[i].iNext = i + 1;
2012 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2013 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2014 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2015 }
2016 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2017#endif
2018
2019
2020 pPool->cPresent = 0;
2021 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2022}
2023
2024
2025/**
2026 * Handle SyncCR3 pool tasks
2027 *
2028 * @returns VBox status code.
2029 * @retval VINF_SUCCESS on success.
2030 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2031 * @param pVM The VM handle.
2032 * @remark Should only be used when monitoring is available, thus placed in
2033 * the PGMPOOL_WITH_MONITORING #ifdef.
2034 */
2035int pgmPoolSyncCR3(PVM pVM)
2036{
2037 /*
2038 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2039 * Occasionally we will have to clear all the shadow page tables because we wanted
2040 * to monitor a page which was mapped by too many shadowed page tables. This operation
2041 * sometimes refered to as a 'lightweight flush'.
2042 */
2043 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2044 pgmPoolMonitorModifiedClearAll(pVM);
2045 else
2046 {
2047# ifndef IN_GC
2048 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2049 pgmPoolClearAll(pVM);
2050# else
2051 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2052 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2053 return VINF_PGM_SYNC_CR3;
2054# endif
2055 }
2056 return VINF_SUCCESS;
2057}
2058
2059#endif /* PGMPOOL_WITH_MONITORING */
2060#ifdef PGMPOOL_WITH_USER_TRACKING
2061
2062/**
2063 * Frees up at least one user entry.
2064 *
2065 * @returns VBox status code.
2066 * @retval VINF_SUCCESS on success.
2067 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2068 * @param pPool The pool.
2069 * @param iUser The user index.
2070 */
2071static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2072{
2073 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2074#ifdef PGMPOOL_WITH_CACHE
2075 /*
2076 * Just free cached pages in a braindead fashion.
2077 */
2078 /** @todo walk the age list backwards and free the first with usage. */
2079 int rc = VINF_SUCCESS;
2080 do
2081 {
2082 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2083 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
2084 rc = rc2;
2085 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2086 return rc;
2087#else
2088 /*
2089 * Lazy approach.
2090 */
2091 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
2092 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
2093 pgmPoolFlushAllInt(pPool);
2094 return VERR_PGM_POOL_FLUSHED;
2095#endif
2096}
2097
2098
2099/**
2100 * Inserts a page into the cache.
2101 *
2102 * This will create a user node for the page, insert it into the GCPhys
2103 * hash, and insert it into the age list.
2104 *
2105 * @returns VBox status code.
2106 * @retval VINF_SUCCESS if successfully added.
2107 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2108 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
2109 * @param pPool The pool.
2110 * @param pPage The cached page.
2111 * @param GCPhys The GC physical address of the page we're gonna shadow.
2112 * @param iUser The user index.
2113 * @param iUserTable The user table index.
2114 */
2115DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2116{
2117 int rc = VINF_SUCCESS;
2118 PPGMPOOLUSER pUser = pPool->CTX_SUFF(paUsers);
2119
2120 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
2121
2122 /*
2123 * Find a free user node.
2124 */
2125 uint16_t i = pPool->iUserFreeHead;
2126 if (i == NIL_PGMPOOL_USER_INDEX)
2127 {
2128 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2129 if (VBOX_FAILURE(rc))
2130 return rc;
2131 i = pPool->iUserFreeHead;
2132 }
2133
2134 /*
2135 * Unlink the user node from the free list,
2136 * initialize and insert it into the user list.
2137 */
2138 pPool->iUserFreeHead = pUser[i].iNext;
2139 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
2140 pUser[i].iUser = iUser;
2141 pUser[i].iUserTable = iUserTable;
2142 pPage->iUserHead = i;
2143
2144 /*
2145 * Insert into cache and enable monitoring of the guest page if enabled.
2146 *
2147 * Until we implement caching of all levels, including the CR3 one, we'll
2148 * have to make sure we don't try to monitor & cache any recursive reuse of
2149 * a monitored CR3 page. Because all Windows versions are doing this we'll
2150 * have to be able to do combined access monitoring, CR3 + PT and
2151 * PD + PT (guest PAE).
2152 *
2153 * Update:
2154 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2155 */
2156#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2157# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2158 const bool fCanBeMonitored = true;
2159# else
2160 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2161 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2162 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2163# endif
2164# ifdef PGMPOOL_WITH_CACHE
2165 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2166# endif
2167 if (fCanBeMonitored)
2168 {
2169# ifdef PGMPOOL_WITH_MONITORING
2170 rc = pgmPoolMonitorInsert(pPool, pPage);
2171 if (rc == VERR_PGM_POOL_CLEARED)
2172 {
2173 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2174# ifndef PGMPOOL_WITH_CACHE
2175 pgmPoolMonitorFlush(pPool, pPage);
2176 rc = VERR_PGM_POOL_FLUSHED;
2177# endif
2178 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2179 pUser[i].iNext = pPool->iUserFreeHead;
2180 pUser[i].iUser = NIL_PGMPOOL_IDX;
2181 pPool->iUserFreeHead = i;
2182 }
2183 }
2184# endif
2185#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2186 return rc;
2187}
2188
2189
2190# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2191/**
2192 * Adds a user reference to a page.
2193 *
2194 * This will add another user record to the page and, when the cache is
2195 * enabled, move the page to the head of the cache's age list.
2196 *
2197 * @returns VBox status code.
2198 * @retval VINF_SUCCESS if successfully added.
2199 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2200 * @param pPool The pool.
2201 * @param pPage The cached page.
2202 * @param iUser The user index.
2203 * @param iUserTable The user table.
2204 */
2205static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2206{
2207 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2208
2209 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
2210# ifdef VBOX_STRICT
2211 /*
2212 * Check that the entry doesn't already exist.
2213 */
2214 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2215 {
2216 uint16_t i = pPage->iUserHead;
2217 do
2218 {
2219 Assert(i < pPool->cMaxUsers);
2220 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2221 i = paUsers[i].iNext;
2222 } while (i != NIL_PGMPOOL_USER_INDEX);
2223 }
2224# endif
2225
2226 /*
2227 * Allocate a user node.
2228 */
2229 uint16_t i = pPool->iUserFreeHead;
2230 if (i == NIL_PGMPOOL_USER_INDEX)
2231 {
2232 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2233 if (VBOX_FAILURE(rc))
2234 return rc;
2235 i = pPool->iUserFreeHead;
2236 }
2237 pPool->iUserFreeHead = paUsers[i].iNext;
2238
2239 /*
2240 * Initialize the user node and insert it.
2241 */
2242 paUsers[i].iNext = pPage->iUserHead;
2243 paUsers[i].iUser = iUser;
2244 paUsers[i].iUserTable = iUserTable;
2245 pPage->iUserHead = i;
2246
2247# ifdef PGMPOOL_WITH_CACHE
2248 /*
2249 * Tell the cache to update its replacement stats for this page.
2250 */
2251 pgmPoolCacheUsed(pPool, pPage);
2252# endif
2253 return VINF_SUCCESS;
2254}
2255# endif /* PGMPOOL_WITH_CACHE */
2256
2257
2258/**
2259 * Frees a user record associated with a page.
2260 *
2261 * This does not clear the entry in the user table, it simply returns the
2262 * user record to the chain of free records.
2263 *
2264 * @param pPool The pool.
2265 * @param pPage The shadow page.
2266 * @param iUser The shadow page pool index of the user table.
2267 * @param iUserTable The index into the user table (shadowed).
2268 */
2269static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2270{
2271 /*
2272 * Unlink and free the specified user entry.
2273 */
2274 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2275
2276 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2277 uint16_t i = pPage->iUserHead;
2278 if ( i != NIL_PGMPOOL_USER_INDEX
2279 && paUsers[i].iUser == iUser
2280 && paUsers[i].iUserTable == iUserTable)
2281 {
2282 pPage->iUserHead = paUsers[i].iNext;
2283
2284 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2285 paUsers[i].iNext = pPool->iUserFreeHead;
2286 pPool->iUserFreeHead = i;
2287 return;
2288 }
2289
2290 /* General: Linear search. */
2291 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2292 while (i != NIL_PGMPOOL_USER_INDEX)
2293 {
2294 if ( paUsers[i].iUser == iUser
2295 && paUsers[i].iUserTable == iUserTable)
2296 {
2297 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2298 paUsers[iPrev].iNext = paUsers[i].iNext;
2299 else
2300 pPage->iUserHead = paUsers[i].iNext;
2301
2302 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2303 paUsers[i].iNext = pPool->iUserFreeHead;
2304 pPool->iUserFreeHead = i;
2305 return;
2306 }
2307 iPrev = i;
2308 i = paUsers[i].iNext;
2309 }
2310
2311 /* Fatal: didn't find it */
2312 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
2313 iUser, iUserTable, pPage->GCPhys));
2314}
2315
2316
2317/**
2318 * Gets the entry size of a shadow table.
2319 *
2320 * @param enmKind The kind of page.
2321 *
2322 * @returns The size of the entry in bytes. That is, 4 or 8.
2323 * @returns If the kind is not for a table, an assertion is raised and 0 is
2324 * returned.
2325 */
2326DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2327{
2328 switch (enmKind)
2329 {
2330 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2331 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2332 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2333 case PGMPOOLKIND_ROOT_32BIT_PD:
2334 return 4;
2335
2336 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2337 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2338 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2339 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2340 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2341 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2342 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2343 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2344 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2345 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2346 case PGMPOOLKIND_ROOT_PAE_PD:
2347 case PGMPOOLKIND_ROOT_PDPT:
2348 case PGMPOOLKIND_ROOT_NESTED:
2349 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2350 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2351 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2352 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2353 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2354 return 8;
2355
2356 default:
2357 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2358 }
2359}
2360
2361
2362/**
2363 * Gets the entry size of a guest table.
2364 *
2365 * @param enmKind The kind of page.
2366 *
2367 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2368 * @returns If the kind is not for a table, an assertion is raised and 0 is
2369 * returned.
2370 */
2371DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2372{
2373 switch (enmKind)
2374 {
2375 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2376 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2377 case PGMPOOLKIND_ROOT_32BIT_PD:
2378 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2379 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2380 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2381 return 4;
2382
2383 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2384 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2385 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2386 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2387 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2388 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2389 case PGMPOOLKIND_ROOT_PAE_PD:
2390 case PGMPOOLKIND_ROOT_PDPT:
2391 return 8;
2392
2393 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2394 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2395 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2396 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2397 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2398 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2399 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2400 case PGMPOOLKIND_ROOT_NESTED:
2401 /** @todo can we return 0? (nobody is calling this...) */
2402 AssertFailed();
2403 return 0;
2404
2405 default:
2406 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2407 }
2408}
2409
2410#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2411
2412/**
2413 * Scans one shadow page table for mappings of a physical page.
2414 *
2415 * @param pVM The VM handle.
2416 * @param pPhysPage The guest page in question.
2417 * @param iShw The shadow page table.
2418 * @param cRefs The number of references made in that PT.
2419 */
2420static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2421{
2422 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2423 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2424
2425 /*
2426 * Assert sanity.
2427 */
2428 Assert(cRefs == 1);
2429 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2430 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2431
2432 /*
2433 * Then, clear the actual mappings to the page in the shadow PT.
2434 */
2435 switch (pPage->enmKind)
2436 {
2437 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2438 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2439 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2440 {
2441 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
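            /* Match on the page frame address plus the present bit; other attribute bits are irrelevant for identifying the mapping. */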
2442 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2443 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2444 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2445 {
2446 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2447 pPT->a[i].u = 0;
2448 cRefs--;
2449 if (!cRefs)
2450 return;
2451 }
2452#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2453 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2454 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2455 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2456 {
2457 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2458 pPT->a[i].u = 0;
2459 }
2460#endif
2461 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2462 break;
2463 }
2464
2465 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2466 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2467 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2468 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2469 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2470 {
2471 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2472 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2473 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2474 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2475 {
2476 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2477 pPT->a[i].u = 0;
2478 cRefs--;
2479 if (!cRefs)
2480 return;
2481 }
2482#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2483 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2484 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2485 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2486 {
2487 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2488 pPT->a[i].u = 0;
2489 }
2490#endif
2491 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2492 break;
2493 }
2494
2495 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2496 {
2497 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2498 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2499 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2500 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2501 {
2502 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2503 pPT->a[i].u = 0;
2504 cRefs--;
2505 if (!cRefs)
2506 return;
2507 }
2508#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2509 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2510 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2511 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2512 {
2513 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2514 pPT->a[i].u = 0;
2515 }
2516#endif
2517 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2518 break;
2519 }
2520
2521 default:
2522 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2523 }
2524}
2525
2526
2527/**
2528 * Scans one shadow page table for mappings of a physical page.
2529 *
2530 * @param pVM The VM handle.
2531 * @param pPhysPage The guest page in question.
2532 * @param iShw The shadow page table.
2533 * @param cRefs The number of references made in that PT.
2534 */
2535void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2536{
2537 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2538 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2539 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2540 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2541 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2542 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2543}
2544
2545
2546/**
2547 * Flushes a list of shadow page tables mapping the same physical page.
2548 *
2549 * @param pVM The VM handle.
2550 * @param pPhysPage The guest page in question.
2551 * @param iPhysExt The physical cross reference extent list to flush.
2552 */
2553void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2554{
2555 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2556 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2557 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%u\n", pPhysPage->HCPhys, iPhysExt));
2558
2559 const uint16_t iPhysExtStart = iPhysExt;
2560 PPGMPOOLPHYSEXT pPhysExt;
2561 do
2562 {
2563 Assert(iPhysExt < pPool->cMaxPhysExts);
2564 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2565 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2566 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2567 {
2568 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2569 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2570 }
2571
2572 /* next */
2573 iPhysExt = pPhysExt->iNext;
2574 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2575
2576 /* insert the list into the free list and clear the ram range entry. */
2577 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2578 pPool->iPhysExtFreeHead = iPhysExtStart;
2579 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2580
2581 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2582}
2583
2584#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2585
2586/**
2587 * Scans all shadow page tables for mappings of a physical page.
2588 *
2589 * This may be slow, but it's most likely more efficient than cleaning
2590 * out the entire page pool / cache.
2591 *
2592 * @returns VBox status code.
2593 * @retval VINF_SUCCESS if all references have been successfully cleared.
2594 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2595 * a page pool cleaning.
2596 *
2597 * @param pVM The VM handle.
2598 * @param pPhysPage The guest page in question.
2599 */
2600int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2601{
2602 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2603 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2604 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2605 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2606
2607#if 1
2608 /*
2609 * There is a limit to what makes sense.
2610 */
2611 if (pPool->cPresent > 1024)
2612 {
2613 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2614 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2615 return VINF_PGM_GCPHYS_ALIASED;
2616 }
2617#endif
2618
2619 /*
2620 * Iterate all the pages until we've encountered all that are in use.
2621 * This is a simple but not quite optimal solution.
2622 */
2623 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2624 const uint32_t u32 = u64;
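    /* u64 is the match value for PAE page table entries; u32 is the truncated form used for legacy 32-bit page tables. */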
2625 unsigned cLeft = pPool->cUsedPages;
2626 unsigned iPage = pPool->cCurPages;
2627 while (--iPage >= PGMPOOL_IDX_FIRST)
2628 {
2629 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2630 if (pPage->GCPhys != NIL_RTGCPHYS)
2631 {
2632 switch (pPage->enmKind)
2633 {
2634 /*
2635 * We only care about shadow page tables.
2636 */
2637 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2638 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2639 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2640 {
2641 unsigned cPresent = pPage->cPresent;
2642 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2643 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2644 if (pPT->a[i].n.u1Present)
2645 {
2646 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2647 {
2648 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2649 pPT->a[i].u = 0;
2650 }
2651 if (!--cPresent)
2652 break;
2653 }
2654 break;
2655 }
2656
2657 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2658 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2659 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2660 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2661 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2662 {
2663 unsigned cPresent = pPage->cPresent;
2664 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2665 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2666 if (pPT->a[i].n.u1Present)
2667 {
2668 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2669 {
2670 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2671 pPT->a[i].u = 0;
2672 }
2673 if (!--cPresent)
2674 break;
2675 }
2676 break;
2677 }
2678 }
2679 if (!--cLeft)
2680 break;
2681 }
2682 }
2683
2684 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2685 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2686 return VINF_SUCCESS;
2687}
2688
2689
2690/**
2691 * Clears the user entry in a user table.
2692 *
2693 * This is used to remove all references to a page when flushing it.
2694 */
2695static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2696{
2697 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2698 Assert(pUser->iUser < pPool->cCurPages);
2699
2700 /*
2701 * Map the user page.
2702 */
2703 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2704 union
2705 {
2706 uint64_t *pau64;
2707 uint32_t *pau32;
2708 } u;
2709 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
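    /* The union provides both 32-bit and 64-bit views of the user table; which one applies depends on the user page kind checked below. */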
2710
2711 /* Safety precaution in case we change the paging for other modes too in the future. */
2712 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
2713
2714#ifdef VBOX_STRICT
2715 /*
2716 * Some sanity checks.
2717 */
2718 switch (pUserPage->enmKind)
2719 {
2720 case PGMPOOLKIND_ROOT_32BIT_PD:
2721 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2722 Assert(!(u.pau32[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2723 break;
2724 case PGMPOOLKIND_ROOT_PAE_PD:
2725 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2726 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2727 break;
2728 case PGMPOOLKIND_ROOT_PDPT:
2729 Assert(pUser->iUserTable < 4);
2730 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2731 break;
2732 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2733 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2734 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2735 break;
2736 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2737 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2738 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2739 break;
2740 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2741 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2742 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2743 break;
2744 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2745 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2746 /* GCPhys >> PAGE_SHIFT is the index here */
2747 break;
2748 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2749 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2750 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2751 break;
2752
2753 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2754 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2755 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2756 break;
2757
2758 case PGMPOOLKIND_ROOT_NESTED:
2759 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2760 break;
2761
2762 default:
2763 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2764 break;
2765 }
2766#endif /* VBOX_STRICT */
2767
2768 /*
2769 * Clear the entry in the user page.
2770 */
2771 switch (pUserPage->enmKind)
2772 {
2773 /* 32-bit entries */
2774 case PGMPOOLKIND_ROOT_32BIT_PD:
2775 u.pau32[pUser->iUserTable] = 0;
2776 break;
2777
2778 /* 64-bit entries */
2779 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2780 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2781 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2782 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2783 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2784 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2785 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2786 case PGMPOOLKIND_ROOT_PAE_PD:
2787 case PGMPOOLKIND_ROOT_PDPT:
2788 case PGMPOOLKIND_ROOT_NESTED:
2789 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2790 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2791 u.pau64[pUser->iUserTable] = 0;
2792 break;
2793
2794 default:
2795 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2796 }
2797}
2798
2799
2800/**
2801 * Clears all users of a page.
2802 */
2803static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2804{
2805 /*
2806 * Free all the user records.
2807 */
2808 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2809 uint16_t i = pPage->iUserHead;
2810 while (i != NIL_PGMPOOL_USER_INDEX)
2811 {
2812 /* Clear the entry in the user table. */
2813 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2814
2815 /* Free it. */
2816 const uint16_t iNext = paUsers[i].iNext;
2817 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2818 paUsers[i].iNext = pPool->iUserFreeHead;
2819 pPool->iUserFreeHead = i;
2820
2821 /* Next. */
2822 i = iNext;
2823 }
2824 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2825}
2826
2827#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2828
2829/**
2830 * Allocates a new physical cross reference extent.
2831 *
2832 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2833 * @param pVM The VM handle.
2834 * @param piPhysExt Where to store the phys ext index.
2835 */
2836PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2837{
2838 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2839 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2840 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2841 {
2842 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2843 return NULL;
2844 }
2845 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2846 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2847 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2848 *piPhysExt = iPhysExt;
2849 return pPhysExt;
2850}
2851
2852
2853/**
2854 * Frees a physical cross reference extent.
2855 *
2856 * @param pVM The VM handle.
2857 * @param iPhysExt The extent to free.
2858 */
2859void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2860{
2861 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2862 Assert(iPhysExt < pPool->cMaxPhysExts);
2863 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2864 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2865 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2866 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2867 pPool->iPhysExtFreeHead = iPhysExt;
2868}
2869
2870
2871/**
2872 * Frees a list of physical cross reference extents.
2873 *
2874 * @param pVM The VM handle.
2875 * @param iPhysExt The index of the first extent in the list to free.
2876 */
2877void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2878{
2879 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2880
2881 const uint16_t iPhysExtStart = iPhysExt;
2882 PPGMPOOLPHYSEXT pPhysExt;
2883 do
2884 {
2885 Assert(iPhysExt < pPool->cMaxPhysExts);
2886 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2887 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2888 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2889
2890 /* next */
2891 iPhysExt = pPhysExt->iNext;
2892 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2893
2894 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2895 pPool->iPhysExtFreeHead = iPhysExtStart;
2896}
2897
2898
2899/**
2900 * Insert a reference into a list of physical cross reference extents.
2901 *
2902 * @returns The new ram range flags (top 16-bits).
2903 *
2904 * @param pVM The VM handle.
2905 * @param iPhysExt The physical extent index of the list head.
2906 * @param iShwPT The shadow page table index.
2907 *
2908 */
2909static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2910{
2911 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2912 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
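    /* The value returned below packs the extent list index into the index field and marks the reference count field as MM_RAM_FLAGS_CREFS_PHYSEXT, matching the ram range flag layout. */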
2913
2914 /* special common case. */
2915 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2916 {
2917 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2918 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2919 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2920 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2921 }
2922
2923 /* general treatment. */
2924 const uint16_t iPhysExtStart = iPhysExt;
2925 unsigned cMax = 15;
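    /* Bound the chain walk: after about 15 extents we give up and mark the page as overflowed instead of growing the list further. */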
2926 for (;;)
2927 {
2928 Assert(iPhysExt < pPool->cMaxPhysExts);
2929 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2930 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2931 {
2932 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2933 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2934 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2935 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2936 }
2937 if (!--cMax)
2938 {
2939 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2940 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2941 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2942 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2943 }
        /* Advance to the next extent in the chain; when we reach the end, fall out of the loop and append a new extent below. */
        iPhysExt = paPhysExts[iPhysExt].iNext;
        if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
            break;
2944 }
2945
2946 /* add another extent to the list. */
2947 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2948 if (!pNew)
2949 {
2950 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2951 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2952 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2953 }
2954 pNew->iNext = iPhysExtStart;
2955 pNew->aidx[0] = iShwPT;
2956 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2957 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2958}
2959
2960
2961/**
2962 * Add a reference to guest physical page where extents are in use.
2963 *
2964 * @returns The new ram range flags (top 16-bits).
2965 *
2966 * @param pVM The VM handle.
2967 * @param u16 The ram range flags (top 16-bits).
2968 * @param iShwPT The shadow page table index.
2969 */
2970uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2971{
2972 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2973 {
2974 /*
2975 * Convert to extent list.
2976 */
2977 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2978 uint16_t iPhysExt;
2979 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2980 if (pPhysExt)
2981 {
2982 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2983 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2984 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2985 pPhysExt->aidx[1] = iShwPT;
2986 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2987 }
2988 else
2989 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2990 }
2991 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2992 {
2993 /*
2994 * Insert into the extent list.
2995 */
2996 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2997 }
2998 else
2999 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3000 return u16;
3001}
3002
3003
3004/**
3005 * Clear references to guest physical memory.
3006 *
3007 * @param pPool The pool.
3008 * @param pPage The page.
3009 * @param pPhysPage Pointer to the aPages entry in the ram range.
3010 */
3011void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3012{
3013 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
3014 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3015
3016 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
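    /* MM_RAM_FLAGS_IDX_OVERFLOWED means individual references are no longer tracked for this page, so there is nothing to clean up. */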
3017 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
3018 {
3019 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3020 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3021 do
3022 {
3023 Assert(iPhysExt < pPool->cMaxPhysExts);
3024
3025 /*
3026 * Look for the shadow page and check if it's all freed.
3027 */
3028 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3029 {
3030 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3031 {
3032 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3033
3034 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3035 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3036 {
3037 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3038 return;
3039 }
3040
3041 /* we can free the node. */
3042 PVM pVM = pPool->CTX_SUFF(pVM);
3043 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3044 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3045 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3046 {
3047 /* lonely node */
3048 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3049 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
3050 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3051 }
3052 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3053 {
3054 /* head */
3055 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
3056 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
3057 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
3058 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
3059 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3060 }
3061 else
3062 {
3063 /* in list */
3064 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3065 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3066 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3067 }
3068 iPhysExt = iPhysExtNext;
3069 return;
3070 }
3071 }
3072
3073 /* next */
3074 iPhysExtPrev = iPhysExt;
3075 iPhysExt = paPhysExts[iPhysExt].iNext;
3076 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3077
3078 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3079 }
3080 else /* nothing to do */
3081 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
3082}
3083
3084
3085/**
3086 * Clear references to guest physical memory.
3087 *
3088 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3089 * is assumed to be correct, so the linear search can be skipped and we can assert
3090 * at an earlier point.
3091 *
3092 * @param pPool The pool.
3093 * @param pPage The page.
3094 * @param HCPhys The host physical address corresponding to the guest page.
3095 * @param GCPhys The guest physical address corresponding to HCPhys.
3096 */
3097static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3098{
3099 /*
3100 * Walk range list.
3101 */
3102 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3103 while (pRam)
3104 {
3105 RTGCPHYS off = GCPhys - pRam->GCPhys;
3106 if (off < pRam->cb)
3107 {
3108 /* does it match? */
3109 const unsigned iPage = off >> PAGE_SHIFT;
3110 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3111#ifdef LOG_ENABLED
3112            RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3113            Log(("pgmPoolTracDerefGCPhys %VHp vs %VHp\n", HCPhysPage, HCPhys));
3114#endif
3115 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3116 {
3117 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3118 return;
3119 }
3120 break;
3121 }
3122 pRam = pRam->CTX_SUFF(pNext);
3123 }
3124 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
3125}
3126
3127
3128/**
3129 * Clear references to guest physical memory.
3130 *
3131 * @param pPool The pool.
3132 * @param pPage The page.
3133 * @param HCPhys The host physical address corresponding to the guest page.
3134 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3135 */
3136static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3137{
3138 /*
3139 * Walk range list.
3140 */
3141 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3142 while (pRam)
3143 {
3144 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
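        /* Unsigned wrap-around makes the single comparison below cover both range bounds: a hint below the range start yields a huge offset and fails the check. */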
3145 if (off < pRam->cb)
3146 {
3147 /* does it match? */
3148 const unsigned iPage = off >> PAGE_SHIFT;
3149 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3150 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3151 {
3152 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3153 return;
3154 }
3155 break;
3156 }
3157 pRam = pRam->CTX_SUFF(pNext);
3158 }
3159
3160 /*
3161 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3162 */
3163 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3164 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3165 while (pRam)
3166 {
3167 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3168 while (iPage-- > 0)
3169 {
3170 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3171 {
3172 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
3173 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3174 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3175 return;
3176 }
3177 }
3178 pRam = pRam->CTX_SUFF(pNext);
3179 }
3180
3181 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
3182}
3183
3184
3185/**
3186 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3187 *
3188 * @param pPool The pool.
3189 * @param pPage The page.
3190 * @param pShwPT The shadow page table (mapping of the page).
3191 * @param pGstPT The guest page table.
3192 */
3193DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3194{
3195 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3196 if (pShwPT->a[i].n.u1Present)
3197 {
3198 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3199 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3200 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3201 if (!--pPage->cPresent)
3202 break;
3203 }
3204}
3205
3206
3207/**
3208 * Clear references to guest physical memory in a PAE / 32-bit page table.
3209 *
3210 * @param pPool The pool.
3211 * @param pPage The page.
3212 * @param pShwPT The shadow page table (mapping of the page).
3213 * @param pGstPT The guest page table (just a half one).
3214 */
3215DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3216{
3217 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3218 if (pShwPT->a[i].n.u1Present)
3219 {
3220 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3221 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3222 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3223 }
3224}
3225
3226
3227/**
3228 * Clear references to guest physical memory in a PAE / PAE page table.
3229 *
3230 * @param pPool The pool.
3231 * @param pPage The page.
3232 * @param pShwPT The shadow page table (mapping of the page).
3233 * @param pGstPT The guest page table.
3234 */
3235DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3236{
3237 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3238 if (pShwPT->a[i].n.u1Present)
3239 {
3240 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3241 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3242 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3243 }
3244}
3245
3246
3247/**
3248 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3249 *
3250 * @param pPool The pool.
3251 * @param pPage The page.
3252 * @param pShwPT The shadow page table (mapping of the page).
3253 */
3254DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3255{
3256 RTGCPHYS GCPhys = pPage->GCPhys;
3257 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3258 if (pShwPT->a[i].n.u1Present)
3259 {
3260 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3261 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3262 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3263 }
3264}
3265
3266
3267/**
3268 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3269 *
3270 * @param pPool The pool.
3271 * @param pPage The page.
3272 * @param pShwPT The shadow page table (mapping of the page).
3273 */
3274DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3275{
3276 RTGCPHYS GCPhys = pPage->GCPhys;
3277 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3278 if (pShwPT->a[i].n.u1Present)
3279 {
3280 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%VGp\n",
3281 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3282 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3283 }
3284}
3285
3286#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3287
3288/**
3289 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
3290 *
3291 * @param pPool The pool.
3292 * @param pPage The page.
3293 * @param pShwPD The shadow page directory (mapping of the page).
3294 */
3295DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3296{
3297 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3298 {
3299 if (pShwPD->a[i].n.u1Present)
3300 {
3301 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3302 if (pSubPage)
3303 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3304 else
3305 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3306 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3307 }
3308 }
3309}
3310
3311
3312/**
3313 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3314 *
3315 * @param pPool The pool.
3316 * @param pPage The page.
3317 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3318 */
3319DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3320{
3321 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3322 {
3323 if (pShwPDPT->a[i].n.u1Present)
3324 {
3325 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3326 if (pSubPage)
3327 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3328 else
3329 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3330 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3331 }
3332 }
3333}
3334
3335
3336/**
3337 * Clear references to shadowed pages in a 64-bit level 4 page table.
3338 *
3339 * @param pPool The pool.
3340 * @param pPage The page.
3341 * @param pShwPML4 The shadow page map level 4 table (mapping of the page).
3342 */
3343DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3344{
3345 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3346 {
3347 if (pShwPML4->a[i].n.u1Present)
3348 {
3349 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3350 if (pSubPage)
3351 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3352 else
3353 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3354 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3355 }
3356 }
3357}
3358
3359
3360/**
3361 * Clear references to shadowed pages in an EPT page table.
3362 *
3363 * @param pPool The pool.
3364 * @param pPage The page.
3365 * @param pShwPT The shadow page table (mapping of the page).
3366 */
3367DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3368{
3369 RTGCPHYS GCPhys = pPage->GCPhys;
3370 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3371 if (pShwPT->a[i].n.u1Present)
3372 {
3373 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RGp\n",
3374 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3375 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3376 }
3377}
3378
3379
3380/**
3381 * Clear references to shadowed pages in an EPT page directory.
3382 *
3383 * @param pPool The pool.
3384 * @param pPage The page.
3385 * @param pShwPD The shadow page directory (mapping of the page).
3386 */
3387DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3388{
3389 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3390 {
3391 if (pShwPD->a[i].n.u1Present)
3392 {
3393 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3394 if (pSubPage)
3395 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3396 else
3397 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3398 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3399 }
3400 }
3401}
3402
3403
3404/**
3405 * Clear references to shadowed pages in an EPT page directory pointer table.
3406 *
3407 * @param pPool The pool.
3408 * @param pPage The page.
3409 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3410 */
3411DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3412{
3413 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3414 {
3415 if (pShwPDPT->a[i].n.u1Present)
3416 {
3417 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3418 if (pSubPage)
3419 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3420 else
3421 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3422 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3423 }
3424 }
3425}
3426
3427
3428/**
3429 * Clears all references made by this page.
3430 *
3431 * This includes other shadow pages and GC physical addresses.
3432 *
3433 * @param pPool The pool.
3434 * @param pPage The page.
3435 */
3436static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3437{
3438 /*
3439 * Map the shadow page and take action according to the page kind.
3440 */
3441 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3442 switch (pPage->enmKind)
3443 {
3444#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3445 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3446 {
3447 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3448 void *pvGst;
3449 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3450 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3451 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3452 break;
3453 }
3454
3455 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3456 {
3457 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3458 void *pvGst;
3459 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3460 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3461 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3462 break;
3463 }
3464
3465 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3466 {
3467 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3468 void *pvGst;
3469 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3470 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3471 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3472 break;
3473 }
3474
3475 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3476 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3477 {
3478 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3479 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3480 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3481 break;
3482 }
3483
3484 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3485 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3486 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3487 {
3488 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3489 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3490 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3491 break;
3492 }
3493
3494#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3495 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3496 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3497 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3498 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3499 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3500 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3501 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3502 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3503 break;
3504#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3505
3506 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
3507 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3508 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3509 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3510 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3511 break;
3512
3513 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3514 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3515 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3516 break;
3517
3518 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
3519 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3520 break;
3521
3522 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3523 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
3524 break;
3525
3526 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3527 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
3528 break;
3529
3530 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3531 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
3532 break;
3533
3534 default:
3535 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3536 }
3537
3538 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3539 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3540 ASMMemZeroPage(pvShw);
3541 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3542 pPage->fZeroed = true;
3543}
3544
3545#endif /* PGMPOOL_WITH_USER_TRACKING */
3546
3547/**
3548 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3549 *
3550 * @param pPool The pool.
3551 */
3552static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3553{
3554 /*
3555 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST-1.
3556 */
3557 Assert(NIL_PGMPOOL_IDX == 0);
3558 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3559 {
3560 /*
3561 * Get the page address.
3562 */
3563 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3564 union
3565 {
3566 uint64_t *pau64;
3567 uint32_t *pau32;
3568 } u;
3569 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3570
3571 /*
3572 * Mark stuff not present.
3573 */
3574 switch (pPage->enmKind)
3575 {
3576 case PGMPOOLKIND_ROOT_32BIT_PD:
3577 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3578 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3579 u.pau32[iPage] = 0;
3580 break;
3581
3582 case PGMPOOLKIND_ROOT_PAE_PD:
3583 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * X86_PG_PAE_PDPE_ENTRIES; iPage++)
3584 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3585 u.pau64[iPage] = 0;
3586 break;
3587
3588 case PGMPOOLKIND_ROOT_PDPT:
3589 /* Not root of shadowed pages currently, ignore it. */
3590 break;
3591
3592 case PGMPOOLKIND_ROOT_NESTED:
3593 ASMMemZero32(u.pau64, PAGE_SIZE);
3594 break;
3595 }
3596 }
3597
3598 /*
3599 * Paranoia (to be removed), flag a global CR3 sync.
3600 */
3601 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
3602}
3603
3604
3605/**
3606 * Flushes the entire cache.
3607 *
3608 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3609 * and will execute this CR3 flush.
3610 *
3611 * @param pPool The pool.
3612 */
3613static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3614{
3615 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3616 LogFlow(("pgmPoolFlushAllInt:\n"));
3617
3618 /*
3619 * If there are no pages in the pool, there is nothing to do.
3620 */
3621 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3622 {
3623 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3624 return;
3625 }
3626
3627 /*
3628 * Nuke the free list and reinsert all pages into it.
3629 */
3630 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3631 {
3632 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3633
3634#ifdef IN_RING3
3635 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMR3, pPage->pvPageR3));
3636#endif
3637#ifdef PGMPOOL_WITH_MONITORING
3638 if (pPage->fMonitored)
3639 pgmPoolMonitorFlush(pPool, pPage);
3640 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3641 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3642 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3643 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3644 pPage->cModifications = 0;
3645#endif
3646 pPage->GCPhys = NIL_RTGCPHYS;
3647 pPage->enmKind = PGMPOOLKIND_FREE;
3648 Assert(pPage->idx == i);
3649 pPage->iNext = i + 1;
3650 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3651 pPage->fSeenNonGlobal = false;
3652 pPage->fMonitored = false;
3653 pPage->fCached = false;
3654 pPage->fReusedFlushPending = false;
3655 pPage->fCR3Mix = false;
3656#ifdef PGMPOOL_WITH_USER_TRACKING
3657 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3658#endif
3659#ifdef PGMPOOL_WITH_CACHE
3660 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3661 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3662#endif
3663 }
3664 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3665 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3666 pPool->cUsedPages = 0;
3667
3668#ifdef PGMPOOL_WITH_USER_TRACKING
3669 /*
3670 * Zap and reinitialize the user records.
3671 */
3672 pPool->cPresent = 0;
3673 pPool->iUserFreeHead = 0;
3674 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3675 const unsigned cMaxUsers = pPool->cMaxUsers;
3676 for (unsigned i = 0; i < cMaxUsers; i++)
3677 {
3678 paUsers[i].iNext = i + 1;
3679 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3680 paUsers[i].iUserTable = 0xfffffffe;
3681 }
3682 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3683#endif
3684
3685#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3686 /*
3687 * Clear all the GCPhys links and rebuild the phys ext free list.
3688 */
3689 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3690 pRam;
3691 pRam = pRam->CTX_SUFF(pNext))
3692 {
3693 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3694 while (iPage-- > 0)
3695 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3696 }
3697
3698 pPool->iPhysExtFreeHead = 0;
3699 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3700 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3701 for (unsigned i = 0; i < cMaxPhysExts; i++)
3702 {
3703 paPhysExts[i].iNext = i + 1;
3704 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3705 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3706 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3707 }
3708 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3709#endif
3710
3711#ifdef PGMPOOL_WITH_MONITORING
3712 /*
3713 * Just zap the modified list.
3714 */
3715 pPool->cModifiedPages = 0;
3716 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3717#endif
3718
3719#ifdef PGMPOOL_WITH_CACHE
3720 /*
3721 * Clear the GCPhys hash and the age list.
3722 */
3723 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
3724 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3725 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3726 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3727#endif
3728
3729 /*
3730 * Flush all the special root pages.
3731 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3732 */
3733 pgmPoolFlushAllSpecialRoots(pPool);
3734 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3735 {
3736 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3737 pPage->iNext = NIL_PGMPOOL_IDX;
3738#ifdef PGMPOOL_WITH_MONITORING
3739 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3740 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3741 pPage->cModifications = 0;
3742 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3743 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3744 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3745 if (pPage->fMonitored)
3746 {
3747 PVM pVM = pPool->CTX_SUFF(pVM);
3748 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3749 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3750 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3751 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
3752 pPool->pszAccessHandler);
3753 AssertFatalRCSuccess(rc);
3754# ifdef PGMPOOL_WITH_CACHE
3755 pgmPoolHashInsert(pPool, pPage);
3756# endif
3757 }
3758#endif
3759#ifdef PGMPOOL_WITH_USER_TRACKING
3760 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3761#endif
3762#ifdef PGMPOOL_WITH_CACHE
3763 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3764 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3765#endif
3766 }
3767
3768 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3769}
3770
3771
3772/**
3773 * Flushes a pool page.
3774 *
3775 * This moves the page to the free list after removing all user references to it.
3776 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3777 *
3778 * @returns VBox status code.
3779 * @retval VINF_SUCCESS on success.
3780 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
3781 * @param pPool The pool.
3782 * @param pPage The shadow page.
3783 */
3784int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3785{
3786 int rc = VINF_SUCCESS;
3787 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3788 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3789 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3790
3791 /*
3792 * Quietly reject any attempts at flushing any of the special root pages.
3793 */
3794 if (pPage->idx < PGMPOOL_IDX_FIRST)
3795 {
3796 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3797 return VINF_SUCCESS;
3798 }
3799
3800 /*
3801 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping.
3802 */
3803 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
3804 {
3805 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4, ("Can't free the shadow CR3! (%VGp vs %VGp kind=%d)\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
3806 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3807 return VINF_SUCCESS;
3808 }
3809
3810 /*
3811 * Mark the page as being in need of an ASMMemZeroPage().
3812 */
3813 pPage->fZeroed = false;
3814
3815#ifdef PGMPOOL_WITH_USER_TRACKING
3816 /*
3817 * Clear the page.
3818 */
3819 pgmPoolTrackClearPageUsers(pPool, pPage);
3820 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3821 pgmPoolTrackDeref(pPool, pPage);
3822 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3823#endif
3824
3825#ifdef PGMPOOL_WITH_CACHE
3826 /*
3827 * Flush it from the cache.
3828 */
3829 pgmPoolCacheFlushPage(pPool, pPage);
3830#endif /* PGMPOOL_WITH_CACHE */
3831
3832#ifdef PGMPOOL_WITH_MONITORING
3833 /*
3834 * Deregister the monitoring.
3835 */
3836 if (pPage->fMonitored)
3837 rc = pgmPoolMonitorFlush(pPool, pPage);
3838#endif
3839
3840 /*
3841 * Free the page.
3842 */
3843 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3844 pPage->iNext = pPool->iFreeHead;
3845 pPool->iFreeHead = pPage->idx;
3846 pPage->enmKind = PGMPOOLKIND_FREE;
3847 pPage->GCPhys = NIL_RTGCPHYS;
3848 pPage->fReusedFlushPending = false;
3849
3850 pPool->cUsedPages--;
3851 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3852 return rc;
3853}
3854
3855
3856/**
3857 * Frees a usage of a pool page.
3858 *
3859 * The caller is responsible for updating the user table so that it no longer
3860 * references the shadow page.
3861 *
3862 * @param pPool The pool.
3863 * @param pPage The shadow page.
3864 * @param iUser The shadow page pool index of the user table.
3865 * @param iUserTable The index into the user table (shadowed).
3866 */
3867void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3868{
3869 STAM_PROFILE_START(&pPool->StatFree, a);
3870 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3871 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3872 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3873#ifdef PGMPOOL_WITH_USER_TRACKING
3874 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3875#endif
3876#ifdef PGMPOOL_WITH_CACHE
3877 if (!pPage->fCached)
3878#endif
3879 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3880 STAM_PROFILE_STOP(&pPool->StatFree, a);
3881}
3882
3883
3884/**
3885 * Makes one or more pages free, growing the pool or evicting a cached page as needed.
3886 *
3887 * @returns VBox status code.
3888 * @retval VINF_SUCCESS on success.
3889 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3890 *
3891 * @param pPool The pool.
3892 * @param iUser The user of the page.
3893 */
3894static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3895{
3896 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3897
3898 /*
3899 * If the pool isn't fully grown yet, expand it.
3900 */
3901 if (pPool->cCurPages < pPool->cMaxPages)
3902 {
3903 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3904#ifdef IN_RING3
3905 int rc = PGMR3PoolGrow(pPool->pVMR3);
3906#else
3907 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3908#endif
3909 if (VBOX_FAILURE(rc))
3910 return rc;
3911 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3912 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3913 return VINF_SUCCESS;
3914 }
3915
3916#ifdef PGMPOOL_WITH_CACHE
3917 /*
3918 * Free one cached page.
3919 */
3920 return pgmPoolCacheFreeOne(pPool, iUser);
3921#else
3922 /*
3923 * Flush the pool.
3924 * If we have tracking enabled, it should be possible to come up with
3925 * a cheap replacement strategy...
3926 */
3927 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
3928 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
3929 pgmPoolFlushAllInt(pPool);
3930 return VERR_PGM_POOL_FLUSHED;
3931#endif
3932}
3933
3934
3935/**
3936 * Allocates a page from the pool.
3937 *
3938 * This page may actually be a cached page and not in need of any processing
3939 * on the caller's part.
3940 *
3941 * @returns VBox status code.
3942 * @retval VINF_SUCCESS if a NEW page was allocated.
3943 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3944 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3945 * @param pVM The VM handle.
3946 * @param GCPhys The GC physical address of the page we're going to shadow.
3947 * For 4MB and 2MB PD entries, it's the first address the
3948 * shadow PT is covering.
3949 * @param enmKind The kind of mapping.
3950 * @param iUser The shadow page pool index of the user table.
3951 * @param iUserTable The index into the user table (shadowed).
3952 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3953 */
3954int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
3955{
3956 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3957 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3958 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3959 *ppPage = NULL;
3960
3961#ifdef PGMPOOL_WITH_CACHE
3962 if (pPool->fCacheEnabled)
3963 {
3964 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3965 if (VBOX_SUCCESS(rc2))
3966 {
3967 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3968 LogFlow(("pgmPoolAlloc: cached returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3969 return rc2;
3970 }
3971 }
3972#endif
3973
3974 /*
3975 * Allocate a new one.
3976 */
3977 int rc = VINF_SUCCESS;
3978 uint16_t iNew = pPool->iFreeHead;
3979 if (iNew == NIL_PGMPOOL_IDX)
3980 {
3981 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3982 if (VBOX_FAILURE(rc))
3983 {
3984 if (rc != VERR_PGM_POOL_CLEARED)
3985 {
3986 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3987 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3988 return rc;
3989 }
3990 Log(("pgmPoolMakeMoreFreePages failed with %Vrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
3991 rc = VERR_PGM_POOL_FLUSHED;
3992 }
3993 iNew = pPool->iFreeHead;
3994 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3995 }
3996
3997 /* unlink the free head */
3998 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3999 pPool->iFreeHead = pPage->iNext;
4000 pPage->iNext = NIL_PGMPOOL_IDX;
4001
4002 /*
4003 * Initialize it.
4004 */
4005 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4006 pPage->enmKind = enmKind;
4007 pPage->GCPhys = GCPhys;
4008 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4009 pPage->fMonitored = false;
4010 pPage->fCached = false;
4011 pPage->fReusedFlushPending = false;
4012 pPage->fCR3Mix = false;
4013#ifdef PGMPOOL_WITH_MONITORING
4014 pPage->cModifications = 0;
4015 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4016 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4017#endif
4018#ifdef PGMPOOL_WITH_USER_TRACKING
4019 pPage->cPresent = 0;
4020 pPage->iFirstPresent = ~0;
4021
4022 /*
4023 * Insert into the tracking and cache. If this fails, free the page.
4024 */
4025 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4026 if (VBOX_FAILURE(rc3))
4027 {
4028 if (rc3 != VERR_PGM_POOL_CLEARED)
4029 {
4030 pPool->cUsedPages--;
4031 pPage->enmKind = PGMPOOLKIND_FREE;
4032 pPage->GCPhys = NIL_RTGCPHYS;
4033 pPage->iNext = pPool->iFreeHead;
4034 pPool->iFreeHead = pPage->idx;
4035 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4036 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
4037 return rc3;
4038 }
4039 Log(("pgmPoolTrackInsert failed with %Vrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4040 rc = VERR_PGM_POOL_FLUSHED;
4041 }
4042#endif /* PGMPOOL_WITH_USER_TRACKING */
4043
4044 /*
4045 * Commit the allocation, clear the page and return.
4046 */
4047#ifdef VBOX_WITH_STATISTICS
4048 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4049 pPool->cUsedPagesHigh = pPool->cUsedPages;
4050#endif
4051
4052 if (!pPage->fZeroed)
4053 {
4054 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4055 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4056 ASMMemZeroPage(pv);
4057 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4058 }
4059
4060 *ppPage = pPage;
4061 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4062 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4063 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4064 return rc;
4065}
4066
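/**
 * A minimal caller-side sketch of the pgmPoolAlloc() return contract documented
 * above. Illustrative only: pShwPde, GCPhysGuestPT, iUserIdx and iPdeIndex are
 * placeholder names, and the PDE flags depend on the shadow paging mode, so
 * treat this as a hedged example rather than the actual PGMAllBth.h code.
 *
 * @code
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAlloc(pVM, GCPhysGuestPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                           iUserIdx, iPdeIndex, &pShwPage);
 *     if (rc == VINF_SUCCESS || rc == VINF_PGM_CACHED_PAGE)
 *     {
 *         // New or cached page; either way, hook it up in the user (PD) table.
 *         pShwPde->u = pShwPage->Core.Key | X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A;
 *     }
 *     else if (rc == VERR_PGM_POOL_FLUSHED)
 *     {
 *         // The pool was flushed; the global CR3 sync FF is set, so bail out and resync.
 *         return VINF_PGM_SYNC_CR3;
 *     }
 * @endcode
 */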
4067
4068/**
4069 * Frees a usage of a pool page.
4070 *
4071 * @param pVM The VM handle.
4072 * @param HCPhys The HC physical address of the shadow page.
4073 * @param iUser The shadow page pool index of the user table.
4074 * @param iUserTable The index into the user table (shadowed).
4075 */
4076void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4077{
4078 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4079 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4080 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4081}
4082
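/**
 * A minimal counterpart sketch for pgmPoolFree(): the caller owns the user
 * table entry and must clear it itself, as noted for pgmPoolFreeByPage() above.
 * Illustrative only; pShwPde, iUserIdx and iPdeIndex are placeholder names.
 *
 * @code
 *     const RTHCPHYS HCPhysPT = pShwPde->u & X86_PDE_PAE_PG_MASK;
 *     pShwPde->u = 0;                                   // drop our reference first...
 *     pgmPoolFree(pVM, HCPhysPT, iUserIdx, iPdeIndex);  // ...then release the pool page usage.
 * @endcode
 */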
4083
4084/**
4085 * Gets an in-use page in the pool by its physical address.
4086 *
4087 * @returns Pointer to the page.
4088 * @param pVM The VM handle.
4089 * @param HCPhys The HC physical address of the shadow page.
4090 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4091 */
4092PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4093{
4094 /** @todo profile this! */
4095 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4096 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4097 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
4098 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
4099 return pPage;
4100}
4101
4102
4103/**
4104 * Flushes the entire cache.
4105 *
4106 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4107 * and will execute this CR3 flush.
4108 *
4109 * @param pVM The VM handle.
4110 */
4111void pgmPoolFlushAll(PVM pVM)
4112{
4113 LogFlow(("pgmPoolFlushAll:\n"));
4114 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4115}
4116
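/**
 * A minimal sketch of servicing the forced action raised by pgmPoolFlushAll().
 * Illustrative only; it assumes the usual VM_FF_ISSET / PGMSyncCR3 /
 * CPUMGetGuestCRx interfaces of this code base, so treat the exact signatures
 * as an assumption rather than a reference.
 *
 * @code
 *     pgmPoolFlushAll(pVM);
 *     // The pool has set VM_FF_PGM_SYNC_CR3; resync the shadow page tables globally.
 *     if (VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
 *         rc = PGMSyncCR3(pVM, CPUMGetGuestCR0(pVM), CPUMGetGuestCR3(pVM),
 *                         CPUMGetGuestCR4(pVM), true); // fGlobal
 * @endcode
 */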