VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@17462

Last change on this file since 17462 was 17460, checked in by vboxsync, 16 years ago

Again deal with dynamic mappings in RC.

/* $Id: PGMAllPool.cpp 17460 2009-03-06 12:32:59Z vboxsync $ */
/** @file
 * PGM Shadow Page Pool.
 */

/*
 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
 * Clara, CA 95054 USA or visit http://www.sun.com if you need
 * additional information or have any questions.
 */


/*******************************************************************************
*   Header Files                                                               *
*******************************************************************************/
#define LOG_GROUP LOG_GROUP_PGM_POOL
#include <VBox/pgm.h>
#include <VBox/mm.h>
#include <VBox/em.h>
#include <VBox/cpum.h>
#ifdef IN_RC
# include <VBox/patm.h>
#endif
#include "PGMInternal.h"
#include <VBox/vm.h>
#include <VBox/disopcode.h>
#include <VBox/hwacc_vmx.h>

#include <VBox/log.h>
#include <VBox/err.h>
#include <iprt/asm.h>
#include <iprt/string.h>


/*******************************************************************************
*   Internal Functions                                                         *
*******************************************************************************/
__BEGIN_DECLS
static void pgmPoolFlushAllInt(PPGMPOOL pPool);
#ifdef PGMPOOL_WITH_USER_TRACKING
DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#endif
#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
#endif
#ifdef PGMPOOL_WITH_CACHE
static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
#endif
#ifdef PGMPOOL_WITH_MONITORING
static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#endif
#ifndef IN_RING3
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
#endif
#ifdef LOG_ENABLED
static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
#endif
__END_DECLS


/**
 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
 *
 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
 * @param   enmKind     The page kind.
 */
DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            return true;
        default:
            return false;
    }
}

/** @def PGMPOOL_PAGE_2_LOCKED_PTR
 * Maps a pool page into the current context and locks it (RC only).
 *
 * @returns Pointer to the mapping.
 * @param   pVM     The VM handle.
 * @param   pPage   The pool page.
 *
 * @remark  In RC this uses PGMGCDynMapHCPage(), so it will consume one of the
 *          small page window entries employed by that function. Be careful.
 * @remark  There is no need to assert on the result.
 */
#if defined(IN_RC)
DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
{
    void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);

    /* Make sure the dynamic mapping will not be reused. */
    if (pv)
        PGMDynLockHCPage(pVM, (uint8_t *)pv);

    return pv;
}
#else
# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage)  PGMPOOL_PAGE_2_PTR(pVM, pPage)
#endif

/** @def PGMPOOL_UNLOCK_PTR
 * Unlocks a pool page mapping previously locked by PGMPOOL_PAGE_2_LOCKED_PTR
 * (RC only), making its dynamic mapping slot reusable again.
 *
 * @param   pVM     The VM handle.
 * @param   pvPage  The mapping of the pool page.
 *
 * @remark  There is no need to assert on the result.
 */
#if defined(IN_RC)
DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
{
    if (pvPage)
        PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
}
#else
# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
#endif
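
/* A minimal, hypothetical usage sketch for the two helpers above (iEntry is
 * an assumed index): map and lock the pool page, modify the entry, then
 * unlock so the RC dynamic mapping slot can be recycled:
 *
 *     PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
 *     if (pPT)
 *     {
 *         pPT->a[iEntry].u = 0;       // touch the shadow entry
 *         PGMPOOL_UNLOCK_PTR(pVM, pPT);
 *     }
 */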

#if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
/**
 * Maps a pool page into the current context.
 *
 * @returns Pointer to the mapping.
 * @param   pPGM    Pointer to the PGM instance data.
 * @param   pPage   The page to map.
 */
void *pgmPoolMapPageFallback(PPGM pPGM, PPGMPOOLPAGE pPage)
{
    /* General pages are taken care of by the inlined part; it
       only ends up here in case of failure. */
    AssertReleaseReturn(pPage->idx < PGMPOOL_IDX_FIRST, NULL);

/** @todo make sure HCPhys is valid for *all* indexes. */
    /* special pages. */
# ifdef IN_RC
    switch (pPage->idx)
    {
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
        case PGMPOOL_IDX_PD:
        case PGMPOOL_IDX_PDPT:
        case PGMPOOL_IDX_AMD64_CR3:
            return pPGM->pShwRootRC;
# else
        case PGMPOOL_IDX_PD:
            return pPGM->pShw32BitPdRC;
        case PGMPOOL_IDX_PAE_PD:
        case PGMPOOL_IDX_PAE_PD_0:
            return pPGM->apShwPaePDsRC[0];
        case PGMPOOL_IDX_PAE_PD_1:
            return pPGM->apShwPaePDsRC[1];
        case PGMPOOL_IDX_PAE_PD_2:
            return pPGM->apShwPaePDsRC[2];
        case PGMPOOL_IDX_PAE_PD_3:
            return pPGM->apShwPaePDsRC[3];
        case PGMPOOL_IDX_PDPT:
            return pPGM->pShwPaePdptRC;
# endif
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }

# else  /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
    RTHCPHYS HCPhys;
    switch (pPage->idx)
    {
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
        case PGMPOOL_IDX_PD:
        case PGMPOOL_IDX_PDPT:
        case PGMPOOL_IDX_AMD64_CR3:
            HCPhys = pPGM->HCPhysShwCR3;
            break;

        case PGMPOOL_IDX_NESTED_ROOT:
            HCPhys = pPGM->HCPhysShwNestedRoot;
            break;
# else
        case PGMPOOL_IDX_PD:
            HCPhys = pPGM->HCPhysShw32BitPD;
            break;
        case PGMPOOL_IDX_PAE_PD_0:
            HCPhys = pPGM->aHCPhysPaePDs[0];
            break;
        case PGMPOOL_IDX_PAE_PD_1:
            HCPhys = pPGM->aHCPhysPaePDs[1];
            break;
        case PGMPOOL_IDX_PAE_PD_2:
            HCPhys = pPGM->aHCPhysPaePDs[2];
            break;
        case PGMPOOL_IDX_PAE_PD_3:
            HCPhys = pPGM->aHCPhysPaePDs[3];
            break;
        case PGMPOOL_IDX_PDPT:
            HCPhys = pPGM->HCPhysShwPaePdpt;
            break;
        case PGMPOOL_IDX_NESTED_ROOT:
            HCPhys = pPGM->HCPhysShwNestedRoot;
            break;
        case PGMPOOL_IDX_PAE_PD:
            AssertReleaseMsgFailed(("PGMPOOL_IDX_PAE_PD is not usable in VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 context\n"));
            return NULL;
# endif
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }
    AssertMsg(HCPhys && HCPhys != NIL_RTHCPHYS && !(PAGE_OFFSET_MASK & HCPhys), ("%RHp\n", HCPhys));

    void *pv;
    pgmR0DynMapHCPageInlined(pPGM, HCPhys, &pv);
    return pv;
# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
}
#endif /* IN_RC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */


#ifdef PGMPOOL_WITH_MONITORING
/**
 * Determines the size of a write instruction.
 * @returns Number of bytes written.
 * @param   pDis    The disassembler state.
 */
static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
{
    /*
     * This is very crude and possibly wrong for some opcodes,
     * but since it's not really supposed to be called we can
     * probably live with that.
     */
    return DISGetParamSize(pDis, &pDis->param1);
}
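/* For instance, for a 'mov dword [edi], eax' the first parameter is the
 * dword-sized memory operand, so this would presumably report 4 bytes. */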


/**
 * Flushes a chain of pages sharing the same access monitor.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pPool   The pool.
 * @param   pPage   A page in the chain.
 */
int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));

    /*
     * Find the list head.
     */
    uint16_t idx = pPage->idx;
    if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
    {
        while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
        {
            idx = pPage->iMonitoredPrev;
            Assert(idx != pPage->idx);
            pPage = &pPool->aPages[idx];
        }
    }

    /*
     * Iterate the list flushing each shadow page.
     */
    int rc = VINF_SUCCESS;
    for (;;)
    {
        idx = pPage->iMonitoredNext;
        Assert(idx != pPage->idx);
        if (pPage->idx >= PGMPOOL_IDX_FIRST)
        {
            int rc2 = pgmPoolFlushPage(pPool, pPage);
            if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
                rc = VINF_PGM_SYNC_CR3;
        }
        /* next */
        if (idx == NIL_PGMPOOL_IDX)
            break;
        pPage = &pPool->aPages[idx];
    }
    return rc;
}


/**
 * Wrapper for reading the guest entry being modified via the current context
 * mapping of it.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         VM Handle.
 * @param   pvDst       Destination address.
 * @param   pvSrc       Source guest virtual address.
 * @param   GCPhysSrc   The source guest physical address.
 * @param   cb          Size of data to read.
 */
DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
{
#if defined(IN_RING3)
    memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
    return VINF_SUCCESS;
#else
    /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
    return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
#endif
}
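/* Note: the '& ~(cb - 1)' masking rounds the source down to the natural
 * alignment of the entry being read; e.g. with cb = 8, a fault address of
 * 0x1234 fetches the whole PAE entry starting at 0x1230. */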

/**
 * Process shadow entries before they are changed by the guest.
 *
 * For PT entries we will clear them. For PD entries, we'll simply check
 * for mapping conflicts and set the SyncCR3 FF if found.
 *
 * @param   pPool       The pool.
 * @param   pPage       The head page.
 * @param   GCPhysFault The guest physical fault address.
 * @param   pvAddress   In R0 and GC this is the guest context fault address (flat).
 *                      In R3 this is the host context 'fault' address.
 * @param   pCpu        The disassembler state for figuring out the write size.
 *                      This need not be specified if the caller knows we won't do cross entry accesses.
 */
void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, PDISCPUSTATE pCpu)
{
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
    const unsigned off     = GCPhysFault & PAGE_OFFSET_MASK;
    const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
    PVM pVM = pPool->CTX_SUFF(pVM);

    LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%s cbWrite=%d\n", (RTGCPTR)pvAddress, GCPhysFault, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
    for (;;)
    {
        union
        {
            void       *pv;
            PX86PT      pPT;
            PX86PTPAE   pPTPae;
            PX86PD      pPD;
            PX86PDPAE   pPDPae;
            PX86PDPT    pPDPT;
            PX86PML4    pPML4;
        } uShw;

        uShw.pv = NULL;
        switch (pPage->enmKind)
        {
            case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
            {
                uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
                const unsigned iShw = off / sizeof(X86PTE);
                LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
                if (uShw.pPT->a[iShw].n.u1Present)
                {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                    X86PTE GstPte;

                    int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
                    AssertRC(rc);
                    Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                               GstPte.u & X86_PTE_PG_MASK);
# endif
                    uShw.pPT->a[iShw].u = 0;
                }
                break;
            }

            /* page/2 sized */
            case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
            {
                uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
                if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
                {
                    const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
                    LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
                    if (uShw.pPTPae->a[iShw].n.u1Present)
                    {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                        X86PTE GstPte;
                        int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
                        AssertRC(rc);

                        Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
                        pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                                   uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                                   GstPte.u & X86_PTE_PG_MASK);
# endif
                        uShw.pPTPae->a[iShw].u = 0;
                    }
                }
                break;
            }

# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
            case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
            case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
            case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
            case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
            {
                unsigned iGst     = off / sizeof(X86PDE);
                unsigned iShwPdpt = iGst / 256;
                unsigned iShw     = (iGst % 256) * 2;
                uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);

                LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
                if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
                {
                    for (unsigned i = 0; i < 2; i++)
                    {
# ifndef IN_RING0
                        if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                        {
                            Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
                            VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
                            break;
                        }
                        else
# endif /* !IN_RING0 */
                        if (uShw.pPDPae->a[iShw+i].n.u1Present)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
                            pgmPoolFree(pVM,
                                        uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
                                        pPage->idx,
                                        iShw + i);
                            uShw.pPDPae->a[iShw+i].u = 0;
                        }

                        /* paranoia / a bit assumptive. */
                        if (   pCpu
                            && (off & 3)
                            && (off & 3) + cbWrite > 4)
                        {
                            const unsigned iShw2 = iShw + 2 + i;
                            if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
                            {
# ifndef IN_RING0
                                if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                                {
                                    Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
                                    VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
                                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
                                    break;
                                }
                                else
# endif /* !IN_RING0 */
                                if (uShw.pPDPae->a[iShw2].n.u1Present)
                                {
                                    LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
                                    pgmPoolFree(pVM,
                                                uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
                                                pPage->idx,
                                                iShw2);
                                    uShw.pPDPae->a[iShw2].u = 0;
                                }
                            }
                        }
                    }
                }
                break;
            }
# endif


            case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
            {
                uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
                const unsigned iShw = off / sizeof(X86PTEPAE);
                if (uShw.pPTPae->a[iShw].n.u1Present)
                {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                    X86PTEPAE GstPte;
                    int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
                    AssertRC(rc);

                    Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                               GstPte.u & X86_PTE_PAE_PG_MASK);
# endif
                    uShw.pPTPae->a[iShw].u = 0;
                }

                /* paranoia / a bit assumptive. */
                if (   pCpu
                    && (off & 7)
                    && (off & 7) + cbWrite > sizeof(X86PTEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
                    AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));

                    if (uShw.pPTPae->a[iShw2].n.u1Present)
                    {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                        X86PTEPAE GstPte;
# ifdef IN_RING3
                        int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
# else
                        int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
# endif
                        AssertRC(rc);
                        Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
                        pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                                   uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
                                                   GstPte.u & X86_PTE_PAE_PG_MASK);
# endif
                        uShw.pPTPae->a[iShw2].u = 0;
                    }
                }
                break;
            }

# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
            case PGMPOOLKIND_32BIT_PD:
# else
            case PGMPOOLKIND_ROOT_32BIT_PD:
# endif
            {
                uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
                const unsigned iShw = off / sizeof(X86PTE);         // ASSUMING 32-bit guest paging!

# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
                LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
# endif
# ifndef IN_RING0
                if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
                    VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
                    STAM_COUNTER_INC(&(pVM->pgm.s.StatRZGuestCR3WriteConflict));
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                    break;
                }
# endif /* !IN_RING0 */
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
# ifndef IN_RING0
                else
# endif /* !IN_RING0 */
                {
                    if (uShw.pPD->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
                        pgmPoolFree(pVM,
                                    uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
                                    pPage->idx,
                                    iShw);
                        uShw.pPD->a[iShw].u = 0;
                    }
                }
# endif
                /* paranoia / a bit assumptive. */
                if (   pCpu
                    && (off & 3)
                    && (off & 3) + cbWrite > sizeof(X86PTE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
                    if (    iShw2 != iShw
                        &&  iShw2 < RT_ELEMENTS(uShw.pPD->a))
                    {
# ifndef IN_RING0
                        if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                        {
                            Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
                            STAM_COUNTER_INC(&(pVM->pgm.s.StatRZGuestCR3WriteConflict));
                            VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                        }
# endif /* !IN_RING0 */
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
# ifndef IN_RING0
                        else
# endif /* !IN_RING0 */
                        {
                            if (uShw.pPD->a[iShw2].n.u1Present)
                            {
                                LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
                                pgmPoolFree(pVM,
                                            uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
                                            pPage->idx,
                                            iShw2);
                                uShw.pPD->a[iShw2].u = 0;
                            }
                        }
# endif
                    }
                }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                if (    uShw.pPD->a[iShw].n.u1Present
                    &&  !VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
                {
                    LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
# ifdef IN_RC   /* TLB load - we're pushing things a bit... */
                    ASMProbeReadByte(pvAddress);
# endif
                    pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
                    uShw.pPD->a[iShw].u = 0;
                }
#endif
                break;
            }

# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
            case PGMPOOLKIND_ROOT_PAE_PD:
            {
                unsigned iGst     = off / sizeof(X86PDE);    // ASSUMING 32-bit guest paging!
                unsigned iShwPdpt = iGst / 256;
                unsigned iShw     = (iGst % 256) * 2;
                Assert(pPage->idx == PGMPOOL_IDX_PAE_PD);
                PPGMPOOLPAGE pPage2 = pPage + 1 + iShwPdpt;
                Assert(pPage2->idx == PGMPOOL_IDX_PAE_PD_0 + iShwPdpt);
                uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage2);
                for (unsigned i = 0; i < 2; i++, iShw++)
                {
                    if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                    {
                        Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
                        VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw));
                    }
                    /* paranoia / a bit assumptive. */
                    else if (   pCpu
                             && (off & 3)
                             && (off & 3) + cbWrite > 4)
                    {
                        const unsigned iShw2 = iShw + 2;
                        if (    iShw2 < RT_ELEMENTS(uShw.pPDPae->a) /** @todo was completely wrong, it's better now after #1865 but still wrong for cross PD. */
                            &&  (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                        {
                            Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
                            VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
                        }
                    }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                    if (    uShw.pPDPae->a[iShw].n.u1Present
                        &&  !VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: iShwPdpt=%#x iShw=%#x: %RX64 -> freeing it!\n", iShwPdpt, iShw, uShw.pPDPae->a[iShw].u));
# ifdef IN_RC   /* TLB load - we're pushing things a bit... */
                        ASMProbeReadByte(pvAddress);
# endif
                        pgmPoolFree(pVM, uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + iShwPdpt * X86_PG_PAE_ENTRIES);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
#endif
                }
                break;
            }
# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */

            case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
            {
                uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
                const unsigned iShw = off / sizeof(X86PDEPAE);
#ifndef IN_RING0
                if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
                    VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
                    STAM_COUNTER_INC(&(pVM->pgm.s.StatRZGuestCR3WriteConflict));
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                    break;
                }
#endif /* !IN_RING0 */
#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
                /*
                 * Causes trouble when the guest uses a PDE to refer to the whole page table level
                 * structure. (Invalidate here; faults later on when it tries to change the page
                 * table entries -> recheck; probably only applies to the RC case.)
                 */
# ifndef IN_RING0
                else
# endif /* !IN_RING0 */
                {
                    if (uShw.pPDPae->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
                        pgmPoolFree(pVM,
                                    uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
                                    pPage->idx,
                                    iShw);
# else
                                    /* Note: hardcoded PAE implementation dependency */
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
# endif
                        uShw.pPDPae->a[iShw].u = 0;
                    }
                }
#endif
                /* paranoia / a bit assumptive. */
                if (   pCpu
                    && (off & 7)
                    && (off & 7) + cbWrite > sizeof(X86PDEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
                    AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));

#ifndef IN_RING0
                    if (    iShw2 != iShw
                        &&  uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
                        VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
                        STAM_COUNTER_INC(&(pVM->pgm.s.StatRZGuestCR3WriteConflict));
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
#endif /* !IN_RING0 */
#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
# ifndef IN_RING0
                    else
# endif /* !IN_RING0 */
                    if (uShw.pPDPae->a[iShw2].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
                        pgmPoolFree(pVM,
                                    uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
                                    pPage->idx,
                                    iShw2);
# else
                                    /* Note: hardcoded PAE implementation dependency */
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
# endif
                        uShw.pPDPae->a[iShw2].u = 0;
                    }
#endif
                }
                break;
            }

# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
            case PGMPOOLKIND_PAE_PDPT:
# else
            case PGMPOOLKIND_ROOT_PDPT:
# endif
            {
                /*
                 * Hopefully this doesn't happen very often:
                 * - touching unused parts of the page
                 * - messing with the bits of pd pointers without changing the physical address
                 */
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
                /* PDPT roots are not page aligned; 32 byte only! */
                const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
# else
                const unsigned offPdpt = off;
# endif
                uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
                const unsigned iShw = offPdpt / sizeof(X86PDPE);
                if (iShw < X86_PG_PAE_PDPE_ENTRIES)          /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
                {
# ifndef IN_RING0
                    if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
                        STAM_COUNTER_INC(&(pVM->pgm.s.StatRZGuestCR3WriteConflict));
                        VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
                        break;
                    }
# endif /* !IN_RING0 */
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
# ifndef IN_RING0
                    else
# endif /* !IN_RING0 */
                    if (uShw.pPDPT->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
                        pgmPoolFree(pVM,
                                    uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
                                    pPage->idx,
                                    iShw);
                        uShw.pPDPT->a[iShw].u = 0;
                    }
# endif

                    /* paranoia / a bit assumptive. */
                    if (   pCpu
                        && (offPdpt & 7)
                        && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
                        if (    iShw2 != iShw
                            &&  iShw2 < X86_PG_PAE_PDPE_ENTRIES)
                        {
# ifndef IN_RING0
                            if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
                            {
                                Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
                                STAM_COUNTER_INC(&(pVM->pgm.s.StatRZGuestCR3WriteConflict));
                                VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
                                LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                            }
# endif /* !IN_RING0 */
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
# ifndef IN_RING0
                            else
# endif /* !IN_RING0 */
                            if (uShw.pPDPT->a[iShw2].n.u1Present)
                            {
                                LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
                                pgmPoolFree(pVM,
                                            uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
                                            pPage->idx,
                                            iShw2);
                                uShw.pPDPT->a[iShw2].u = 0;
                            }
# endif
                        }
                    }
                }
                break;
            }

#ifndef IN_RC
            case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
            {
                uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
                const unsigned iShw = off / sizeof(X86PDEPAE);
                Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
                if (uShw.pPDPae->a[iShw].n.u1Present)
                {
                    LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
                    pgmPoolFree(pVM,
                                uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
                                pPage->idx,
                                iShw);
                    uShw.pPDPae->a[iShw].u = 0;
                }
                /* paranoia / a bit assumptive. */
                if (   pCpu
                    && (off & 7)
                    && (off & 7) + cbWrite > sizeof(X86PDEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
                    AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));

                    Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
                    if (uShw.pPDPae->a[iShw2].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
                        pgmPoolFree(pVM,
                                    uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
                                    pPage->idx,
                                    iShw2);
                        uShw.pPDPae->a[iShw2].u = 0;
                    }
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
            {
                /*
                 * Hopefully this doesn't happen very often:
                 * - messing with the bits of pd pointers without changing the physical address
                 */
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
                if (!VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
# endif
                {
                    uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
                    const unsigned iShw = off / sizeof(X86PDPE);
                    if (uShw.pPDPT->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
                        pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
                        uShw.pPDPT->a[iShw].u = 0;
                    }
                    /* paranoia / a bit assumptive. */
                    if (   pCpu
                        && (off & 7)
                        && (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
                        if (uShw.pPDPT->a[iShw2].n.u1Present)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
                            pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
                            uShw.pPDPT->a[iShw2].u = 0;
                        }
                    }
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PML4:
            {
                /*
                 * Hopefully this doesn't happen very often:
                 * - messing with the bits of pd pointers without changing the physical address
                 */
# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
                if (!VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
# endif
                {
                    uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
                    const unsigned iShw = off / sizeof(X86PDPE);
                    if (uShw.pPML4->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
                        pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
                        uShw.pPML4->a[iShw].u = 0;
                    }
                    /* paranoia / a bit assumptive. */
                    if (   pCpu
                        && (off & 7)
                        && (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
                        if (uShw.pPML4->a[iShw2].n.u1Present)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
                            pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
                            uShw.pPML4->a[iShw2].u = 0;
                        }
                    }
                }
                break;
            }
#endif /* !IN_RC */

            default:
                AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
        }
        PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);

        /* next */
        if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
            return;
        pPage = &pPool->aPages[pPage->iMonitoredNext];
    }
}

# ifndef IN_RING3
/**
 * Checks if an access could be a fork operation in progress.
 *
 * Meaning, that the guest is setting up the parent process for copy-on-write.
 *
 * @returns true if it's likely that we're forking, otherwise false.
 * @param   pPool       The pool.
 * @param   pCpu        The disassembled instruction.
 * @param   offFault    The access offset.
 */
DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
{
    /*
     * i386 linux is using btr to clear X86_PTE_RW.
     * The functions involved are (2.6.16 source inspection):
     *     clear_bit
     *     ptep_set_wrprotect
     *     copy_one_pte
     *     copy_pte_range
     *     copy_pmd_range
     *     copy_pud_range
     *     copy_page_range
     *     dup_mmap
     *     dup_mm
     *     copy_mm
     *     copy_process
     *     do_fork
     */
    if (    pCpu->pCurInstr->opcode == OP_BTR
        &&  !(offFault & 4)
        /** @todo Validate that the bit index is X86_PTE_RW. */
            )
    {
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
        return true;
    }
    return false;
}
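/* Note: the !(offFault & 4) check above presumably works because X86_PTE_RW
 * is bit 1, which lives in the low dword of a (PAE) page table entry, so the
 * btr write lands on the dword at an 8-byte aligned offset. */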


/**
 * Determine whether the page is likely to have been reused.
 *
 * @returns true if we consider the page as being reused for a different purpose.
 * @returns false if we consider it to still be a paging page.
 * @param   pVM         VM Handle.
 * @param   pPage       The page in question.
 * @param   pRegFrame   Trap register frame.
 * @param   pCpu        The disassembly info for the faulting instruction.
 * @param   pvFault     The fault address.
 *
 * @remark  The REP prefix check is left to the caller because of STOSD/W.
 */
DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
{
#ifndef IN_RC
    /** @todo could make this general; faulting close to rsp should be a safe reuse heuristic. */
    if (   HWACCMHasPendingIrq(pVM)
        && (pRegFrame->rsp - pvFault) < 32)
    {
        /* Fault caused by stack writes while trying to inject an interrupt event. */
        Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
        return true;
    }
#else
    NOREF(pVM); NOREF(pvFault);
#endif

    switch (pCpu->pCurInstr->opcode)
    {
        /* call implies the actual push of the return address faulted */
        case OP_CALL:
            Log4(("pgmPoolMonitorIsReused: CALL\n"));
            return true;
        case OP_PUSH:
            Log4(("pgmPoolMonitorIsReused: PUSH\n"));
            return true;
        case OP_PUSHF:
            Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
            return true;
        case OP_PUSHA:
            Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
            return true;
        case OP_FXSAVE:
            Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
            return true;
        case OP_MOVNTI:  /* solaris - block_zero_no_xmm */
            Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
            return true;
        case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
            Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
            return true;
        case OP_MOVSWD:
        case OP_STOSWD:
            if (    pCpu->prefix == (PREFIX_REP|PREFIX_REX)
                &&  pRegFrame->rcx >= 0x40
               )
            {
                Assert(pCpu->mode == CPUMODE_64BIT);

                Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
                return true;
            }
            return false;
    }
    if (    (pCpu->param1.flags & USE_REG_GEN32)
        &&  (pCpu->param1.base.reg_gen == USE_REG_ESP))
    {
        Log4(("pgmPoolMonitorIsReused: ESP\n"));
        return true;
    }

    //if (pPage->fCR3Mix)
    //    return false;
    return false;
}


/**
 * Flushes the page being accessed.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                     PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * First, do the flushing.
     */
    int rc = pgmPoolMonitorChainFlush(pPool, pPage);

    /*
     * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
     */
    uint32_t cbWritten;
    int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
    if (RT_SUCCESS(rc2))
        pRegFrame->rip += pCpu->opsize;
    else if (rc2 == VERR_EM_INTERPRETER)
    {
#ifdef IN_RC
        if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
        {
            LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
                     pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
            rc = VINF_SUCCESS;
            STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
        }
        else
#endif
        {
            rc = VINF_EM_RAW_EMULATE_INSTR;
            STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
        }
    }
    else
        rc = rc2;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
    return rc;
}


/**
 * Handles the STOSD write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                          PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    Assert(pCpu->mode == CPUMODE_32BIT);

    Log3(("pgmPoolAccessHandlerSTOSD\n"));

    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Execute REP STOSD.
     *
     * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
     * write situation, meaning that it's safe to write here.
     */
#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
    PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
#endif
    RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
    while (pRegFrame->ecx)
    {
#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
        uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
        pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
        PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
#else
        pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
#endif
#ifdef IN_RC
        *(uint32_t *)pu32 = pRegFrame->eax;
#else
        PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
#endif
        pu32           += 4;
        GCPhysFault    += 4;
        pRegFrame->edi += 4;
        pRegFrame->ecx--;
    }
    pRegFrame->rip += pCpu->opsize;

#ifdef IN_RC
    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();
#endif

    LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
    return VINF_SUCCESS;
}


/**
 * Handles the simple write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                           PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    Log3(("pgmPoolAccessHandlerSimple\n"));
    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Clear all the pages. ASSUMES that pvFault is readable.
     */
#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
    PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
    uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
    pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
    PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
#else
    pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
#endif

    /*
     * Interpret the instruction.
     */
    uint32_t cb;
    int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
    if (RT_SUCCESS(rc))
        pRegFrame->rip += pCpu->opsize;
    else if (rc == VERR_EM_INTERPRETER)
    {
        LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
                 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
        rc = VINF_EM_RAW_EMULATE_INSTR;
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
    }

#ifdef IN_RC
    /*
     * Quick hack, with logging enabled we're getting stale
     * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
     * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
     * have to be fixed to support this. But that'll have to wait till next week.
     *
     * An alternative is to keep track of the changed PTEs together with the
     * GCPhys from the guest PT. This may prove expensive though.
     *
     * At the moment, it's VITAL that it's done AFTER the instruction interpreting
     * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
     */
    PGM_INVL_GUEST_TLBS();
#endif

    LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
    return rc;
}

/**
 * \#PF Handler callback for PT write accesses.
 *
 * @returns VBox status code (appropriate for GC return).
 * @param   pVM         VM Handle.
 * @param   uErrorCode  CPU Error code.
 * @param   pRegFrame   Trap register frame.
 *                      NULL on DMA and other non CPU access.
 * @param   pvFault     The fault address (cr2).
 * @param   GCPhysFault The GC physical address corresponding to pvFault.
 * @param   pvUser      User argument.
 */
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
{
    STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
    PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
    PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
    LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));

    /*
     * We should ALWAYS have the list head as user parameter. This
     * is because we use that page to record the changes.
     */
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);

    /*
     * Disassemble the faulting instruction.
     */
    DISCPUSTATE Cpu;
    int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
    AssertRCReturn(rc, rc);

    /*
     * Check if it's worth dealing with.
     */
    bool fReused = false;
    if (    (   pPage->cModifications < 48   /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
             || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
#else
             || pPage->fCR3Mix
#endif
            )
        &&  !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
        &&  !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
    {
        /*
         * Simple instructions, no REP prefix.
         */
        if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
        {
            rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
            return rc;
        }

        /*
         * Windows is frequently doing small memset() operations (netio test 4k+).
         * We have to deal with these or we'll kill the cache and performance.
         */
        if (    Cpu.pCurInstr->opcode == OP_STOSWD
            &&  CPUMGetGuestCPL(pVM, pRegFrame) == 0
            &&  pRegFrame->ecx <= 0x20
            &&  pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
            &&  !((uintptr_t)pvFault & 3)
            &&  (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
            &&  Cpu.mode == CPUMODE_32BIT
            &&  Cpu.opmode == CPUMODE_32BIT
            &&  Cpu.addrmode == CPUMODE_32BIT
            &&  Cpu.prefix == PREFIX_REP
            &&  !pRegFrame->eflags.Bits.u1DF
           )
        {
            rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
            return rc;
        }

        /* REP prefix, don't bother. */
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
        Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
              pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
    }

    /*
     * Not worth it, so flush it.
     *
     * If we considered it to be reused, don't go back to ring-3
     * to emulate failed instructions since we usually cannot
     * interpret them. This may be a bit risky, in which case
     * the reuse detection must be fixed.
     */
    rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
    if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
        rc = VINF_SUCCESS;
    STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
    return rc;
}
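/* In short, the handler above escalates: interpret a single write (keeping
 * the shadow in sync via pgmPoolMonitorChainChanging), special-case the
 * small REP STOSD memsets Windows issues, and otherwise flush the whole
 * monitored chain and let the instruction be emulated. */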

# endif /* !IN_RING3 */
#endif /* PGMPOOL_WITH_MONITORING */

#ifdef PGMPOOL_WITH_CACHE

/**
 * Inserts a page into the GCPhys hash table.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
    Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
    uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
    pPage->iNext = pPool->aiHash[iHash];
    pPool->aiHash[iHash] = pPage->idx;
}
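/* The bucket is a singly linked list threaded through pPage->iNext with the
 * most recently inserted page at the head, so lookups hit recently shadowed
 * pages first. PGMPOOL_HASH() is presumably keyed on the guest page frame,
 * so all shadows of the same guest page land in the same bucket. */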


/**
 * Removes a page from the GCPhys hash table.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
    uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
    if (pPool->aiHash[iHash] == pPage->idx)
        pPool->aiHash[iHash] = pPage->iNext;
    else
    {
        uint16_t iPrev = pPool->aiHash[iHash];
        for (;;)
        {
            const int16_t i = pPool->aPages[iPrev].iNext;
            if (i == pPage->idx)
            {
                pPool->aPages[iPrev].iNext = pPage->iNext;
                break;
            }
            if (i == NIL_PGMPOOL_IDX)
            {
                AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
                break;
            }
            iPrev = i;
        }
    }
    pPage->iNext = NIL_PGMPOOL_IDX;
}


/**
 * Frees up one cache page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
 * @param   pPool       The pool.
 * @param   iUser       The user index.
 */
static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
{
#ifndef IN_RC
    const PVM pVM = pPool->CTX_SUFF(pVM);
#endif
    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
    STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);

    /*
     * Select one page from the tail of the age list.
     */
    uint16_t iToFree = pPool->iAgeTail;
    if (iToFree == iUser)
        iToFree = pPool->aPages[iToFree].iAgePrev;
/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
    if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
    {
        uint16_t i = pPool->aPages[iToFree].iAgePrev;
        for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
        {
            if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
                continue;
            iToFree = i;
            break;
        }
    }
*/

    Assert(iToFree != iUser);
    AssertRelease(iToFree != NIL_PGMPOOL_IDX);

    PPGMPOOLPAGE pPage = &pPool->aPages[iToFree];

    /*
     * Reject any attempts at flushing the currently active shadow CR3 mapping.
     */
#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
    if (pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
#else
    if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
#endif
    {
        /* Refresh the cr3 mapping by putting it at the head of the age list. */
        LogFlow(("pgmPoolCacheFreeOne refuse CR3 mapping\n"));
        pgmPoolCacheUsed(pPool, pPage);
        return pgmPoolCacheFreeOne(pPool, iUser);
    }

    int rc = pgmPoolFlushPage(pPool, pPage);
    if (rc == VINF_SUCCESS)
        PGM_INVL_GUEST_TLBS(); /* see PT handler. */
    return rc;
}
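/* Note: the recursion above terminates because pgmPoolCacheUsed() moves the
 * rejected (locked / active CR3) page to the head of the age list, so the
 * next pass picks a different, older page from the tail. */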


/**
 * Checks if a kind mismatch is really a page being reused
 * or if it's just normal remappings.
 *
 * @returns true if reused and the cached page (enmKind1) should be flushed.
 * @returns false if not reused.
 * @param   enmKind1    The kind of the cached page.
 * @param   enmKind2    The kind of the requested page.
 */
static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
{
    switch (enmKind1)
    {
        /*
         * Never reuse them. There is no remapping in non-paging mode.
         */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_32BIT_PD_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PD_PHYS:
        case PGMPOOLKIND_PAE_PDPT_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
        case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
            return false;
#else
            return true;
#endif

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         */
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
        case PGMPOOLKIND_32BIT_PD:
        case PGMPOOLKIND_PAE_PDPT:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
                case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
                case PGMPOOLKIND_64BIT_PML4:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         */
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * These cannot be flushed, and it's common to reuse the PDs as PTs.
         */
#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
        case PGMPOOLKIND_ROOT_PDPT:
#endif
        case PGMPOOLKIND_ROOT_NESTED:
            return false;

        default:
            AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
    }
}


/**
 * Attempts to satisfy a pgmPoolAlloc request from the cache.
 *
 * @returns VBox status code.
 * @retval  VINF_PGM_CACHED_PAGE on success.
 * @retval  VERR_FILE_NOT_FOUND if not found.
 * @param   pPool       The pool.
 * @param   GCPhys      The GC physical address of the page we're going to shadow.
 * @param   enmKind     The kind of mapping.
 * @param   iUser       The shadow page pool index of the user table.
 * @param   iUserTable  The index into the user table (shadowed).
 * @param   ppPage      Where to store the pointer to the page.
 */
static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
{
#ifndef IN_RC
    const PVM pVM = pPool->CTX_SUFF(pVM);
#endif
    /*
     * Look up the GCPhys in the hash.
     */
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
    if (i != NIL_PGMPOOL_IDX)
    {
        do
        {
            PPGMPOOLPAGE pPage = &pPool->aPages[i];
            Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
            if (pPage->GCPhys == GCPhys)
            {
                if ((PGMPOOLKIND)pPage->enmKind == enmKind)
                {
                    /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
                     * doesn't flush it in case there are no more free use records.
                     */
                    pgmPoolCacheUsed(pPool, pPage);

                    int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
                    if (RT_SUCCESS(rc))
                    {
                        Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
                        *ppPage = pPage;
                        STAM_COUNTER_INC(&pPool->StatCacheHits);
                        return VINF_PGM_CACHED_PAGE;
                    }
                    return rc;
                }

                /*
                 * The kind is different. In some cases we should now flush the page
                 * as it has been reused, but in most cases this is normal remapping
                 * of PDs as PT or big pages using the GCPhys field in a slightly
                 * different way than the other kinds.
                 */
                if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
                {
                    STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
                    pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
                    PGM_INVL_GUEST_TLBS(); /* see PT handler. */
                    break;
                }
            }

            /* next */
            i = pPage->iNext;
        } while (i != NIL_PGMPOOL_IDX);
    }

    Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
    STAM_COUNTER_INC(&pPool->StatCacheMisses);
    return VERR_FILE_NOT_FOUND;
}
1672
1673
1674/**
1675 * Inserts a page into the cache.
1676 *
1677 * @param pPool The pool.
1678 * @param pPage The cached page.
1679 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1680 */
1681static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1682{
1683 /*
1684 * Insert into the GCPhys hash if the page is fit for that.
1685 */
1686 Assert(!pPage->fCached);
1687 if (fCanBeCached)
1688 {
1689 pPage->fCached = true;
1690 pgmPoolHashInsert(pPool, pPage);
1691 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1692 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1693 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1694 }
1695 else
1696 {
1697 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1698 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1699 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1700 }
1701
1702 /*
1703 * Insert at the head of the age list.
1704 */
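        /* (The age list is kept in LRU order: the head is the most recently
           used end, while eviction in pgmPoolCacheFreeOne starts at the tail.) */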
1705 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1706 pPage->iAgeNext = pPool->iAgeHead;
1707 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1708 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1709 else
1710 pPool->iAgeTail = pPage->idx;
1711 pPool->iAgeHead = pPage->idx;
1712}
1713
1714
1715/**
1716 * Flushes a cached page.
1717 *
1718 * @param pPool The pool.
1719 * @param pPage The cached page.
1720 */
1721static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1722{
1723 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1724
1725 /*
1726 * Remove the page from the hash.
1727 */
1728 if (pPage->fCached)
1729 {
1730 pPage->fCached = false;
1731 pgmPoolHashRemove(pPool, pPage);
1732 }
1733 else
1734 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1735
1736 /*
1737 * Remove it from the age list.
1738 */
1739 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1740 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1741 else
1742 pPool->iAgeTail = pPage->iAgePrev;
1743 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1744 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1745 else
1746 pPool->iAgeHead = pPage->iAgeNext;
1747 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1748 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1749}
1750
1751#endif /* PGMPOOL_WITH_CACHE */
1752#ifdef PGMPOOL_WITH_MONITORING
1753
1754/**
1755 * Looks for pages sharing the monitor.
1756 *
1757 * @returns Pointer to the head page.
1758 * @returns NULL if not found.
1759 * @param pPool The pool.
1760 * @param pNewPage The page which is going to be monitored.
1761 */
1762static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1763{
1764#ifdef PGMPOOL_WITH_CACHE
1765 /*
1766 * Look up the GCPhys in the hash.
1767 */
1768 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1769 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1770 if (i == NIL_PGMPOOL_IDX)
1771 return NULL;
1772 do
1773 {
1774 PPGMPOOLPAGE pPage = &pPool->aPages[i];
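            /* Unsigned distance check: this matches any pool page whose GCPhys
               lies within the same guest page as the (page aligned) GCPhys,
               saving a second comparison. */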
1775 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1776 && pPage != pNewPage)
1777 {
1778 switch (pPage->enmKind)
1779 {
1780 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1781 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1782 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1783 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1784 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1785 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1786 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1787 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1788 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1789 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1790 case PGMPOOLKIND_64BIT_PML4:
1791#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1792 case PGMPOOLKIND_32BIT_PD:
1793 case PGMPOOLKIND_PAE_PDPT:
1794#else
1795 case PGMPOOLKIND_ROOT_32BIT_PD:
1796 case PGMPOOLKIND_ROOT_PAE_PD:
1797 case PGMPOOLKIND_ROOT_PDPT:
1798#endif
1799 {
1800 /* find the head */
1801 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1802 {
1803 Assert(pPage->iMonitoredPrev != pPage->idx);
1804 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1805 }
1806 return pPage;
1807 }
1808
1809 /* ignore, no monitoring. */
1810 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1811 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1812 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1813 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1814 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1815 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1816 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1817 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1818 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1819 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1820 case PGMPOOLKIND_ROOT_NESTED:
1821 case PGMPOOLKIND_PAE_PD_PHYS:
1822 case PGMPOOLKIND_PAE_PDPT_PHYS:
1823 case PGMPOOLKIND_32BIT_PD_PHYS:
1824#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1825 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1826#endif
1827 break;
1828 default:
1829 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1830 }
1831 }
1832
1833 /* next */
1834 i = pPage->iNext;
1835 } while (i != NIL_PGMPOOL_IDX);
1836#endif
1837 return NULL;
1838}
1839
1840
1841/**
1842 * Enables write monitoring of a guest page.
1843 *
1844 * @returns VBox status code.
1845 * @retval VINF_SUCCESS on success.
1846 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a lightweight pool flush.
1847 * @param pPool The pool.
1848 * @param pPage The cached page.
1849 */
1850static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1851{
1852 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1853
1854 /*
1855 * Filter out the relevant kinds.
1856 */
1857 switch (pPage->enmKind)
1858 {
1859 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1860 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1861 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1862 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1863 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1864 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1865 case PGMPOOLKIND_64BIT_PML4:
1866#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1867 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1868 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1869 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1870 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1871 case PGMPOOLKIND_32BIT_PD:
1872 case PGMPOOLKIND_PAE_PDPT:
1873#else
1874 case PGMPOOLKIND_ROOT_PDPT:
1875#endif
1876 break;
1877
1878 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1879 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1880 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1881 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1882 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1883 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1884 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1885 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1886 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1887 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1888 case PGMPOOLKIND_ROOT_NESTED:
1889 /* Nothing to monitor here. */
1890 return VINF_SUCCESS;
1891
1892#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1893 case PGMPOOLKIND_32BIT_PD_PHYS:
1894 case PGMPOOLKIND_PAE_PDPT_PHYS:
1895 case PGMPOOLKIND_PAE_PD_PHYS:
1896 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1897 /* Nothing to monitor here. */
1898 return VINF_SUCCESS;
1899#else
1900 case PGMPOOLKIND_ROOT_32BIT_PD:
1901 case PGMPOOLKIND_ROOT_PAE_PD:
1902#endif
1903#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1904 break;
1905#else
1906 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1907#endif
1908 default:
1909 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1910 }
1911
1912 /*
1913 * Install handler.
1914 */
1915 int rc;
1916 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1917 if (pPageHead)
1918 {
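            /* Another pool page already monitors this guest page: link ourselves
               into the chain right after the head. All chain members share the
               physical access handler registered for the chain head. */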
1919 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1920 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1921 pPage->iMonitoredPrev = pPageHead->idx;
1922 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1923 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1924 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1925 pPageHead->iMonitoredNext = pPage->idx;
1926 rc = VINF_SUCCESS;
1927 }
1928 else
1929 {
1930 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1931 PVM pVM = pPool->CTX_SUFF(pVM);
1932 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1933 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1934 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1935 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1936 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1937 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1938 pPool->pszAccessHandler);
1939 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1940 * the heap size should suffice. */
1941 AssertFatalRC(rc);
1942 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1943 rc = VERR_PGM_POOL_CLEARED;
1944 }
1945 pPage->fMonitored = true;
1946 return rc;
1947}
1948
1949
1950/**
1951 * Disables write monitoring of a guest page.
1952 *
1953 * @returns VBox status code.
1954 * @retval VINF_SUCCESS on success.
1955 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
1956 * @param pPool The pool.
1957 * @param pPage The cached page.
1958 */
1959static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1960{
1961 /*
1962 * Filter out the relevant kinds.
1963 */
1964 switch (pPage->enmKind)
1965 {
1966 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1967 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1968 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1969 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1970 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1971 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1972 case PGMPOOLKIND_64BIT_PML4:
1973#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1974 case PGMPOOLKIND_32BIT_PD:
1975 case PGMPOOLKIND_PAE_PDPT:
1976 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1977 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1978 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1979 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1980#else
1981 case PGMPOOLKIND_ROOT_PDPT:
1982#endif
1983 break;
1984
1985 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1986 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1987 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1988 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1989 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1990 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1991 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1992 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1993 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1994 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1995 case PGMPOOLKIND_ROOT_NESTED:
1996 case PGMPOOLKIND_PAE_PD_PHYS:
1997 case PGMPOOLKIND_PAE_PDPT_PHYS:
1998 case PGMPOOLKIND_32BIT_PD_PHYS:
1999 /* Nothing to monitor here. */
2000 return VINF_SUCCESS;
2001
2002#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2003 case PGMPOOLKIND_ROOT_32BIT_PD:
2004 case PGMPOOLKIND_ROOT_PAE_PD:
2005#endif
2006#ifdef PGMPOOL_WITH_MIXED_PT_CR3
2007 break;
2008#endif
2009#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2010 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2011#endif
2012 default:
2013 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2014 }
2015
2016 /*
2017 * Remove the page from the monitored list or uninstall it if last.
2018 */
2019 const PVM pVM = pPool->CTX_SUFF(pVM);
2020 int rc;
2021 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2022 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2023 {
2024 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2025 {
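                /* This page is the chain head: promote the next page to head and
                   repoint the handler callbacks' user argument at it. */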
2026 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2027 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2028#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2029 pNewHead->fCR3Mix = pPage->fCR3Mix;
2030#endif
2031 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2032 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2033 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2034 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2035 pPool->pszAccessHandler);
2036 AssertFatalRCSuccess(rc);
2037 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2038 }
2039 else
2040 {
2041 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2042 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2043 {
2044 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2045 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2046 }
2047 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2048 rc = VINF_SUCCESS;
2049 }
2050 }
2051 else
2052 {
2053 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2054 AssertFatalRC(rc);
2055 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2056 rc = VERR_PGM_POOL_CLEARED;
2057 }
2058 pPage->fMonitored = false;
2059
2060 /*
2061 * Remove it from the list of modified pages (if in it).
2062 */
2063 pgmPoolMonitorModifiedRemove(pPool, pPage);
2064
2065 return rc;
2066}
2067
2068# if defined(PGMPOOL_WITH_MIXED_PT_CR3) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2069
2070/**
2071 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
2072 *
2073 * @param pPool The pool.
2074 * @param pPage A page in the chain.
2075 * @param fCR3Mix The new fCR3Mix value.
2076 */
2077static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
2078{
2079 /* current */
2080 pPage->fCR3Mix = fCR3Mix;
2081
2082 /* before */
2083 int16_t idx = pPage->iMonitoredPrev;
2084 while (idx != NIL_PGMPOOL_IDX)
2085 {
2086 pPool->aPages[idx].fCR3Mix = fCR3Mix;
2087 idx = pPool->aPages[idx].iMonitoredPrev;
2088 }
2089
2090 /* after */
2091 idx = pPage->iMonitoredNext;
2092 while (idx != NIL_PGMPOOL_IDX)
2093 {
2094 pPool->aPages[idx].fCR3Mix = fCR3Mix;
2095 idx = pPool->aPages[idx].iMonitoredNext;
2096 }
2097}
2098
2099
2100/**
2101 * Installs or modifies monitoring of a CR3 page (special).
2102 *
2103 * We're pretending the CR3 page is shadowed by the pool so we can use the
2104 * generic mechanisms for detecting chained monitoring. (This also gives us a
2105 * taste of what code changes are required to really pool CR3 shadow pages.)
2106 *
2107 * @returns VBox status code.
2108 * @param pPool The pool.
2109 * @param idxRoot The CR3 (root) page index.
2110 * @param GCPhysCR3 The (new) CR3 value.
2111 */
2112int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
2113{
2114 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
2115 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
2116 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d} GCPhysCR3=%RGp\n",
2117 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
2118
2119 /*
2120 * The unlikely case where it already matches.
2121 */
2122 if (pPage->GCPhys == GCPhysCR3)
2123 {
2124 Assert(pPage->fMonitored);
2125 return VINF_SUCCESS;
2126 }
2127
2128 /*
2129 * Flush the current monitoring and remove it from the hash.
2130 */
2131 int rc = VINF_SUCCESS;
2132 if (pPage->fMonitored)
2133 {
2134 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
2135 rc = pgmPoolMonitorFlush(pPool, pPage);
2136 if (rc == VERR_PGM_POOL_CLEARED)
2137 rc = VINF_SUCCESS;
2138 else
2139 AssertFatalRC(rc);
2140 pgmPoolHashRemove(pPool, pPage);
2141 }
2142
2143 /*
2144 * Monitor the page at the new location and insert it into the hash.
2145 */
2146 pPage->GCPhys = GCPhysCR3;
2147 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
2148 if (rc2 != VERR_PGM_POOL_CLEARED)
2149 {
2150 AssertFatalRC(rc2);
2151 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
2152 rc = rc2;
2153 }
2154 pgmPoolHashInsert(pPool, pPage);
2155 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
2156 return rc;
2157}
2158
2159
2160/**
2161 * Removes the monitoring of a CR3 page (special).
2162 *
2163 * @returns VBox status code.
2164 * @param pPool The pool.
2165 * @param idxRoot The CR3 (root) page index.
2166 */
2167int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
2168{
2169 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
2170 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
2171 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d}\n",
2172 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
2173
2174 if (!pPage->fMonitored)
2175 return VINF_SUCCESS;
2176
2177 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
2178 int rc = pgmPoolMonitorFlush(pPool, pPage);
2179 if (rc != VERR_PGM_POOL_CLEARED)
2180 AssertFatalRC(rc);
2181 else
2182 rc = VINF_SUCCESS;
2183 pgmPoolHashRemove(pPool, pPage);
2184 Assert(!pPage->fMonitored);
2185 pPage->GCPhys = NIL_RTGCPHYS;
2186 return rc;
2187}
2188
2189# endif /* PGMPOOL_WITH_MIXED_PT_CR3 && !VBOX_WITH_PGMPOOL_PAGING_ONLY*/
2190
2191/**
2192 * Inserts the page into the list of modified pages.
2193 *
2194 * @param pPool The pool.
2195 * @param pPage The page.
2196 */
2197void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2198{
2199 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2200 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2201 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2202 && pPool->iModifiedHead != pPage->idx,
2203 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2204 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2205 pPool->iModifiedHead, pPool->cModifiedPages));
2206
2207 pPage->iModifiedNext = pPool->iModifiedHead;
2208 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2209 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2210 pPool->iModifiedHead = pPage->idx;
2211 pPool->cModifiedPages++;
2212#ifdef VBOX_WITH_STATISTICS
2213 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2214 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2215#endif
2216}
2217
2218
2219/**
2220 * Removes the page from the list of modified pages and resets the
2221 * modification counter.
2222 *
2223 * @param pPool The pool.
2224 * @param pPage The page which is believed to be in the list of modified pages.
2225 */
2226static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2227{
2228 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2229 if (pPool->iModifiedHead == pPage->idx)
2230 {
2231 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2232 pPool->iModifiedHead = pPage->iModifiedNext;
2233 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2234 {
2235 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2236 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2237 }
2238 pPool->cModifiedPages--;
2239 }
2240 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2241 {
2242 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2243 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2244 {
2245 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2246 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2247 }
2248 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2249 pPool->cModifiedPages--;
2250 }
2251 else
2252 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2253 pPage->cModifications = 0;
2254}
2255
2256
2257/**
2258 * Zaps the list of modified pages, resetting their modification counters in the process.
2259 *
2260 * @param pVM The VM handle.
2261 */
2262void pgmPoolMonitorModifiedClearAll(PVM pVM)
2263{
2264 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2265 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2266
2267 unsigned cPages = 0; NOREF(cPages);
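        /* cPages is only ever incremented inside Assert(), i.e. in strict builds,
           where it cross-checks cModifiedPages below. */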
2268 uint16_t idx = pPool->iModifiedHead;
2269 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2270 while (idx != NIL_PGMPOOL_IDX)
2271 {
2272 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2273 idx = pPage->iModifiedNext;
2274 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2275 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2276 pPage->cModifications = 0;
2277 Assert(++cPages);
2278 }
2279 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2280 pPool->cModifiedPages = 0;
2281}
2282
2283
2284#ifdef IN_RING3
2285/**
2286 * Clears all shadow pages and resets all modification counters.
2287 *
2288 * @param pVM The VM handle.
2289 * @remark Should only be used when monitoring is available, thus placed in
2290 * the PGMPOOL_WITH_MONITORING #ifdef.
2291 */
2292void pgmPoolClearAll(PVM pVM)
2293{
2294 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2295 STAM_PROFILE_START(&pPool->StatClearAll, c);
2296 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2297
2298 /*
2299     * Iterate all the pages until we've encountered all those that are in use.
2300     * This is a simple but not quite optimal solution.
2301 */
2302 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2303 unsigned cLeft = pPool->cUsedPages;
2304 unsigned iPage = pPool->cCurPages;
2305 while (--iPage >= PGMPOOL_IDX_FIRST)
2306 {
2307 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2308 if (pPage->GCPhys != NIL_RTGCPHYS)
2309 {
2310 switch (pPage->enmKind)
2311 {
2312 /*
2313 * We only care about shadow page tables.
2314 */
2315 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2316 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2317 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2318 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2319 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2320 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2321 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2322 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2323 {
2324#ifdef PGMPOOL_WITH_USER_TRACKING
2325 if (pPage->cPresent)
2326#endif
2327 {
2328 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2329 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2330 ASMMemZeroPage(pvShw);
2331 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2332#ifdef PGMPOOL_WITH_USER_TRACKING
2333 pPage->cPresent = 0;
2334 pPage->iFirstPresent = ~0;
2335#endif
2336 }
2337 }
2338 /* fall thru */
2339
2340 default:
2341 Assert(!pPage->cModifications || ++cModifiedPages);
2342 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2343 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2344 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2345 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2346 pPage->cModifications = 0;
2347 break;
2348
2349 }
2350 if (!--cLeft)
2351 break;
2352 }
2353 }
2354
2355    /* sweep the special pages too. */
2356 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2357 {
2358 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2359 if (pPage->GCPhys != NIL_RTGCPHYS)
2360 {
2361 Assert(!pPage->cModifications || ++cModifiedPages);
2362 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2363 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2364 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2365 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2366 pPage->cModifications = 0;
2367 }
2368 }
2369
2370#ifndef DEBUG_michael
2371 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2372#endif
2373 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2374 pPool->cModifiedPages = 0;
2375
2376#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2377 /*
2378 * Clear all the GCPhys links and rebuild the phys ext free list.
2379 */
2380 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2381 pRam;
2382 pRam = pRam->CTX_SUFF(pNext))
2383 {
2384 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2385 while (iPage-- > 0)
2386 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
2387 }
2388
2389 pPool->iPhysExtFreeHead = 0;
2390 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2391 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2392 for (unsigned i = 0; i < cMaxPhysExts; i++)
2393 {
2394 paPhysExts[i].iNext = i + 1;
2395 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2396 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2397 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2398 }
2399 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2400#endif
2401
2402
2403 pPool->cPresent = 0;
2404 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2405}
2406#endif /* IN_RING3 */
2407
2408
2409/**
2410 * Handles SyncCR3 pool tasks.
2411 *
2412 * @returns VBox status code.
2413 * @retval VINF_SUCCESS on success.
2414 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2415 * @param pVM The VM handle.
2416 * @remark Should only be used when monitoring is available, thus placed in
2417 * the PGMPOOL_WITH_MONITORING #ifdef.
2418 */
2419int pgmPoolSyncCR3(PVM pVM)
2420{
2421 LogFlow(("pgmPoolSyncCR3\n"));
2422 /*
2423 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2424 * Occasionally we will have to clear all the shadow page tables because we wanted
2425 * to monitor a page which was mapped by too many shadowed page tables. This operation
2426 * is sometimes referred to as a 'lightweight flush'.
2427 */
2428 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2429 pgmPoolMonitorModifiedClearAll(pVM);
2430 else
2431 {
2432# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2433 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2434 pgmPoolClearAll(pVM);
2435# else /* !IN_RING3 */
2436 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2437 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2438 return VINF_PGM_SYNC_CR3;
2439# endif /* !IN_RING3 */
2440 }
2441 return VINF_SUCCESS;
2442}
2443
2444#endif /* PGMPOOL_WITH_MONITORING */
2445#ifdef PGMPOOL_WITH_USER_TRACKING
2446
2447/**
2448 * Frees up at least one user entry.
2449 *
2450 * @returns VBox status code.
2451 * @retval VINF_SUCCESS on success.
2452 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2453 * @param pPool The pool.
2454 * @param iUser The user index.
2455 */
2456static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2457{
2458 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2459#ifdef PGMPOOL_WITH_CACHE
2460 /*
2461 * Just free cached pages in a braindead fashion.
2462 */
2463 /** @todo walk the age list backwards and free the first with usage. */
2464 int rc = VINF_SUCCESS;
2465 do
2466 {
2467 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2468 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2469 rc = rc2;
2470 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2471 return rc;
2472#else
2473 /*
2474 * Lazy approach.
2475 */
2476    /** @todo This is incompatible with long mode paging (the CR3 root will be flushed). */
2477    Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
2478 pgmPoolFlushAllInt(pPool);
2479 return VERR_PGM_POOL_FLUSHED;
2480#endif
2481}
2482
2483
2484/**
2485 * Inserts a page into the cache.
2486 *
2487 * This will create a user node for the page, insert it into the GCPhys
2488 * hash, and insert it into the age list.
2489 *
2490 * @returns VBox status code.
2491 * @retval VINF_SUCCESS if successfully added.
2492 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2493 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a lightweight pool flush.
2494 * @param pPool The pool.
2495 * @param pPage The cached page.
2496 * @param GCPhys The GC physical address of the page we're going to shadow.
2497 * @param iUser The user index.
2498 * @param iUserTable The user table index.
2499 */
2500DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2501{
2502 int rc = VINF_SUCCESS;
2503 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2504
2505 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2506
2507#ifdef VBOX_STRICT
2508 /*
2509     * Check that the entry doesn't already exist.
2510 */
2511 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2512 {
2513 uint16_t i = pPage->iUserHead;
2514 do
2515 {
2516 Assert(i < pPool->cMaxUsers);
2517 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2518 i = paUsers[i].iNext;
2519 } while (i != NIL_PGMPOOL_USER_INDEX);
2520 }
2521#endif
2522
2523 /*
2524     * Find a free user node.
2525 */
2526 uint16_t i = pPool->iUserFreeHead;
2527 if (i == NIL_PGMPOOL_USER_INDEX)
2528 {
2529 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2530 if (RT_FAILURE(rc))
2531 return rc;
2532 i = pPool->iUserFreeHead;
2533 }
2534
2535 /*
2536 * Unlink the user node from the free list,
2537 * initialize and insert it into the user list.
2538 */
2539 pPool->iUserFreeHead = paUsers[i].iNext;
2540 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2541 paUsers[i].iUser = iUser;
2542 paUsers[i].iUserTable = iUserTable;
2543 pPage->iUserHead = i;
2544
2545 /*
2546 * Insert into cache and enable monitoring of the guest page if enabled.
2547 *
2548 * Until we implement caching of all levels, including the CR3 one, we'll
2549 * have to make sure we don't try monitor & cache any recursive reuse of
2550     * a monitored CR3 page. Because all Windows versions are doing this we'll
2551 * have to be able to do combined access monitoring, CR3 + PT and
2552 * PD + PT (guest PAE).
2553 *
2554 * Update:
2555 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2556 */
2557#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2558# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2559 const bool fCanBeMonitored = true;
2560# else
2561 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2562 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2563 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2564# endif
2565# ifdef PGMPOOL_WITH_CACHE
2566 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2567# endif
2568 if (fCanBeMonitored)
2569 {
2570# ifdef PGMPOOL_WITH_MONITORING
2571 rc = pgmPoolMonitorInsert(pPool, pPage);
2572 if (rc == VERR_PGM_POOL_CLEARED)
2573 {
2574 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2575# ifndef PGMPOOL_WITH_CACHE
2576 pgmPoolMonitorFlush(pPool, pPage);
2577 rc = VERR_PGM_POOL_FLUSHED;
2578# endif
2579 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2580 paUsers[i].iNext = pPool->iUserFreeHead;
2581 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2582 pPool->iUserFreeHead = i;
2583 }
2584# endif
2585    }
2586#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2587 return rc;
2588}
2589
2590
2591# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2592/**
2593 * Adds a user reference to a page.
2594 *
2595 * This will move the page to the head of the age list.
2596 *
2597 * @returns VBox status code.
2598 * @retval VINF_SUCCESS if successfully added.
2599 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2600 * @param pPool The pool.
2601 * @param pPage The cached page.
2602 * @param iUser The user index.
2603 * @param iUserTable The user table.
2604 */
2605static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2606{
2607 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2608
2609 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2610# ifdef VBOX_STRICT
2611 /*
2612     * Check that the entry doesn't already exist.
2613 */
2614 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2615 {
2616 uint16_t i = pPage->iUserHead;
2617 do
2618 {
2619 Assert(i < pPool->cMaxUsers);
2620 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2621 i = paUsers[i].iNext;
2622 } while (i != NIL_PGMPOOL_USER_INDEX);
2623 }
2624# endif
2625
2626 /*
2627 * Allocate a user node.
2628 */
2629 uint16_t i = pPool->iUserFreeHead;
2630 if (i == NIL_PGMPOOL_USER_INDEX)
2631 {
2632 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2633 if (RT_FAILURE(rc))
2634 return rc;
2635 i = pPool->iUserFreeHead;
2636 }
2637 pPool->iUserFreeHead = paUsers[i].iNext;
2638
2639 /*
2640 * Initialize the user node and insert it.
2641 */
2642 paUsers[i].iNext = pPage->iUserHead;
2643 paUsers[i].iUser = iUser;
2644 paUsers[i].iUserTable = iUserTable;
2645 pPage->iUserHead = i;
2646
2647# ifdef PGMPOOL_WITH_CACHE
2648 /*
2649 * Tell the cache to update its replacement stats for this page.
2650 */
2651 pgmPoolCacheUsed(pPool, pPage);
2652# endif
2653 return VINF_SUCCESS;
2654}
2655# endif /* PGMPOOL_WITH_CACHE */
2656
2657
2658/**
2659 * Frees a user record associated with a page.
2660 *
2661 * This does not clear the entry in the user table; it simply returns the
2662 * user record to the chain of free records.
2663 *
2664 * @param pPool The pool.
2665 * @param pPage The shadow page.
2666 * @param iUser The shadow page pool index of the user table.
2667 * @param iUserTable The index into the user table (shadowed).
2668 */
2669static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2670{
2671 /*
2672 * Unlink and free the specified user entry.
2673 */
2674 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2675
2676 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2677 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2678 uint16_t i = pPage->iUserHead;
2679 if ( i != NIL_PGMPOOL_USER_INDEX
2680 && paUsers[i].iUser == iUser
2681 && paUsers[i].iUserTable == iUserTable)
2682 {
2683 pPage->iUserHead = paUsers[i].iNext;
2684
2685 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2686 paUsers[i].iNext = pPool->iUserFreeHead;
2687 pPool->iUserFreeHead = i;
2688 return;
2689 }
2690
2691 /* General: Linear search. */
2692 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2693 while (i != NIL_PGMPOOL_USER_INDEX)
2694 {
2695 if ( paUsers[i].iUser == iUser
2696 && paUsers[i].iUserTable == iUserTable)
2697 {
2698 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2699 paUsers[iPrev].iNext = paUsers[i].iNext;
2700 else
2701 pPage->iUserHead = paUsers[i].iNext;
2702
2703 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2704 paUsers[i].iNext = pPool->iUserFreeHead;
2705 pPool->iUserFreeHead = i;
2706 return;
2707 }
2708 iPrev = i;
2709 i = paUsers[i].iNext;
2710 }
2711
2712 /* Fatal: didn't find it */
2713 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2714 iUser, iUserTable, pPage->GCPhys));
2715}
2716
2717
2718/**
2719 * Gets the entry size of a shadow table.
2720 *
2721 * @param enmKind The kind of page.
2722 *
2723 * @returns The size of the entry in bytes. That is, 4 or 8.
2724 * @returns If the kind is not for a table, a fatal assertion is raised.
2726 */
2727DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2728{
2729 switch (enmKind)
2730 {
2731 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2732 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2733 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2734#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2735 case PGMPOOLKIND_32BIT_PD:
2736 case PGMPOOLKIND_32BIT_PD_PHYS:
2737#else
2738 case PGMPOOLKIND_ROOT_32BIT_PD:
2739#endif
2740 return 4;
2741
2742 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2743 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2744 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2745 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2746 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2747 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2748 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2749 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2750 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2751 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2752 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2753 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2754 case PGMPOOLKIND_64BIT_PML4:
2755#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2756 case PGMPOOLKIND_ROOT_PAE_PD:
2757 case PGMPOOLKIND_ROOT_PDPT:
2758#endif
2759 case PGMPOOLKIND_PAE_PDPT:
2760 case PGMPOOLKIND_ROOT_NESTED:
2761 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2762 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2763 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2764 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2765 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2766 case PGMPOOLKIND_PAE_PD_PHYS:
2767 case PGMPOOLKIND_PAE_PDPT_PHYS:
2768 return 8;
2769
2770 default:
2771 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2772 }
2773}
2774
2775
2776/**
2777 * Gets the entry size of a guest table.
2778 *
2779 * @param enmKind The kind of page.
2780 *
2781 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2782 * @returns If the kind is not for a table, an assertion is raised and 0 is
2783 * returned.
2784 */
2785DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2786{
2787 switch (enmKind)
2788 {
2789 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2790 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2791#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2792 case PGMPOOLKIND_32BIT_PD:
2793#else
2794 case PGMPOOLKIND_ROOT_32BIT_PD:
2795#endif
2796 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2797 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2798 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2799 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2800 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2801 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2802 return 4;
2803
2804 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2805 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2806 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2807 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2808 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2809 case PGMPOOLKIND_64BIT_PML4:
2810#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2811 case PGMPOOLKIND_PAE_PDPT:
2812#else
2813 case PGMPOOLKIND_ROOT_PAE_PD:
2814 case PGMPOOLKIND_ROOT_PDPT:
2815#endif
2816 return 8;
2817
2818 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2819 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2820 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2821 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2822 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2823 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2824 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2825 case PGMPOOLKIND_ROOT_NESTED:
2826 case PGMPOOLKIND_PAE_PD_PHYS:
2827 case PGMPOOLKIND_PAE_PDPT_PHYS:
2828 case PGMPOOLKIND_32BIT_PD_PHYS:
2829 /** @todo can we return 0? (nobody is calling this...) */
2830 AssertFailed();
2831 return 0;
2832
2833 default:
2834 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2835 }
2836}
2837
2838#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2839
2840/**
2841 * Scans one shadow page table for mappings of a physical page.
2842 *
2843 * @param pVM The VM handle.
2844 * @param pPhysPage The guest page in question.
2845 * @param iShw The shadow page table.
2846 * @param cRefs The number of references made in that PT.
2847 */
2848static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2849{
2850 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
2851 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2852
2853 /*
2854 * Assert sanity.
2855 */
2856 Assert(cRefs == 1);
2857 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2858 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2859
2860 /*
2861 * Then, clear the actual mappings to the page in the shadow PT.
2862 */
2863 switch (pPage->enmKind)
2864 {
2865 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2866 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2867 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2868 {
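                /* Compare each PTE against the page's host physical address with
                   the present bit set, zeroing matches until the expected number
                   of references (cRefs) has been cleared. */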
2869 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2870 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2871 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2872 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2873 {
2874 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2875 pPT->a[i].u = 0;
2876 cRefs--;
2877 if (!cRefs)
2878 return;
2879 }
2880#ifdef LOG_ENABLED
2881 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2882 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2883 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2884 {
2885 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2886 pPT->a[i].u = 0;
2887 }
2888#endif
2889 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2890 break;
2891 }
2892
2893 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2894 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2895 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2896 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2897 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2898 {
2899 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2900 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2901 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2902 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2903 {
2904 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2905 pPT->a[i].u = 0;
2906 cRefs--;
2907 if (!cRefs)
2908 return;
2909 }
2910#ifdef LOG_ENABLED
2911 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2912 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2913 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2914 {
2915 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2916 pPT->a[i].u = 0;
2917 }
2918#endif
2919 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
2920 break;
2921 }
2922
2923 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2924 {
2925 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2926 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2927 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2928 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2929 {
2930 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2931 pPT->a[i].u = 0;
2932 cRefs--;
2933 if (!cRefs)
2934 return;
2935 }
2936#ifdef LOG_ENABLED
2937 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2938 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2939 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2940 {
2941 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2942 pPT->a[i].u = 0;
2943 }
2944#endif
2945 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2946 break;
2947 }
2948
2949 default:
2950 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2951 }
2952}
2953
2954
2955/**
2956 * Scans one shadow page table for mappings of a physical page.
2957 *
2958 * @param pVM The VM handle.
2959 * @param pPhysPage The guest page in question.
2960 * @param iShw The shadow page table.
2961 * @param cRefs The number of references made in that PT.
2962 */
2963void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2964{
2965 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2966 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
2967 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2968 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2969 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
2970 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2971}
2972
2973
2974/**
2975 * Flushes a list of shadow page tables mapping the same physical page.
2976 *
2977 * @param pVM The VM handle.
2978 * @param pPhysPage The guest page in question.
2979 * @param iPhysExt The physical cross reference extent list to flush.
2980 */
2981void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2982{
2983 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2984 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2985    LogFlow(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%R[pgmpage] iPhysExt=%d\n", pPhysPage, iPhysExt));
2986
2987 const uint16_t iPhysExtStart = iPhysExt;
2988 PPGMPOOLPHYSEXT pPhysExt;
2989 do
2990 {
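            /* Each extent holds a small array of shadow page table indices
               (aidx); flush every valid entry, then follow the iNext link. */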
2991 Assert(iPhysExt < pPool->cMaxPhysExts);
2992 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2993 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2994 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2995 {
2996 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2997 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2998 }
2999
3000 /* next */
3001 iPhysExt = pPhysExt->iNext;
3002 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3003
3004 /* insert the list into the free list and clear the ram range entry. */
3005 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3006 pPool->iPhysExtFreeHead = iPhysExtStart;
3007 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3008
3009 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3010}
3011
3012#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3013
3014/**
3015 * Scans all shadow page tables for mappings of a physical page.
3016 *
3017 * This may be slow, but it's most likely more efficient than cleaning
3018 * out the entire page pool / cache.
3019 *
3020 * @returns VBox status code.
3021 * @retval VINF_SUCCESS if all references have been successfully cleared.
3022 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3023 * a page pool cleaning.
3024 *
3025 * @param pVM The VM handle.
3026 * @param pPhysPage The guest page in question.
3027 */
3028int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3029{
3030 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3031 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3032 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3033 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3034
3035#if 1
3036 /*
3037 * There is a limit to what makes sense.
3038 */
3039 if (pPool->cPresent > 1024)
3040 {
3041 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3042 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3043 return VINF_PGM_GCPHYS_ALIASED;
3044 }
3045#endif
3046
3047 /*
3048     * Iterate all the pages until we've encountered all those that are in use.
3049     * This is a simple but not quite optimal solution.
3050 */
3051 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3052 const uint32_t u32 = u64;
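        /* The truncation to 32 bits is intentional: 32-bit page tables cannot
           map physical addresses above 4 GB, so the low bits suffice there. */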
3053 unsigned cLeft = pPool->cUsedPages;
3054 unsigned iPage = pPool->cCurPages;
3055 while (--iPage >= PGMPOOL_IDX_FIRST)
3056 {
3057 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3058 if (pPage->GCPhys != NIL_RTGCPHYS)
3059 {
3060 switch (pPage->enmKind)
3061 {
3062 /*
3063 * We only care about shadow page tables.
3064 */
3065 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3066 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3067 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3068 {
3069 unsigned cPresent = pPage->cPresent;
3070 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3071 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3072 if (pPT->a[i].n.u1Present)
3073 {
3074 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3075 {
3076 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3077 pPT->a[i].u = 0;
3078 }
3079 if (!--cPresent)
3080 break;
3081 }
3082 break;
3083 }
3084
3085 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3086 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3087 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3088 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3089 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3090 {
3091 unsigned cPresent = pPage->cPresent;
3092 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3093 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3094 if (pPT->a[i].n.u1Present)
3095 {
3096 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3097 {
3098 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3099 pPT->a[i].u = 0;
3100 }
3101 if (!--cPresent)
3102 break;
3103 }
3104 break;
3105 }
3106 }
3107 if (!--cLeft)
3108 break;
3109 }
3110 }
3111
3112 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3113 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3114 return VINF_SUCCESS;
3115}
3116
3117
3118/**
3119 * Clears the user entry in a user table.
3120 *
3121 * This is used to remove all references to a page when flushing it.
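     *
     * @param pPool The pool.
     * @param pPage The shadow page being flushed.
     * @param pUser The user record, giving the user page and the index into it.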
3122 */
3123static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3124{
3125 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3126 Assert(pUser->iUser < pPool->cCurPages);
3127 uint32_t iUserTable = pUser->iUserTable;
3128
3129 /*
3130 * Map the user page.
3131 */
3132 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3133#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3134 if (pUserPage->enmKind == PGMPOOLKIND_ROOT_PAE_PD)
3135 {
3136 /* Must translate the fake 2048 entry PD to a 512 PD one since the R0 mapping is not linear. */
3137 Assert(pUser->iUser == PGMPOOL_IDX_PAE_PD);
3138 uint32_t iPdpt = iUserTable / X86_PG_PAE_ENTRIES;
3139 iUserTable %= X86_PG_PAE_ENTRIES;
3140 pUserPage = &pPool->aPages[PGMPOOL_IDX_PAE_PD_0 + iPdpt];
3141 Assert(pUserPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD);
3142 }
3143#endif
3144 union
3145 {
3146 uint64_t *pau64;
3147 uint32_t *pau32;
3148 } u;
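        /* One mapping, two views: the user table is written as 32-bit or 64-bit
           entries depending on the user page's kind (see the switches below). */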
3149 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3150
3151 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3152
3153 /* Safety precaution in case we change the paging for other modes too in the future. */
3154#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3155 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3156#else
3157 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
3158#endif
3159
3160#ifdef VBOX_STRICT
3161 /*
3162 * Some sanity checks.
3163 */
3164 switch (pUserPage->enmKind)
3165 {
3166# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3167 case PGMPOOLKIND_32BIT_PD:
3168 case PGMPOOLKIND_32BIT_PD_PHYS:
3169 Assert(iUserTable < X86_PG_ENTRIES);
3170 break;
3171# else
3172 case PGMPOOLKIND_ROOT_32BIT_PD:
3173 Assert(iUserTable < X86_PG_ENTRIES);
3174 Assert(!(u.pau32[iUserTable] & PGM_PDFLAGS_MAPPING));
3175 break;
3176# endif
3177# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3178 case PGMPOOLKIND_ROOT_PAE_PD:
3179 Assert(iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
3180 AssertMsg(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING), ("%llx %d\n", u.pau64[iUserTable], iUserTable));
3181 break;
3182# endif
3183# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3184 case PGMPOOLKIND_PAE_PDPT:
3185 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3186 case PGMPOOLKIND_PAE_PDPT_PHYS:
3187# else
3188 case PGMPOOLKIND_ROOT_PDPT:
3189# endif
3190 Assert(iUserTable < 4);
3191 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3192 break;
3193 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3194 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3195 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3196 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3197 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3198 case PGMPOOLKIND_PAE_PD_PHYS:
3199 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3200 break;
3201 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3202 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3203 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3204 break;
3205 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3206 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3207 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3208 break;
3209 case PGMPOOLKIND_64BIT_PML4:
3210 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3211 /* GCPhys >> PAGE_SHIFT is the index here */
3212 break;
3213 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3214 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3215 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3216 break;
3217
3218 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3219 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3220 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3221 break;
3222
3223 case PGMPOOLKIND_ROOT_NESTED:
3224 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3225 break;
3226
3227 default:
3228 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3229 break;
3230 }
3231#endif /* VBOX_STRICT */
3232
3233 /*
3234 * Clear the entry in the user page.
3235 */
3236 switch (pUserPage->enmKind)
3237 {
3238 /* 32-bit entries */
3239#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3240 case PGMPOOLKIND_32BIT_PD:
3241 case PGMPOOLKIND_32BIT_PD_PHYS:
3242#else
3243 case PGMPOOLKIND_ROOT_32BIT_PD:
3244#endif
3245 u.pau32[iUserTable] = 0;
3246 break;
3247
3248 /* 64-bit entries */
3249 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3250 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3251 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3252 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3253 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3254#if defined(IN_RC) && defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3255        /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during cr3 load, so any
3256 * non-present PDPT will continue to cause page faults.
3257 */
3258 ASMReloadCR3();
3259#endif
3260 /* no break */
3261 case PGMPOOLKIND_PAE_PD_PHYS:
3262 case PGMPOOLKIND_PAE_PDPT_PHYS:
3263 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3264 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3265 case PGMPOOLKIND_64BIT_PML4:
3266 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3267 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3268#if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3269 case PGMPOOLKIND_ROOT_PAE_PD:
3270#endif
3271#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3272 case PGMPOOLKIND_PAE_PDPT:
3273 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3274#else
3275 case PGMPOOLKIND_ROOT_PDPT:
3276#endif
3277 case PGMPOOLKIND_ROOT_NESTED:
3278 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3279 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3280 u.pau64[iUserTable] = 0;
3281 break;
3282
3283 default:
3284 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3285 }
3286}
3287
3288
3289/**
3290 * Clears all users of a page.
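     *
     * @param pPool The pool.
     * @param pPage The page whose user records should be freed.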
3291 */
3292static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3293{
3294 /*
3295 * Free all the user records.
3296 */
3297 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3298
3299 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3300 uint16_t i = pPage->iUserHead;
3301 while (i != NIL_PGMPOOL_USER_INDEX)
3302 {
3303        /* Clear the entry in the user table. */
3304 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3305
3306 /* Free it. */
3307 const uint16_t iNext = paUsers[i].iNext;
3308 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3309 paUsers[i].iNext = pPool->iUserFreeHead;
3310 pPool->iUserFreeHead = i;
3311
3312 /* Next. */
3313 i = iNext;
3314 }
3315 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3316}
3317
3318#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3319
3320/**
3321 * Allocates a new physical cross reference extent.
3322 *
3323 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3324 * @param pVM The VM handle.
3325 * @param piPhysExt Where to store the phys ext index.
3326 */
3327PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3328{
3329 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3330 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3331 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3332 {
3333 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3334 return NULL;
3335 }
3336 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3337 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3338 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3339 *piPhysExt = iPhysExt;
3340 return pPhysExt;
3341}
3342
3343
3344/**
3345 * Frees a physical cross reference extent.
3346 *
3347 * @param pVM The VM handle.
3348 * @param iPhysExt The extent to free.
3349 */
3350void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3351{
3352 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3353 Assert(iPhysExt < pPool->cMaxPhysExts);
3354 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3355 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3356 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3357 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3358 pPool->iPhysExtFreeHead = iPhysExt;
3359}
3360
3361
3362/**
3363 * Frees a list of physical cross reference extents.
3364 *
3365 * @param pVM The VM handle.
3366 * @param iPhysExt The index of the head of the extent list to free.
3367 */
3368void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3369{
3370 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3371
3372 const uint16_t iPhysExtStart = iPhysExt;
3373 PPGMPOOLPHYSEXT pPhysExt;
3374 do
3375 {
3376 Assert(iPhysExt < pPool->cMaxPhysExts);
3377 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3378 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3379 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3380
3381 /* next */
3382 iPhysExt = pPhysExt->iNext;
3383 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3384
3385 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3386 pPool->iPhysExtFreeHead = iPhysExtStart;
3387}
3388
3389
3390/**
3391 * Inserts a reference into a list of physical cross reference extents.
3392 *
3393 * @returns The new tracking data for PGMPAGE.
3394 *
3395 * @param pVM The VM handle.
3396 * @param iPhysExt The physical extent index of the list head.
3397 * @param iShwPT The shadow page table index.
3398 *
3399 */
3400static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3401{
3402 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3403 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3404
3405 /* special common case. */
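        /* (An extent freshly created by pgmPoolTrackPhysExtAddref starts out with
           slots 0 and 1 occupied, so slot 2 of the list head is the most likely
           free slot.) */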
3406 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3407 {
3408 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3409 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3410 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
3411 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3412 }
3413
3414 /* general treatment. */
3415 const uint16_t iPhysExtStart = iPhysExt;
3416 unsigned cMax = 15;
3417 for (;;)
3418 {
3419 Assert(iPhysExt < pPool->cMaxPhysExts);
3420 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3421 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3422 {
3423 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3424 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3425 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3426 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3427 }
3428 if (!--cMax)
3429 {
3430 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3431 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3432 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3433 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3434 }
 /* advance to the next extent; when the list ends, fall out of the loop and append a new extent. */
 iPhysExt = paPhysExts[iPhysExt].iNext;
 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
 break;
3435 }
3436
3437 /* add another extent to the list. */
3438 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3439 if (!pNew)
3440 {
3441 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3442 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3443 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3444 }
3445 pNew->iNext = iPhysExtStart;
3446 pNew->aidx[0] = iShwPT;
3447 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3448 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3449}
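
The routine caps the list walk at 15 extents: when a chain grows that long, tracking is abandoned, the chain is recycled, and the overflow sentinel is returned instead. The hedged sketch below shows the same capped-walk-then-append shape using the hypothetical Ext helpers introduced earlier (ExtAlloc, ExtFreeChain); iHead must name a valid extent.

    /* Scan at most 15 extents for a free slot; append a new extent when the
     * list ends first; collapse the list to a sentinel when it is too long. */
    static uint16_t ExtInsertCapped(uint16_t iHead, uint16_t idx)
    {
        uint16_t iExt = iHead;
        unsigned cMax = 15;
        for (;;)
        {
            Ext *pExt = &g_aExts[iExt];
            for (unsigned i = 0; i < 3; i++)
                if (pExt->aidx[i] == NIL_IDX)
                {
                    pExt->aidx[i] = idx;    /* found a slot in an existing extent */
                    return iHead;
                }
            if (!--cMax)
            {
                ExtFreeChain(iHead);        /* too many aliases: stop tracking */
                return NIL_IDX;             /* "overflowed" sentinel */
            }
            iExt = pExt->iNext;             /* advance */
            if (iExt == NIL_IDX)
                break;                      /* end of the list: append below */
        }
        uint16_t iNew;
        Ext *pNew = ExtAlloc(&iNew);
        if (!pNew)
        {
            ExtFreeChain(iHead);            /* allocation failed: stop tracking */
            return NIL_IDX;
        }
        pNew->iNext = iHead;                /* the new extent becomes the list head */
        pNew->aidx[0] = idx;
        return iNew;
    }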
3450
3451
3452/**
3453 * Adds a reference to a guest physical page where extents are in use.
3454 *
3455 * @returns The new tracking data for PGMPAGE.
3456 *
3457 * @param pVM The VM handle.
3458 * @param u16 The current tracking data (the top 16 bits of the ram range page flags).
3459 * @param iShwPT The shadow page table index.
3460 */
3461uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3462{
3463 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3464 {
3465 /*
3466 * Convert to extent list.
3467 */
3468 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3469 uint16_t iPhysExt;
3470 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3471 if (pPhysExt)
3472 {
3473 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3474 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3475 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3476 pPhysExt->aidx[1] = iShwPT;
3477 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3478 }
3479 else
3480 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3481 }
3482 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3483 {
3484 /*
3485 * Insert into the extent list.
3486 */
3487 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3488 }
3489 else
3490 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3491 return u16;
3492}
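
pgmPoolTrackPhysExtAddref operates on a packed 16-bit tracking word: a small reference count in the top bits, with one reserved count value meaning "the references live in an extent list", and an index in the low bits. With that layout the three branches above are: plain count (convert to an extent list), extent sentinel with a real index (insert into the list), and extent sentinel with the overflow index (just count the alias). The sketch below only illustrates the packing idea; the field widths are made up, not the real PGMPOOL_TD_* layout.

    #include <stdint.h>

    #define TD_CREFS_SHIFT   12
    #define TD_CREFS_MASK    0xfU
    #define TD_IDX_MASK      0xfffU
    #define TD_CREFS_PHYSEXT TD_CREFS_MASK  /* sentinel: the index names an extent list */

    static inline uint16_t TdMake(uint16_t cRefs, uint16_t idx)
    {
        return (uint16_t)(((cRefs & TD_CREFS_MASK) << TD_CREFS_SHIFT) | (idx & TD_IDX_MASK));
    }

    static inline uint16_t TdGetCRefs(uint16_t u16) { return (uint16_t)((u16 >> TD_CREFS_SHIFT) & TD_CREFS_MASK); }
    static inline uint16_t TdGetIdx(uint16_t u16)   { return (uint16_t)(u16 & TD_IDX_MASK); }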
3493
3494
3495/**
3496 * Clear references to guest physical memory.
3497 *
3498 * @param pPool The pool.
3499 * @param pPage The page.
3500 * @param pPhysPage Pointer to the aPages entry in the ram range.
3501 */
3502void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3503{
3504 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3505 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3506
3507 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3508 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3509 {
3510 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3511 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3512 do
3513 {
3514 Assert(iPhysExt < pPool->cMaxPhysExts);
3515
3516 /*
3517 * Look for the shadow page and check if it's all freed.
3518 */
3519 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3520 {
3521 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3522 {
3523 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3524
3525 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3526 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3527 {
3528 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3529 return;
3530 }
3531
3532 /* we can free the node. */
3533 PVM pVM = pPool->CTX_SUFF(pVM);
3534 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3535 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3536 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3537 {
3538 /* lonely node */
3539 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3540 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3541 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3542 }
3543 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3544 {
3545 /* head */
3546 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3547 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3548 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3549 }
3550 else
3551 {
3552 /* in list */
3553 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3554 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3555 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3556 }
3558 return;
3559 }
3560 }
3561
3562 /* next */
3563 iPhysExtPrev = iPhysExt;
3564 iPhysExt = paPhysExts[iPhysExt].iNext;
3565 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3566
3567 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3568 }
3569 else /* nothing to do */
3570 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3571}
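
The unlink logic above distinguishes three cases by carrying a trailing "previous" index: a lonely node empties the list, removing the head promotes its successor (in the real code by rewriting the PGMPAGE tracking word), and an interior node is simply bypassed by its predecessor. A compact sketch of the same three-way unlink over the hypothetical Ext array from the earlier sketches, with *piHead standing in for the tracking word:

    /* Unlink g_aExts[iExt] from an index-linked list and recycle the node.
     * iPrev is NIL_IDX when iExt is the list head. */
    static void ExtUnlink(uint16_t iPrev, uint16_t iExt, uint16_t *piHead)
    {
        const uint16_t iNext = g_aExts[iExt].iNext;
        if (iPrev == NIL_IDX && iNext == NIL_IDX)
            *piHead = NIL_IDX;                  /* lonely node: the list is now empty */
        else if (iPrev == NIL_IDX)
            *piHead = iNext;                    /* head: the successor becomes the head */
        else
            g_aExts[iPrev].iNext = iNext;       /* interior: the predecessor bypasses it */
        g_aExts[iExt].iNext = g_iFreeHead;      /* push the freed node onto the free list */
        g_iFreeHead = iExt;
    }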
3572
3573
3574/**
3575 * Clear references to guest physical memory.
3576 *
3577 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3578 * is assumed to be correct, so the linear search can be skipped and we can assert
3579 * at an earlier point.
3580 *
3581 * @param pPool The pool.
3582 * @param pPage The page.
3583 * @param HCPhys The host physical address corresponding to the guest page.
3584 * @param GCPhys The guest physical address corresponding to HCPhys.
3585 */
3586static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3587{
3588 /*
3589 * Walk range list.
3590 */
3591 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3592 while (pRam)
3593 {
3594 RTGCPHYS off = GCPhys - pRam->GCPhys;
3595 if (off < pRam->cb)
3596 {
3597 /* does it match? */
3598 const unsigned iPage = off >> PAGE_SHIFT;
3599 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3600#ifdef LOG_ENABLED
3601 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3602 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3603#endif
3604 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3605 {
3606 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3607 return;
3608 }
3609 break;
3610 }
3611 pRam = pRam->CTX_SUFF(pNext);
3612 }
3613 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3614}
3615
3616
3617/**
3618 * Clear references to guest physical memory.
3619 *
3620 * @param pPool The pool.
3621 * @param pPage The page.
3622 * @param HCPhys The host physical address corresponding to the guest page.
3623 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3624 */
3625static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3626{
3627 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
3628
3629 /*
3630 * Walk range list.
3631 */
3632 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3633 while (pRam)
3634 {
3635 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3636 if (off < pRam->cb)
3637 {
3638 /* does it match? */
3639 const unsigned iPage = off >> PAGE_SHIFT;
3640 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3641 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3642 {
3643 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3644 return;
3645 }
3646 break;
3647 }
3648 pRam = pRam->CTX_SUFF(pNext);
3649 }
3650
3651 /*
3652 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3653 */
3654 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3655 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3656 while (pRam)
3657 {
3658 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3659 while (iPage-- > 0)
3660 {
3661 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3662 {
3663 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3664 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3665 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3666 return;
3667 }
3668 }
3669 pRam = pRam->CTX_SUFF(pNext);
3670 }
3671
3672 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3673}
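
The hint variant is a two-phase lookup: first try the single page entry the hint selects, and only if that entry does not match fall back to the full linear scan of every range (counted by StatTrackLinearRamSearches). A standalone sketch of the pattern with hypothetical Range/FindByHint names; the 12-bit shift stands in for PAGE_SHIFT, and the unsigned subtraction doubles as the bounds check, as in the code above.

    #include <stdint.h>
    #include <stddef.h>

    typedef struct Range
    {
        uint64_t      base;     /* first address covered by the range */
        uint64_t      cb;       /* size of the range in bytes */
        uint64_t     *paVals;   /* one value per 4K page in the range */
        struct Range *pNext;
    } Range;

    static uint64_t *FindByHint(Range *pHead, uint64_t uHint, uint64_t uVal)
    {
        for (Range *p = pHead; p; p = p->pNext)         /* phase 1: follow the hint */
            if (uHint - p->base < p->cb)                /* wraps when uHint < base */
            {
                size_t i = (size_t)((uHint - p->base) >> 12);
                if (p->paVals[i] == uVal)
                    return &p->paVals[i];               /* the hint was right */
                break;                                  /* wrong entry: abandon the hint */
            }
        for (Range *p = pHead; p; p = p->pNext)         /* phase 2: linear search */
            for (size_t i = (size_t)(p->cb >> 12); i-- > 0; )
                if (p->paVals[i] == uVal)
                    return &p->paVals[i];
        return NULL;                                    /* the code above treats this as fatal */
    }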
3674
3675
3676/**
3677 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3678 *
3679 * @param pPool The pool.
3680 * @param pPage The page.
3681 * @param pShwPT The shadow page table (mapping of the page).
3682 * @param pGstPT The guest page table.
3683 */
3684DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3685{
3686 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3687 if (pShwPT->a[i].n.u1Present)
3688 {
3689 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3690 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3691 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3692 if (!--pPage->cPresent)
3693 break;
3694 }
3695}
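
Two bounds make the loop above cheap: it starts at pPage->iFirstPresent rather than 0, and it stops as soon as pPage->cPresent present entries have been dereferenced. (The PAE variants below scan from index 0 and do not use the counter.) A sketch of that early-exit scan in isolation, with hypothetical names and bit 0 standing in for the present bit:

    #include <stdint.h>

    /* Visit every present entry, skipping the known-empty prefix and stopping
     * once the recorded number of present entries has been handled. */
    static void ScanPresent(uint32_t const *paEntries, unsigned cEntries,
                            unsigned iFirstPresent, unsigned cPresent)
    {
        for (unsigned i = iFirstPresent; i < cEntries && cPresent > 0; i++)
            if (paEntries[i] & 1u)      /* X86_PTE_P equivalent */
            {
                /* ...dereference the page frame paEntries[i] points at... */
                cPresent--;
            }
    }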
3696
3697
3698/**
3699 * Clear references to guest physical memory in a PAE / 32-bit page table.
3700 *
3701 * @param pPool The pool.
3702 * @param pPage The page.
3703 * @param pShwPT The shadow page table (mapping of the page).
3704 * @param pGstPT The guest page table (just a half one).
3705 */
3706DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3707{
3708 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3709 if (pShwPT->a[i].n.u1Present)
3710 {
3711 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3712 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3713 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3714 }
3715}
3716
3717
3718/**
3719 * Clear references to guest physical memory in a PAE / PAE page table.
3720 *
3721 * @param pPool The pool.
3722 * @param pPage The page.
3723 * @param pShwPT The shadow page table (mapping of the page).
3724 * @param pGstPT The guest page table.
3725 */
3726DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3727{
3728 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3729 if (pShwPT->a[i].n.u1Present)
3730 {
3731 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3732 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3733 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3734 }
3735}
3736
3737
3738/**
3739 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3740 *
3741 * @param pPool The pool.
3742 * @param pPage The page.
3743 * @param pShwPT The shadow page table (mapping of the page).
3744 */
3745DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3746{
3747 RTGCPHYS GCPhys = pPage->GCPhys;
3748 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3749 if (pShwPT->a[i].n.u1Present)
3750 {
3751 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3752 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3753 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3754 }
3755}
3756
3757
3758/**
3759 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3760 *
3761 * @param pPool The pool.
3762 * @param pPage The page.
3763 * @param pShwPT The shadow page table (mapping of the page).
3764 */
3765DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3766{
3767 RTGCPHYS GCPhys = pPage->GCPhys;
3768 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3769 if (pShwPT->a[i].n.u1Present)
3770 {
3771 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3772 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3773 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3774 }
3775}
3776
3777#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3778
3779
3780#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3781/**
3782 * Clear references to shadowed pages in a 32-bit page directory.
3783 *
3784 * @param pPool The pool.
3785 * @param pPage The page.
3786 * @param pShwPD The shadow page directory (mapping of the page).
3787 */
3788DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
3789{
3790 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3791 {
3792 if ( pShwPD->a[i].n.u1Present
3793 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3794 )
3795 {
3796 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
3797 if (pSubPage)
3798 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3799 else
3800 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
3801 }
3802 }
3803}
3804#endif
3805
3806/**
3807 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
3808 *
3809 * @param pPool The pool.
3810 * @param pPage The page.
3811 * @param pShwPD The shadow page directory (mapping of the page).
3812 */
3813DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3814{
3815 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3816 {
3817 if ( pShwPD->a[i].n.u1Present
3818#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3819 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3820#endif
3821 )
3822 {
3823 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3824 if (pSubPage)
3825 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3826 else
3827 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3828 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3829 }
3830 }
3831}
3832
3833
3834/**
3835 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3836 *
3837 * @param pPool The pool.
3838 * @param pPage The page.
3839 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3840 */
3841DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3842{
3843 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3844 {
3845 if ( pShwPDPT->a[i].n.u1Present
3846#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3847 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
3848#endif
3849 )
3850 {
3851 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3852 if (pSubPage)
3853 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3854 else
3855 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3856 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3857 }
3858 }
3859}
3860
3861
3862/**
3863 * Clear references to shadowed pages in a 64-bit level 4 page table.
3864 *
3865 * @param pPool The pool.
3866 * @param pPage The page.
3867 * @param pShwPML4 The shadow level-4 page table (mapping of the page).
3868 */
3869DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3870{
3871 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3872 {
3873 if (pShwPML4->a[i].n.u1Present)
3874 {
3875 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PML4E_PG_MASK);
3876 if (pSubPage)
3877 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3878 else
3879 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3880 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3881 }
3882 }
3883}
3884
3885
3886/**
3887 * Clear references to shadowed pages in an EPT page table.
3888 *
3889 * @param pPool The pool.
3890 * @param pPage The page.
3891 * @param pShwPT The shadow page table (mapping of the page).
3892 */
3893DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3894{
3895 RTGCPHYS GCPhys = pPage->GCPhys;
3896 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3897 if (pShwPT->a[i].n.u1Present)
3898 {
3899 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RGp\n",
3900 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3901 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3902 }
3903}
3904
3905
3906/**
3907 * Clear references to shadowed pages in an EPT page directory.
3908 *
3909 * @param pPool The pool.
3910 * @param pPage The page.
3911 * @param pShwPD The shadow page directory (mapping of the page).
3912 */
3913DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3914{
3915 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3916 {
3917 if (pShwPD->a[i].n.u1Present)
3918 {
3919 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3920 if (pSubPage)
3921 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3922 else
3923 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3924 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3925 }
3926 }
3927}
3928
3929
3930/**
3931 * Clear references to shadowed pages in an EPT page directory pointer table.
3932 *
3933 * @param pPool The pool.
3934 * @param pPage The page.
3935 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3936 */
3937DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3938{
3939 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3940 {
3941 if (pShwPDPT->a[i].n.u1Present)
3942 {
3943 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3944 if (pSubPage)
3945 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3946 else
3947 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3948 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3949 }
3950 }
3951}
3952
3953
3954/**
3955 * Clears all references made by this page.
3956 *
3957 * This includes other shadow pages and GC physical addresses.
3958 *
3959 * @param pPool The pool.
3960 * @param pPage The page.
3961 */
3962static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3963{
3964 /*
3965 * Map the shadow page and take action according to the page kind.
3966 */
3967 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
3968 switch (pPage->enmKind)
3969 {
3970#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3971 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3972 {
3973 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3974 void *pvGst;
3975 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3976 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3977 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3978 break;
3979 }
3980
3981 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3982 {
3983 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3984 void *pvGst;
3985 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3986 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3987 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3988 break;
3989 }
3990
3991 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3992 {
3993 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3994 void *pvGst;
3995 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3996 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3997 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3998 break;
3999 }
4000
4001 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4002 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4003 {
4004 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4005 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4006 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4007 break;
4008 }
4009
4010 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4011 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4012 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4013 {
4014 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4015 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4016 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4017 break;
4018 }
4019
4020#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4021 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4022 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4023 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4024 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4025 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4026 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4027 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4028 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4029 break;
4030#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4031
4032 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4033 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4034 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4035 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4036 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4037 case PGMPOOLKIND_PAE_PD_PHYS:
4038 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4039 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4040 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4041 break;
4042
4043#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4044 case PGMPOOLKIND_32BIT_PD_PHYS:
4045 case PGMPOOLKIND_32BIT_PD:
4046 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4047 break;
4048
4049 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4050 case PGMPOOLKIND_PAE_PDPT:
4051 case PGMPOOLKIND_PAE_PDPT_PHYS:
4052#endif
4053 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4054 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4055 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4056 break;
4057
4058 case PGMPOOLKIND_64BIT_PML4:
4059 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4060 break;
4061
4062 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4063 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4064 break;
4065
4066 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4067 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4068 break;
4069
4070 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4071 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4072 break;
4073
4074 default:
4075 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4076 }
4077
4078 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4079 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4080 ASMMemZeroPage(pvShw);
4081 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4082 pPage->fZeroed = true;
4083 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4084}
4085
4086#endif /* PGMPOOL_WITH_USER_TRACKING */
4087
4088/**
4089 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
4090 *
4091 * @param pPool The pool.
4092 */
4093static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
4094{
4095#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4096 /* Start a subset so we won't run out of mapping space. */
4097 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4098 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4099#endif
4100
4101 /*
4102 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST-1.
4103 */
4104 Assert(NIL_PGMPOOL_IDX == 0);
4105 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
4106 {
4107 /*
4108 * Get the page address.
4109 */
4110 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4111 union
4112 {
4113 uint64_t *pau64;
4114 uint32_t *pau32;
4115 } u;
4116
4117 /*
4118 * Mark stuff not present.
4119 */
4120 switch (pPage->enmKind)
4121 {
4122#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4123 case PGMPOOLKIND_ROOT_32BIT_PD:
4124 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4125 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
4126 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
4127 u.pau32[iPage] = 0;
4128 break;
4129
4130 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4131 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4132 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
4133 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
4134 u.pau64[iPage] = 0;
4135 break;
4136
4137 case PGMPOOLKIND_ROOT_PDPT:
4138 /* Not root of shadowed pages currently, ignore it. */
4139 break;
4140#endif
4141
4142 case PGMPOOLKIND_ROOT_NESTED:
4143 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4144 ASMMemZero32(u.pau64, PAGE_SIZE);
4145 break;
4146 }
4147 }
4148
4149 /*
4150 * Paranoia (to be removed), flag a global CR3 sync.
4151 */
4152 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
4153
4154#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4155 /* Pop the subset. */
4156 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4157#endif
4158}
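
The union in the loop above is the usual trick for viewing one mapped page as either 512 64-bit entries or 1024 32-bit entries, picking the view per page kind. A tiny self-contained sketch of the dual view (hypothetical names; a real page is PAGE_SIZE bytes):

    #include <stdint.h>

    typedef union PageView
    {
        uint64_t au64[512];     /* PAE/long-mode view: 512 8-byte entries */
        uint32_t au32[1024];    /* legacy 32-bit view: 1024 4-byte entries */
    } PageView;

    /* Clear the present bit in whichever view matches the page kind. */
    static void ClearPresent(PageView *pPage, int fPae)
    {
        if (fPae)
            for (unsigned i = 0; i < 512; i++)
                pPage->au64[i] &= ~(uint64_t)1;
        else
            for (unsigned i = 0; i < 1024; i++)
                pPage->au32[i] &= ~(uint32_t)1;
    }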
4159
4160
4161/**
4162 * Flushes the entire cache.
4163 *
4164 * It will assert the global CR3 sync force-action flag (FF) and assumes the caller
4165 * is aware of this and will execute the CR3 flush.
4166 *
4167 * @param pPool The pool.
4168 */
4169static void pgmPoolFlushAllInt(PPGMPOOL pPool)
4170{
4171 PVM pVM = pPool->CTX_SUFF(pVM);
4172
4173 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
4174 LogFlow(("pgmPoolFlushAllInt:\n"));
4175
4176 /*
4177 * If there are no pages in the pool, there is nothing to do.
4178 */
4179 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4180 {
4181 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4182 return;
4183 }
4184
4185#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4186 /* Unmap the old CR3 value before flushing everything. */
4187 int rc = PGM_BTH_PFN(UnmapCR3, pVM)(pVM);
4188 AssertRC(rc);
4189#endif
4190
4191 /*
4192 * Nuke the free list and reinsert all pages into it.
4193 */
4194 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4195 {
4196 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4197
4198#ifdef IN_RING3
4199 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
4200#endif
4201#ifdef PGMPOOL_WITH_MONITORING
4202 if (pPage->fMonitored)
4203 pgmPoolMonitorFlush(pPool, pPage);
4204 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4205 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4206 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4207 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4208 pPage->cModifications = 0;
4209#endif
4210 pPage->GCPhys = NIL_RTGCPHYS;
4211 pPage->enmKind = PGMPOOLKIND_FREE;
4212 Assert(pPage->idx == i);
4213 pPage->iNext = i + 1;
4214 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4215 pPage->fSeenNonGlobal = false;
4216 pPage->fMonitored = false;
4217 pPage->fCached = false;
4218 pPage->fReusedFlushPending = false;
4219#ifdef PGMPOOL_WITH_USER_TRACKING
4220 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4221#else
4222 pPage->fCR3Mix = false;
4223#endif
4224#ifdef PGMPOOL_WITH_CACHE
4225 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4226 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4227#endif
4228 }
4229 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4230 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4231 pPool->cUsedPages = 0;
4232
4233#ifdef PGMPOOL_WITH_USER_TRACKING
4234 /*
4235 * Zap and reinitialize the user records.
4236 */
4237 pPool->cPresent = 0;
4238 pPool->iUserFreeHead = 0;
4239 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4240 const unsigned cMaxUsers = pPool->cMaxUsers;
4241 for (unsigned i = 0; i < cMaxUsers; i++)
4242 {
4243 paUsers[i].iNext = i + 1;
4244 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4245 paUsers[i].iUserTable = 0xfffffffe;
4246 }
4247 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4248#endif
4249
4250#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4251 /*
4252 * Clear all the GCPhys links and rebuild the phys ext free list.
4253 */
4254 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
4255 pRam;
4256 pRam = pRam->CTX_SUFF(pNext))
4257 {
4258 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4259 while (iPage-- > 0)
4260 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
4261 }
4262
4263 pPool->iPhysExtFreeHead = 0;
4264 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4265 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4266 for (unsigned i = 0; i < cMaxPhysExts; i++)
4267 {
4268 paPhysExts[i].iNext = i + 1;
4269 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4270 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4271 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4272 }
4273 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4274#endif
4275
4276#ifdef PGMPOOL_WITH_MONITORING
4277 /*
4278 * Just zap the modified list.
4279 */
4280 pPool->cModifiedPages = 0;
4281 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4282#endif
4283
4284#ifdef PGMPOOL_WITH_CACHE
4285 /*
4286 * Clear the GCPhys hash and the age list.
4287 */
4288 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4289 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4290 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4291 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4292#endif
4293
4294 /*
4295 * Flush all the special root pages.
4296 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4297 */
4298 pgmPoolFlushAllSpecialRoots(pPool);
4299 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4300 {
4301 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4302 pPage->iNext = NIL_PGMPOOL_IDX;
4303#ifdef PGMPOOL_WITH_MONITORING
4304 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4305 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4306 pPage->cModifications = 0;
4307 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4308 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4309 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4310 if (pPage->fMonitored)
4311 {
4312 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4313 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4314 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4315 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4316 pPool->pszAccessHandler);
4317 AssertFatalRCSuccess(rc);
4318# ifdef PGMPOOL_WITH_CACHE
4319 pgmPoolHashInsert(pPool, pPage);
4320# endif
4321 }
4322#endif
4323#ifdef PGMPOOL_WITH_USER_TRACKING
4324 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4325#endif
4326#ifdef PGMPOOL_WITH_CACHE
4327 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4328 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4329#endif
4330 }
4331
4332 /*
4333 * Finally, assert the FF.
4334 */
4335 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
4336
4337 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4338}
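
Most of the reset above is one O(n) idiom applied to each array in turn: walk the slots once, point every iNext at its successor, invalidate the payload, terminate the last slot, and reset the head, leaving a fully rebuilt ascending free list. In terms of the hypothetical Ext array from the earlier sketches:

    /* Rebuild the free list over the whole array in one pass. */
    static void ExtRebuildFreeList(void)
    {
        const uint16_t cExts = (uint16_t)(sizeof(g_aExts) / sizeof(g_aExts[0]));
        for (uint16_t i = 0; i < cExts; i++)
        {
            g_aExts[i].iNext = (uint16_t)(i + 1);
            g_aExts[i].aidx[0] = g_aExts[i].aidx[1] = g_aExts[i].aidx[2] = NIL_IDX;
        }
        g_aExts[cExts - 1].iNext = NIL_IDX;     /* terminate the chain */
        g_iFreeHead = 0;
    }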
4339
4340
4341/**
4342 * Flushes a pool page.
4343 *
4344 * This moves the page to the free list after removing all user references to it.
4345 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
4346 *
4347 * @returns VBox status code.
4348 * @retval VINF_SUCCESS on success.
4349 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
4350 * @param pPool The pool.
4351 * @param pPage The shadow page to flush.
4352 */
4353int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4354{
4355 int rc = VINF_SUCCESS;
4356 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4357 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4358 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4359
4360 /*
4361 * Quietly reject any attempts at flushing any of the special root pages.
4362 */
4363 if (pPage->idx < PGMPOOL_IDX_FIRST)
4364 {
4365 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4366 return VINF_SUCCESS;
4367 }
4368
4369 /*
4370 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4371 */
4372#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4373 if (pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
4374 {
4375 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4376 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4377 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4378 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4379 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4380 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4381 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4382 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4383 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4384 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d)\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4385#else
4386 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
4387 {
4388 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4,
4389 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d)\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4390#endif
4391 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4392 return VINF_SUCCESS;
4393 }
4394
4395#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4396 /* Start a subset so we won't run out of mapping space. */
4397 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4398 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4399#endif
4400
4401 /*
4402 * Mark the page as being in need of an ASMMemZeroPage().
4403 */
4404 pPage->fZeroed = false;
4405
4406#ifdef PGMPOOL_WITH_USER_TRACKING
4407 /*
4408 * Clear the page.
4409 */
4410 pgmPoolTrackClearPageUsers(pPool, pPage);
4411 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4412 pgmPoolTrackDeref(pPool, pPage);
4413 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4414#endif
4415
4416#ifdef PGMPOOL_WITH_CACHE
4417 /*
4418 * Flush it from the cache.
4419 */
4420 pgmPoolCacheFlushPage(pPool, pPage);
4421#endif /* PGMPOOL_WITH_CACHE */
4422
4423#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4424 /* Heavy stuff done. */
4425 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4426#endif
4427
4428#ifdef PGMPOOL_WITH_MONITORING
4429 /*
4430 * Deregister the monitoring.
4431 */
4432 if (pPage->fMonitored)
4433 rc = pgmPoolMonitorFlush(pPool, pPage);
4434#endif
4435
4436 /*
4437 * Free the page.
4438 */
4439 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4440 pPage->iNext = pPool->iFreeHead;
4441 pPool->iFreeHead = pPage->idx;
4442 pPage->enmKind = PGMPOOLKIND_FREE;
4443 pPage->GCPhys = NIL_RTGCPHYS;
4444 pPage->fReusedFlushPending = false;
4445
4446 pPool->cUsedPages--;
4447 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4448 return rc;
4449}
4450
4451
4452/**
4453 * Frees a usage of a pool page.
4454 *
4455 * The caller is responsible for updating the user table so that it no longer
4456 * references the shadow page.
4457 *
4458 * @param pPool The pool.
4459 * @param pPage The shadow page.
4460 * @param iUser The shadow page pool index of the user table.
4461 * @param iUserTable The index into the user table (shadowed).
4462 */
4463void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4464{
4465 STAM_PROFILE_START(&pPool->StatFree, a);
4466 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4467 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4468 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4469#ifdef PGMPOOL_WITH_USER_TRACKING
4470 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4471#endif
4472#ifdef PGMPOOL_WITH_CACHE
4473 if (!pPage->fCached)
4474#endif
4475 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
4476 STAM_PROFILE_STOP(&pPool->StatFree, a);
4477}
4478
4479
4480/**
4481 * Makes more pages available for allocation by growing the pool or evicting a cached page.
4482 *
4483 * @returns VBox status code.
4484 * @retval VINF_SUCCESS on success.
4485 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4486 *
4487 * @param pPool The pool.
4488 * @param enmKind Page table kind
4489 * @param iUser The user of the page.
4490 */
4491static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4492{
4493 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4494
4495 /*
4496 * If the pool isn't full grown yet, expand it.
4497 */
4498 if ( pPool->cCurPages < pPool->cMaxPages
4499#if defined(VBOX_WITH_PGMPOOL_PAGING_ONLY) && defined(IN_RC)
4500 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4501 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4502 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4503#endif
4504 )
4505 {
4506 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4507#ifdef IN_RING3
4508 int rc = PGMR3PoolGrow(pPool->pVMR3);
4509#else
4510 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
4511#endif
4512 if (RT_FAILURE(rc))
4513 return rc;
4514 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4515 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4516 return VINF_SUCCESS;
4517 }
4518
4519#ifdef PGMPOOL_WITH_CACHE
4520 /*
4521 * Free one cached page.
4522 */
4523 return pgmPoolCacheFreeOne(pPool, iUser);
4524#else
4525 /*
4526 * Flush the pool.
4527 *
4528 * If we have tracking enabled, it should be possible to come up with
4529 * a cheap replacement strategy...
4530 */
4531 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
4532 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
4533 pgmPoolFlushAllInt(pPool);
4534 return VERR_PGM_POOL_FLUSHED;
4535#endif
4536}
4537
4538
4539/**
4540 * Allocates a page from the pool.
4541 *
4542 * This page may actually be a cached page and not in need of any processing
4543 * on the caller's part.
4544 *
4545 * @returns VBox status code.
4546 * @retval VINF_SUCCESS if a NEW page was allocated.
4547 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4548 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4549 * @param pVM The VM handle.
4550 * @param GCPhys The GC physical address of the page we're going to shadow.
4551 * For 4MB and 2MB PD entries, it's the first address the
4552 * shadow PT is covering.
4553 * @param enmKind The kind of mapping.
4554 * @param iUser The shadow page pool index of the user table.
4555 * @param iUserTable The index into the user table (shadowed).
4556 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4557 */
4558int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
4559{
4560 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4561 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4562 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4563 *ppPage = NULL;
4564 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4565 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4566 * Assert(!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4567
4568#ifdef PGMPOOL_WITH_CACHE
4569 if (pPool->fCacheEnabled)
4570 {
4571 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
4572 if (RT_SUCCESS(rc2))
4573 {
4574 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4575 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4576 return rc2;
4577 }
4578 }
4579#endif
4580
4581 /*
4582 * Allocate a new one.
4583 */
4584 int rc = VINF_SUCCESS;
4585 uint16_t iNew = pPool->iFreeHead;
4586 if (iNew == NIL_PGMPOOL_IDX)
4587 {
4588 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4589 if (RT_FAILURE(rc))
4590 {
4591 if (rc != VERR_PGM_POOL_CLEARED)
4592 {
4593 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4594 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4595 return rc;
4596 }
4597 Log(("pgmPoolMakeMoreFreePages failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
4598 rc = VERR_PGM_POOL_FLUSHED;
4599 }
4600 iNew = pPool->iFreeHead;
4601 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4602 }
4603
4604 /* unlink the free head */
4605 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4606 pPool->iFreeHead = pPage->iNext;
4607 pPage->iNext = NIL_PGMPOOL_IDX;
4608
4609 /*
4610 * Initialize it.
4611 */
4612 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4613 pPage->enmKind = enmKind;
4614 pPage->GCPhys = GCPhys;
4615 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4616 pPage->fMonitored = false;
4617 pPage->fCached = false;
4618 pPage->fReusedFlushPending = false;
4619#ifdef PGMPOOL_WITH_MONITORING
4620 pPage->cModifications = 0;
4621 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4622 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4623#else
4624 pPage->fCR3Mix = false;
4625#endif
4626#ifdef PGMPOOL_WITH_USER_TRACKING
4627 pPage->cPresent = 0;
4628 pPage->iFirstPresent = ~0;
4629
4630 /*
4631 * Insert into the tracking and cache. If this fails, free the page.
4632 */
4633 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4634 if (RT_FAILURE(rc3))
4635 {
4636 if (rc3 != VERR_PGM_POOL_CLEARED)
4637 {
4638 pPool->cUsedPages--;
4639 pPage->enmKind = PGMPOOLKIND_FREE;
4640 pPage->GCPhys = NIL_RTGCPHYS;
4641 pPage->iNext = pPool->iFreeHead;
4642 pPool->iFreeHead = pPage->idx;
4643 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4644 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4645 return rc3;
4646 }
4647 Log(("pgmPoolTrackInsert failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4648 rc = VERR_PGM_POOL_FLUSHED;
4649 }
4650#endif /* PGMPOOL_WITH_USER_TRACKING */
4651
4652 /*
4653 * Commit the allocation, clear the page and return.
4654 */
4655#ifdef VBOX_WITH_STATISTICS
4656 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4657 pPool->cUsedPagesHigh = pPool->cUsedPages;
4658#endif
4659
4660 if (!pPage->fZeroed)
4661 {
4662 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4663 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4664 ASMMemZeroPage(pv);
4665 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4666 }
4667
4668 *ppPage = pPage;
4669 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4670 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4671 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4672 return rc;
4673}
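
A caller has three interesting outcomes to distinguish: a cached hit (VINF_PGM_CACHED_PAGE, contents already valid), a fresh page (VINF_SUCCESS, zeroed and ready to fill), and a failure such as VERR_PGM_POOL_FLUSHED (a CR3 sync is pending). The fragment below is a hedged usage sketch only; the pVM/GCPhys/iUser/iUserTable setup is assumed and not shown, and the page kind is just an example.

    PPGMPOOLPAGE pShwPage;
    int rc = pgmPoolAlloc(pVM, GCPhys, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
                          iUser, iUserTable, &pShwPage);
    if (rc == VINF_PGM_CACHED_PAGE)
    {
        /* Cached hit: the shadow table already exists; just reuse its entries. */
    }
    else if (RT_SUCCESS(rc))
    {
        /* New page: it has been zeroed, so populate the shadow entries now. */
    }
    else
    {
        /* VERR_PGM_POOL_FLUSHED etc.: back off; a CR3 sync FF has been raised. */
    }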
4674
4675
4676/**
4677 * Frees a usage of a pool page.
4678 *
4679 * @param pVM The VM handle.
4680 * @param HCPhys The HC physical address of the shadow page.
4681 * @param iUser The shadow page pool index of the user table.
4682 * @param iUserTable The index into the user table (shadowed).
4683 */
4684void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4685{
4686 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4687 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4688 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4689}
4690
4691
4692/**
4693 * Gets an in-use page in the pool by its physical address.
4694 *
4695 * @returns Pointer to the page.
4696 * @param pVM The VM handle.
4697 * @param HCPhys The HC physical address of the shadow page.
4698 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4699 */
4700PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4701{
4702 /** @todo profile this! */
4703 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4704 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4705 Log4(("pgmPoolGetPageByHCPhys: HCPhys=%RHp -> %p:{.idx=%d .GCPhys=%RGp .enmKind=%s}\n",
4706 HCPhys, pPage, pPage->idx, pPage->GCPhys, pgmPoolPoolKindToStr(pPage->enmKind)));
4707 return pPage;
4708}
4709
4710
4711/**
4712 * Flushes the entire cache.
4713 *
4714 * It will assert the global CR3 sync force-action flag (FF) and assumes the caller
4715 * is aware of this and will execute the CR3 flush.
4716 *
4717 * @param pVM The VM handle.
4718 */
4719void pgmPoolFlushAll(PVM pVM)
4720{
4721 LogFlow(("pgmPoolFlushAll:\n"));
4722 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4723}
4724
4725#ifdef LOG_ENABLED
4726static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
4727{
4728 switch(enmKind)
4729 {
4730 case PGMPOOLKIND_INVALID:
4731 return "PGMPOOLKIND_INVALID";
4732 case PGMPOOLKIND_FREE:
4733 return "PGMPOOLKIND_FREE";
4734 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4735 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4736 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4737 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4738 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4739 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4740 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4741 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4742 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4743 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4744 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4745 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
4746 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4747 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
4748 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4749 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
4750 case PGMPOOLKIND_32BIT_PD:
4751 return "PGMPOOLKIND_32BIT_PD";
4752 case PGMPOOLKIND_32BIT_PD_PHYS:
4753 return "PGMPOOLKIND_32BIT_PD_PHYS";
4754 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4755 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
4756 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4757 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
4758 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4759 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
4760 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4761 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
4762 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4763 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
4764 case PGMPOOLKIND_PAE_PD_PHYS:
4765 return "PGMPOOLKIND_PAE_PD_PHYS";
4766 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4767 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
4768 case PGMPOOLKIND_PAE_PDPT:
4769 return "PGMPOOLKIND_PAE_PDPT";
4770 case PGMPOOLKIND_PAE_PDPT_PHYS:
4771 return "PGMPOOLKIND_PAE_PDPT_PHYS";
4772 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4773 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
4774 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4775 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
4776 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4777 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
4778 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4779 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
4780 case PGMPOOLKIND_64BIT_PML4:
4781 return "PGMPOOLKIND_64BIT_PML4";
4782 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4783 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
4784 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4785 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
4786 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4787 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
4788#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4789 case PGMPOOLKIND_ROOT_32BIT_PD:
4790 return "PGMPOOLKIND_ROOT_32BIT_PD";
4791 case PGMPOOLKIND_ROOT_PAE_PD:
4792 return "PGMPOOLKIND_ROOT_PAE_PD";
4793 case PGMPOOLKIND_ROOT_PDPT:
4794 return "PGMPOOLKIND_ROOT_PDPT";
4795#endif
4796 case PGMPOOLKIND_ROOT_NESTED:
4797 return "PGMPOOLKIND_ROOT_NESTED";
4798 }
4799 return "Unknown kind!";
4800}
4801#endif