VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 22600

Last change on this file since 22600 was 22600, checked in by vboxsync, 16 years ago

Removed unnecessary EPT invlpg calls.
Invalidate the page whose R/W attribute was changed.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 181.4 KB
1/* $Id: PGMAllPool.cpp 22600 2009-08-31 12:19:56Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42#include <iprt/string.h>
43
44
45/*******************************************************************************
46* Internal Functions *
47*******************************************************************************/
48RT_C_DECLS_BEGIN
49static void pgmPoolFlushAllInt(PPGMPOOL pPool);
50#ifdef PGMPOOL_WITH_USER_TRACKING
51DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
52DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
53static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
54#endif
55#ifdef PGMPOOL_WITH_CACHE
56static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
57#endif
58#ifdef PGMPOOL_WITH_MONITORING
59static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
60#endif
61#ifndef IN_RING3
62DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
63#endif
64#ifdef LOG_ENABLED
65static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
66#endif
67
68void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs);
69void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt);
70int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
71PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
72void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
73void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
74
75RT_C_DECLS_END
76
77
78/**
79 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
80 *
81 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
82 * @param enmKind The page kind.
83 */
84DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
85{
86 switch (enmKind)
87 {
88 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
89 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
90 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
91 return true;
92 default:
93 return false;
94 }
95}
96
97/** @def PGMPOOL_PAGE_2_LOCKED_PTR
98 * Maps a pool page into the current context and locks it (RC only).
99 *
100 * @returns VBox status code.
101 * @param pVM The VM handle.
102 * @param pPage The pool page.
103 *
104 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
105 * small page window employed by that function. Be careful.
106 * @remark There is no need to assert on the result.
107 */
108#if defined(IN_RC)
109DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
110{
111 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
112
113 /* Make sure the dynamic mapping will not be reused. */
114 if (pv)
115 PGMDynLockHCPage(pVM, (uint8_t *)pv);
116
117 return pv;
118}
119#else
120# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
121#endif
122
123/** @def PGMPOOL_UNLOCK_PTR
124 * Unlocks a previously locked dynamic cache mapping (RC only).
125 *
126 * @returns VBox status code.
127 * @param pVM The VM handle.
128 * @param pPage The pool page.
129 *
130 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
131 * small page window employed by that function. Be careful.
132 * @remark There is no need to assert on the result.
133 */
134#if defined(IN_RC)
135DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
136{
137 if (pvPage)
138 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
139}
140#else
141# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
142#endif
143
144
145#ifdef PGMPOOL_WITH_MONITORING
146/**
147 * Determine the size of a write instruction.
148 * @returns number of bytes written.
149 * @param pDis The disassembler state.
150 */
151static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
152{
153 /*
154 * This is very crude and possibly wrong for some opcodes,
155 * but since it's not really supposed to be called we can
156 * probably live with that.
157 */
158 return DISGetParamSize(pDis, &pDis->param1);
159}
160
161
162/**
163 * Flushes a chain of pages sharing the same access monitor.
164 *
165 * @returns VBox status code suitable for scheduling.
166 * @param pPool The pool.
167 * @param pPage A page in the chain.
168 */
169int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
170{
171 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
172
173 /*
174 * Find the list head.
175 */
176 uint16_t idx = pPage->idx;
177 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
178 {
179 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
180 {
181 idx = pPage->iMonitoredPrev;
182 Assert(idx != pPage->idx);
183 pPage = &pPool->aPages[idx];
184 }
185 }
186
187 /*
188 * Iterate the list flushing each shadow page.
189 */
190 int rc = VINF_SUCCESS;
191 for (;;)
192 {
193 idx = pPage->iMonitoredNext;
194 Assert(idx != pPage->idx);
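 /* Only regular pool pages are flushed; indices below PGMPOOL_IDX_FIRST are the fixed root pages. */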
195 if (pPage->idx >= PGMPOOL_IDX_FIRST)
196 {
197 int rc2 = pgmPoolFlushPage(pPool, pPage);
198 AssertRC(rc2);
199 }
200 /* next */
201 if (idx == NIL_PGMPOOL_IDX)
202 break;
203 pPage = &pPool->aPages[idx];
204 }
205 return rc;
206}
207
208
209/**
210 * Wrapper for getting the current context pointer to the entry being modified.
211 *
212 * @returns VBox status code suitable for scheduling.
213 * @param pVM VM Handle.
214 * @param pvDst Destination address
215 * @param pvSrc Source guest virtual address.
216 * @param GCPhysSrc The source guest physical address.
217 * @param cb Size of data to read
218 */
219DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
220{
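 /* The source is aligned down to the entry size (entries are naturally aligned), so the whole guest entry being modified is read in one go. */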
221#if defined(IN_RING3)
222 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
223 return VINF_SUCCESS;
224#else
225 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
226 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
227#endif
228}
229
230/**
231 * Process shadow entries before they are changed by the guest.
232 *
233 * For PT entries we will clear them. For PD entries, we'll simply check
234 * for mapping conflicts and set the SyncCR3 FF if found.
235 *
236 * @param pVCpu VMCPU handle
237 * @param pPool The pool.
238 * @param pPage The head page.
239 * @param GCPhysFault The guest physical fault address.
240 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
241 * In R3 this is the host context 'fault' address.
242 * @param pDis The disassembler state for figuring out the write size.
243 * This need not be specified if the caller knows we won't do cross entry accesses.
244 */
245void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, PDISCPUSTATE pDis)
246{
247 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
248 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
249 const unsigned cbWrite = pDis ? pgmPoolDisasWriteSize(pDis) : 0;
250 PVM pVM = pPool->CTX_SUFF(pVM);
251
252 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)pvAddress, GCPhysFault, cbWrite));
253
254 for (;;)
255 {
256 union
257 {
258 void *pv;
259 PX86PT pPT;
260 PX86PTPAE pPTPae;
261 PX86PD pPD;
262 PX86PDPAE pPDPae;
263 PX86PDPT pPDPT;
264 PX86PML4 pPML4;
265 } uShw;
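 /* Typed views of the shadow page; which member is valid depends on pPage->enmKind. */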
266
267 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
268
269 uShw.pv = NULL;
270 switch (pPage->enmKind)
271 {
272 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
273 {
274 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
275 const unsigned iShw = off / sizeof(X86PTE);
276 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
277 if (uShw.pPT->a[iShw].n.u1Present)
278 {
279# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
280 X86PTE GstPte;
281
282 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
283 AssertRC(rc);
284 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
285 pgmPoolTracDerefGCPhysHint(pPool, pPage,
286 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
287 GstPte.u & X86_PTE_PG_MASK);
288# endif
289 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
290 }
291 break;
292 }
293
294 /* page/2 sized */
295 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
296 {
297 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
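 /* A PAE PT shadows only half of the 4 KB 32-bit guest PT; only act if the write falls in the half this shadow page covers. */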
298 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
299 {
300 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
301 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
302 if (uShw.pPTPae->a[iShw].n.u1Present)
303 {
304# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
305 X86PTE GstPte;
306 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
307 AssertRC(rc);
308
309 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
310 pgmPoolTracDerefGCPhysHint(pPool, pPage,
311 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
312 GstPte.u & X86_PTE_PG_MASK);
313# endif
314 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
315 }
316 }
317 break;
318 }
319
320 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
321 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
322 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
323 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
324 {
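 /* The 1024-entry 32-bit guest PD is shadowed by four 512-entry PAE PDs, each guest PDE expanding into
 two PAE PDEs; iShwPdpt selects the shadow PD, iShw the first of the two shadow entries. */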
325 unsigned iGst = off / sizeof(X86PDE);
326 unsigned iShwPdpt = iGst / 256;
327 unsigned iShw = (iGst % 256) * 2;
328 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
329
330 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
331 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
332 {
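 /* Update both PAE PDEs that shadow this single 32-bit guest PDE. */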
333 for (unsigned i = 0; i < 2; i++)
334 {
335# ifndef IN_RING0
336 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
337 {
338 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
339 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
340 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
341 break;
342 }
343 else
344# endif /* !IN_RING0 */
345 if (uShw.pPDPae->a[iShw+i].n.u1Present)
346 {
347 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
348 pgmPoolFree(pVM,
349 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
350 pPage->idx,
351 iShw + i);
352 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
353 }
354
355 /* paranoia / a bit assumptive. */
356 if ( pDis
357 && (off & 3)
358 && (off & 3) + cbWrite > 4)
359 {
360 const unsigned iShw2 = iShw + 2 + i;
361 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
362 {
363# ifndef IN_RING0
364 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
365 {
366 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
367 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
368 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
369 break;
370 }
371 else
372# endif /* !IN_RING0 */
373 if (uShw.pPDPae->a[iShw2].n.u1Present)
374 {
375 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
376 pgmPoolFree(pVM,
377 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
378 pPage->idx,
379 iShw2);
380 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
381 }
382 }
383 }
384 }
385 }
386 break;
387 }
388
389 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
390 {
391 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
392 const unsigned iShw = off / sizeof(X86PTEPAE);
393 if (uShw.pPTPae->a[iShw].n.u1Present)
394 {
395# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
396 X86PTEPAE GstPte;
397 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
398 AssertRC(rc);
399
400 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
401 pgmPoolTracDerefGCPhysHint(pPool, pPage,
402 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
403 GstPte.u & X86_PTE_PAE_PG_MASK);
404# endif
405 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
406 }
407
408 /* paranoia / a bit assumptive. */
409 if ( pDis
410 && (off & 7)
411 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
412 {
413 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
414 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
415
416 if (uShw.pPTPae->a[iShw2].n.u1Present)
417 {
418# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
419 X86PTEPAE GstPte;
420# ifdef IN_RING3
421 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
422# else
423 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
424# endif
425 AssertRC(rc);
426 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
427 pgmPoolTracDerefGCPhysHint(pPool, pPage,
428 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
429 GstPte.u & X86_PTE_PAE_PG_MASK);
430# endif
431 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u, 0);
432 }
433 }
434 break;
435 }
436
437 case PGMPOOLKIND_32BIT_PD:
438 {
439 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
440 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
441
442 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
443# ifndef IN_RING0
444 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
445 {
446 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
447 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
448 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
449 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
450 break;
451 }
452# endif /* !IN_RING0 */
453# ifndef IN_RING0
454 else
455# endif /* !IN_RING0 */
456 {
457 if (uShw.pPD->a[iShw].n.u1Present)
458 {
459 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
460 pgmPoolFree(pVM,
461 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
462 pPage->idx,
463 iShw);
464 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
465 }
466 }
467 /* paranoia / a bit assumptive. */
468 if ( pDis
469 && (off & 3)
470 && (off & 3) + cbWrite > sizeof(X86PTE))
471 {
472 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
473 if ( iShw2 != iShw
474 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
475 {
476# ifndef IN_RING0
477 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
478 {
479 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
480 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
481 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
482 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
483 break;
484 }
485# endif /* !IN_RING0 */
486# ifndef IN_RING0
487 else
488# endif /* !IN_RING0 */
489 {
490 if (uShw.pPD->a[iShw2].n.u1Present)
491 {
492 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
493 pgmPoolFree(pVM,
494 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
495 pPage->idx,
496 iShw2);
497 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
498 }
499 }
500 }
501 }
502#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
503 if ( uShw.pPD->a[iShw].n.u1Present
504 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
505 {
506 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
507# ifdef IN_RC /* TLB load - we're pushing things a bit... */
508 ASMProbeReadByte(pvAddress);
509# endif
510 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
511 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
512 }
513#endif
514 break;
515 }
516
517 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
518 {
519 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
520 const unsigned iShw = off / sizeof(X86PDEPAE);
521#ifndef IN_RING0
522 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
523 {
524 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
525 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
526 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
527 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
528 break;
529 }
530#endif /* !IN_RING0 */
531 /*
532 * Causes trouble when the guest uses a PDE to refer to the whole page table level
533 * structure. (Invalidate here; faults later on when it tries to change the page
534 * table entries -> recheck; probably only applies to the RC case.)
535 */
536# ifndef IN_RING0
537 else
538# endif /* !IN_RING0 */
539 {
540 if (uShw.pPDPae->a[iShw].n.u1Present)
541 {
542 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
543 pgmPoolFree(pVM,
544 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
545 pPage->idx,
546 iShw);
547 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
548 }
549 }
550 /* paranoia / a bit assumptive. */
551 if ( pDis
552 && (off & 7)
553 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
554 {
555 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
556 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
557
558#ifndef IN_RING0
559 if ( iShw2 != iShw
560 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
561 {
562 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
563 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
564 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
565 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
566 break;
567 }
568#endif /* !IN_RING0 */
569# ifndef IN_RING0
570 else
571# endif /* !IN_RING0 */
572 if (uShw.pPDPae->a[iShw2].n.u1Present)
573 {
574 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
575 pgmPoolFree(pVM,
576 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
577 pPage->idx,
578 iShw2);
579 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
580 }
581 }
582 break;
583 }
584
585 case PGMPOOLKIND_PAE_PDPT:
586 {
587 /*
588 * Hopefully this doesn't happen very often:
589 * - touching unused parts of the page
590 * - messing with the bits of pd pointers without changing the physical address
591 */
592 /* PDPT roots are not page aligned; 32 bytes only! */
593 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
594
595 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
596 const unsigned iShw = offPdpt / sizeof(X86PDPE);
597 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
598 {
599# ifndef IN_RING0
600 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
601 {
602 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
603 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
604 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
605 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
606 break;
607 }
608# endif /* !IN_RING0 */
609# ifndef IN_RING0
610 else
611# endif /* !IN_RING0 */
612 if (uShw.pPDPT->a[iShw].n.u1Present)
613 {
614 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
615 pgmPoolFree(pVM,
616 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
617 pPage->idx,
618 iShw);
619 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
620 }
621
622 /* paranoia / a bit assumptive. */
623 if ( pDis
624 && (offPdpt & 7)
625 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
626 {
627 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
628 if ( iShw2 != iShw
629 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
630 {
631# ifndef IN_RING0
632 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
633 {
634 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
635 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
636 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
637 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
638 break;
639 }
640# endif /* !IN_RING0 */
641# ifndef IN_RING0
642 else
643# endif /* !IN_RING0 */
644 if (uShw.pPDPT->a[iShw2].n.u1Present)
645 {
646 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
647 pgmPoolFree(pVM,
648 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
649 pPage->idx,
650 iShw2);
651 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
652 }
653 }
654 }
655 }
656 break;
657 }
658
659#ifndef IN_RC
660 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
661 {
662 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
663 const unsigned iShw = off / sizeof(X86PDEPAE);
664 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
665 if (uShw.pPDPae->a[iShw].n.u1Present)
666 {
667 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
668 pgmPoolFree(pVM,
669 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
670 pPage->idx,
671 iShw);
672 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
673 }
674 /* paranoia / a bit assumptive. */
675 if ( pDis
676 && (off & 7)
677 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
678 {
679 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
680 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
681
682 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
683 if (uShw.pPDPae->a[iShw2].n.u1Present)
684 {
685 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
686 pgmPoolFree(pVM,
687 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
688 pPage->idx,
689 iShw2);
690 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
691 }
692 }
693 break;
694 }
695
696 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
697 {
698 /*
699 * Hopefully this doesn't happen very often:
700 * - messing with the bits of pd pointers without changing the physical address
701 */
702 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
703 {
704 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
705 const unsigned iShw = off / sizeof(X86PDPE);
706 if (uShw.pPDPT->a[iShw].n.u1Present)
707 {
708 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
709 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
710 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
711 }
712 /* paranoia / a bit assumptive. */
713 if ( pDis
714 && (off & 7)
715 && (off & 7) + cbWrite > sizeof(X86PDPE))
716 {
717 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
718 if (uShw.pPDPT->a[iShw2].n.u1Present)
719 {
720 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
721 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
722 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
723 }
724 }
725 }
726 break;
727 }
728
729 case PGMPOOLKIND_64BIT_PML4:
730 {
731 /*
732 * Hopefully this doesn't happen very often:
733 * - messing with the bits of pd pointers without changing the physical address
734 */
735 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
736 {
737 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
738 const unsigned iShw = off / sizeof(X86PDPE);
739 if (uShw.pPML4->a[iShw].n.u1Present)
740 {
741 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
742 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
743 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
744 }
745 /* paranoia / a bit assumptive. */
746 if ( pDis
747 && (off & 7)
748 && (off & 7) + cbWrite > sizeof(X86PDPE))
749 {
750 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
751 if (uShw.pPML4->a[iShw2].n.u1Present)
752 {
753 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
754 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
755 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
756 }
757 }
758 }
759 break;
760 }
761#endif /* !IN_RC */
762
763 default:
764 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
765 }
766 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
767
768 /* next */
769 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
770 return;
771 pPage = &pPool->aPages[pPage->iMonitoredNext];
772 }
773}
774
775# ifndef IN_RING3
776/**
777 * Checks if an access could be part of a fork operation in progress.
778 *
779 * Meaning, that the guest is setting up the parent process for Copy-On-Write.
780 *
781 * @returns true if it's likely that we're forking, otherwise false.
782 * @param pPool The pool.
783 * @param pDis The disassembled instruction.
784 * @param offFault The access offset.
785 */
786DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
787{
788 /*
789 * i386 Linux uses btr to clear X86_PTE_RW.
790 * The functions involved are (2.6.16 source inspection):
791 * clear_bit
792 * ptep_set_wrprotect
793 * copy_one_pte
794 * copy_pte_range
795 * copy_pmd_range
796 * copy_pud_range
797 * copy_page_range
798 * dup_mmap
799 * dup_mm
800 * copy_mm
801 * copy_process
802 * do_fork
803 */
804 if ( pDis->pCurInstr->opcode == OP_BTR
805 && !(offFault & 4)
806 /** @todo Validate that the bit index is X86_PTE_RW. */
807 )
808 {
809 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
810 return true;
811 }
812 return false;
813}
814
815
816/**
817 * Determine whether the page is likely to have been reused.
818 *
819 * @returns true if we consider the page as being reused for a different purpose.
820 * @returns false if we consider it to still be a paging page.
821 * @param pVM VM Handle.
822 * @param pVCpu VMCPU Handle.
823 * @param pRegFrame Trap register frame.
824 * @param pDis The disassembly info for the faulting instruction.
825 * @param pvFault The fault address.
826 *
827 * @remark The REP prefix check is left to the caller because of STOSD/W.
828 */
829DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
830{
831#ifndef IN_RC
832 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
833 if ( HWACCMHasPendingIrq(pVM)
834 && (pRegFrame->rsp - pvFault) < 32)
835 {
836 /* Fault caused by stack writes while trying to inject an interrupt event. */
837 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
838 return true;
839 }
840#else
841 NOREF(pVM); NOREF(pvFault);
842#endif
843
844 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
845
846 /* Non-supervisor mode write means it's used for something else. */
847 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
848 return true;
849
850 switch (pDis->pCurInstr->opcode)
851 {
852 /* call implies the actual push of the return address faulted */
853 case OP_CALL:
854 Log4(("pgmPoolMonitorIsReused: CALL\n"));
855 return true;
856 case OP_PUSH:
857 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
858 return true;
859 case OP_PUSHF:
860 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
861 return true;
862 case OP_PUSHA:
863 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
864 return true;
865 case OP_FXSAVE:
866 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
867 return true;
868 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
869 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
870 return true;
871 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
872 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
873 return true;
874 case OP_MOVSWD:
875 case OP_STOSWD:
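 /* A 64-bit REP STOSQ/MOVSQ with a large count is presumably clearing or copying the whole page for another purpose. */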
876 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
877 && pRegFrame->rcx >= 0x40
878 )
879 {
880 Assert(pDis->mode == CPUMODE_64BIT);
881
882 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
883 return true;
884 }
885 return false;
886 }
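 /* Writes through ESP/RSP indicate the page is being used as a stack rather than as a page table. */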
887 if ( ( (pDis->param1.flags & USE_REG_GEN32)
888 || (pDis->param1.flags & USE_REG_GEN64))
889 && (pDis->param1.base.reg_gen == USE_REG_ESP))
890 {
891 Log4(("pgmPoolMonitorIsReused: ESP\n"));
892 return true;
893 }
894
895 return false;
896}
897
898
899/**
900 * Flushes the page being accessed.
901 *
902 * @returns VBox status code suitable for scheduling.
903 * @param pVM The VM handle.
904 * @param pVCpu The VMCPU handle.
905 * @param pPool The pool.
906 * @param pPage The pool page (head).
907 * @param pDis The disassembly of the write instruction.
908 * @param pRegFrame The trap register frame.
909 * @param GCPhysFault The fault address as guest physical address.
910 * @param pvFault The fault address.
911 */
912static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
913 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
914{
915 /*
916 * First, do the flushing.
917 */
918 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
919
920 /*
921 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
922 * @todo: why is this necessary? an instruction restart would be sufficient, wouldn't it?
923 */
924 uint32_t cbWritten;
925 int rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten);
926 if (RT_SUCCESS(rc2))
927 pRegFrame->rip += pDis->opsize;
928 else if (rc2 == VERR_EM_INTERPRETER)
929 {
930#ifdef IN_RC
931 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
932 {
933 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
934 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
935 rc = VINF_SUCCESS;
936 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
937 }
938 else
939#endif
940 {
941 rc = VINF_EM_RAW_EMULATE_INSTR;
942 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
943 }
944 }
945 else
946 rc = rc2;
947
948 /* See use in pgmPoolAccessHandlerSimple(). */
949 PGM_INVL_VCPU_TLBS(pVCpu);
950
951 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
952 return rc;
953
954}
955
956
957/**
958 * Handles the STOSD write accesses.
959 *
960 * @returns VBox status code suitable for scheduling.
961 * @param pVM The VM handle.
962 * @param pPool The pool.
963 * @param pPage The pool page (head).
964 * @param pDis The disassembly of the write instruction.
965 * @param pRegFrame The trap register frame.
966 * @param GCPhysFault The fault address as guest physical address.
967 * @param pvFault The fault address.
968 */
969DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
970 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
971{
972 unsigned uIncrement = pDis->param1.size;
973
974 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
975 Assert(pRegFrame->rcx <= 0x20);
976
977#ifdef VBOX_STRICT
978 if (pDis->opmode == CPUMODE_32BIT)
979 Assert(uIncrement == 4);
980 else
981 Assert(uIncrement == 8);
982#endif
983
984 Log3(("pgmPoolAccessHandlerSTOSD\n"));
985
986 /*
987 * Increment the modification counter and insert it into the list
988 * of modified pages the first time.
989 */
990 if (!pPage->cModifications++)
991 pgmPoolMonitorModifiedInsert(pPool, pPage);
992
993 /*
994 * Execute REP STOSD.
995 *
996 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
997 * write situation, meaning that it's safe to write here.
998 */
999 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1000 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
1001 while (pRegFrame->rcx)
1002 {
1003#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1004 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1005 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1006 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1007#else
1008 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1009#endif
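 /* In RC the guest address is directly accessible from this context; elsewhere write via the guest physical address. */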
1010#ifdef IN_RC
1011 *(uint32_t *)pu32 = pRegFrame->eax;
1012#else
1013 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
1014#endif
1015 pu32 += uIncrement;
1016 GCPhysFault += uIncrement;
1017 pRegFrame->rdi += uIncrement;
1018 pRegFrame->rcx--;
1019 }
1020 pRegFrame->rip += pDis->opsize;
1021
1022#ifdef IN_RC
1023 /* See use in pgmPoolAccessHandlerSimple(). */
1024 PGM_INVL_VCPU_TLBS(pVCpu);
1025#endif
1026
1027 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
1028 return VINF_SUCCESS;
1029}
1030
1031
1032/**
1033 * Handles the simple write accesses.
1034 *
1035 * @returns VBox status code suitable for scheduling.
1036 * @param pVM The VM handle.
1037 * @param pVCpu The VMCPU handle.
1038 * @param pPool The pool.
1039 * @param pPage The pool page (head).
1040 * @param pDis The disassembly of the write instruction.
1041 * @param pRegFrame The trap register frame.
1042 * @param GCPhysFault The fault address as guest physical address.
1043 * @param pvFault The fault address.
1044 */
1045DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1046 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1047{
1048 Log3(("pgmPoolAccessHandlerSimple\n"));
1049 /*
1050 * Increment the modification counter and insert it into the list
1051 * of modified pages the first time.
1052 */
1053 if (!pPage->cModifications++)
1054 pgmPoolMonitorModifiedInsert(pPool, pPage);
1055
1056 /*
1057 * Clear all the pages. ASSUMES that pvFault is readable.
1058 */
1059#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1060 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1061 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, pDis);
1062 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1063#else
1064 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, pDis);
1065#endif
1066
1067 /*
1068 * Interpret the instruction.
1069 */
1070 uint32_t cb;
1071 int rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb);
1072 if (RT_SUCCESS(rc))
1073 pRegFrame->rip += pDis->opsize;
1074 else if (rc == VERR_EM_INTERPRETER)
1075 {
1076 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1077 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1078 rc = VINF_EM_RAW_EMULATE_INSTR;
1079 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1080 }
1081
1082#ifdef IN_RC
1083 /*
1084 * Quick hack, with logging enabled we're getting stale
1085 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
1086 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
1087 * have to be fixed to support this. But that'll have to wait till next week.
1088 *
1089 * An alternative is to keep track of the changed PTEs together with the
1090 * GCPhys from the guest PT. This may prove expensive though.
1091 *
1092 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
1093 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1094 */
1095 PGM_INVL_VCPU_TLBS(pVCpu);
1096#endif
1097
1098 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1099 return rc;
1100}
1101
1102/**
1103 * \#PF Handler callback for PT write accesses.
1104 *
1105 * @returns VBox status code (appropriate for GC return).
1106 * @param pVM VM Handle.
1107 * @param uErrorCode CPU Error code.
1108 * @param pRegFrame Trap register frame.
1109 * NULL on DMA and other non CPU access.
1110 * @param pvFault The fault address (cr2).
1111 * @param GCPhysFault The GC physical address corresponding to pvFault.
1112 * @param pvUser User argument.
1113 */
1114DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1115{
1116 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1117 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1118 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1119 PVMCPU pVCpu = VMMGetCpu(pVM);
1120 unsigned cMaxModifications;
1121 bool fForcedFlush = false;
1122
1123 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1124
1125 pgmLock(pVM);
1126 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1127 {
1128 /* Pool page changed while we were waiting for the lock; ignore. */
1129 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1130 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1131 pgmUnlock(pVM);
1132 return VINF_SUCCESS;
1133 }
1134
1135 /*
1136 * Disassemble the faulting instruction.
1137 */
1138 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1139 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1140 AssertReturnStmt(rc == VINF_SUCCESS, pgmUnlock(pVM), rc);
1141
1142 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1143
1144 /*
1145 * We should ALWAYS have the list head as user parameter. This
1146 * is because we use that page to record the changes.
1147 */
1148 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1149#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1150 Assert(!pPage->fDirty);
1151#endif
1152
1153 /* Maximum nr of modifications depends on the guest mode. */
1154 if (pDis->mode == CPUMODE_32BIT)
1155 cMaxModifications = 48;
1156 else
1157 cMaxModifications = 24;
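 /* (Legacy 32-bit page tables hold 1024 entries vs. 512 for PAE/long mode, hence the higher threshold.) */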
1158
1159 /*
1160 * Incremental page table updates should weigh more than random ones.
1161 * (Only applies when started from offset 0)
1162 */
1163 pVCpu->pgm.s.cPoolAccessHandler++;
1164 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1165 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1166 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1167 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1168 {
1169 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1170 pPage->cModifications = pPage->cModifications * 2;
1171 pPage->pvLastAccessHandlerFault = pvFault;
1172 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1173 if (pPage->cModifications >= cMaxModifications)
1174 {
1175 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1176 fForcedFlush = true;
1177 }
1178 }
1179
1180 if (pPage->cModifications >= cMaxModifications)
1181 Log(("Mod overflow %VGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1182
1183 /*
1184 * Check if it's worth dealing with.
1185 */
1186 bool fReused = false;
1187 bool fNotReusedNotForking = false;
1188 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1189 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1190 )
1191 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1192 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1193 {
1194 /*
1195 * Simple instructions, no REP prefix.
1196 */
1197 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1198 {
1199 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1200
1201 /* A mov instruction to change the first page table entry will be remembered so we can detect
1202 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1203 */
1204 if ( rc == VINF_SUCCESS
1205 && pDis->pCurInstr->opcode == OP_MOV
1206 && (pvFault & PAGE_OFFSET_MASK) == 0)
1207 {
1208 pPage->pvLastAccessHandlerFault = pvFault;
1209 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1210 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1211 /* Make sure we don't kick out a page too quickly. */
1212 if (pPage->cModifications > 8)
1213 pPage->cModifications = 2;
1214 }
1215 else
1216 if (pPage->pvLastAccessHandlerFault == pvFault)
1217 {
1218 /* ignore the 2nd write to this page table entry. */
1219 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1220 }
1221 else
1222 {
1223 pPage->pvLastAccessHandlerFault = 0;
1224 pPage->pvLastAccessHandlerRip = 0;
1225 }
1226
1227 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1228 pgmUnlock(pVM);
1229 return rc;
1230 }
1231
1232 /*
1233 * Windows is frequently doing small memset() operations (netio test 4k+).
1234 * We have to deal with these or we'll kill the cache and performance.
1235 */
1236 if ( pDis->pCurInstr->opcode == OP_STOSWD
1237 && !pRegFrame->eflags.Bits.u1DF
1238 && pDis->opmode == pDis->mode
1239 && pDis->addrmode == pDis->mode)
1240 {
1241 bool fValidStosd = false;
1242
1243 if ( pDis->mode == CPUMODE_32BIT
1244 && pDis->prefix == PREFIX_REP
1245 && pRegFrame->ecx <= 0x20
1246 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1247 && !((uintptr_t)pvFault & 3)
1248 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1249 )
1250 {
1251 fValidStosd = true;
1252 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1253 }
1254 else
1255 if ( pDis->mode == CPUMODE_64BIT
1256 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1257 && pRegFrame->rcx <= 0x20
1258 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1259 && !((uintptr_t)pvFault & 7)
1260 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1261 )
1262 {
1263 fValidStosd = true;
1264 }
1265
1266 if (fValidStosd)
1267 {
1268 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1269 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1270 pgmUnlock(pVM);
1271 return rc;
1272 }
1273 }
1274
1275 /* REP prefix, don't bother. */
1276 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1277 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1278 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1279 fNotReusedNotForking = true;
1280 }
1281
1282#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1283 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1284 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1285 */
1286 if ( pPage->cModifications >= cMaxModifications
1287 && !fForcedFlush
1288 && pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1289 && ( fNotReusedNotForking
1290 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1291 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1292 )
1293 )
1294 {
1295 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1296 Assert(pPage->fDirty == false);
1297
1298 /* Flush any monitored duplicates as we will disable write protection. */
1299 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1300 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1301 {
1302 PPGMPOOLPAGE pPageHead = pPage;
1303
1304 /* Find the monitor head. */
1305 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1306 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1307
1308 while (pPageHead)
1309 {
1310 unsigned idxNext = pPageHead->iMonitoredNext;
1311
1312 if (pPageHead != pPage)
1313 {
1314 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1315 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1316 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1317 AssertRC(rc2);
1318 }
1319
1320 if (idxNext == NIL_PGMPOOL_IDX)
1321 break;
1322
1323 pPageHead = &pPool->aPages[idxNext];
1324 }
1325 }
1326
1327 /* The flushing above might fail for locked pages, so double check. */
1328 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1329 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1330 {
1331 /* Temporarily allow write access to the page table again. */
1332 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
1333 if (rc == VINF_SUCCESS)
1334 {
1335 rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
1336 AssertMsg(rc == VINF_SUCCESS
1337 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1338 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1339 || rc == VERR_PAGE_NOT_PRESENT,
1340 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1341
1342 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1343 pPage->pvDirtyFault = pvFault;
1344
1345 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1346 pgmUnlock(pVM);
1347 return rc;
1348 }
1349 }
1350 }
1351#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1352
1353 /*
1354 * Not worth it, so flush it.
1355 *
1356 * If we considered it to be reused, don't go back to ring-3
1357 * to emulate failed instructions since we usually cannot
1358 * interpret them. This may be a bit risky, in which case
1359 * the reuse detection must be fixed.
1360 */
1361 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1362 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1363 rc = VINF_SUCCESS;
1364 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1365 pgmUnlock(pVM);
1366 return rc;
1367}
1368
1369# endif /* !IN_RING3 */
1370
1371# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1372/**
1373 * Check references to guest physical memory in a PAE / PAE page table.
1374 *
1375 * @param pPool The pool.
1376 * @param pPage The page.
1377 * @param pShwPT The shadow page table (mapping of the page).
1378 * @param pGstPT The guest page table.
1379 */
1380DECLINLINE(void) pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
1381{
1382 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
1383 {
1384 if (pShwPT->a[i].n.u1Present)
1385 {
1386 RTHCPHYS HCPhys = -1;
1387 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1388 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1389 }
1390 }
1391}
1392
1393/**
1394 * Clear references to guest physical memory in a PAE / PAE page table.
1395 *
1396 * @returns nr of changed PTEs
1397 * @param pPool The pool.
1398 * @param pPage The page.
1399 * @param pShwPT The shadow page table (mapping of the page).
1400 * @param pGstPT The guest page table.
1401 * @param pOldGstPT The old cached guest page table.
1402 */
1403DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT, PCX86PTPAE pOldGstPT)
1404{
1405 unsigned cChanged = 0;
1406
1407 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
1408 {
1409 if (pShwPT->a[i].n.u1Present)
1410 {
1411 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1412 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1413 {
1414#ifdef VBOX_STRICT
1415 RTHCPHYS HCPhys = -1;
1416 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1417 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1418#endif
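 /* The physical address is unchanged; additionally require identical attributes and a shadow entry that is
 not more writable than the guest's before leaving it alone. */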
1419 uint64_t uHostAttr = pShwPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1420 bool fHostRW = !!(pShwPT->a[i].u & X86_PTE_RW);
1421 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1422 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1423
1424 if ( uHostAttr == uGuestAttr
1425 && fHostRW <= fGuestRW)
1426 continue;
1427 }
1428 cChanged++;
1429 /* Something was changed, so flush it. */
1430 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
1431 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1432 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
1433 ASMAtomicWriteSize(&pShwPT->a[i].u, 0);
1434 }
1435 }
1436 return cChanged;
1437}
1438
1439
1440/**
1441 * Flushes a dirty page.
1442 *
1443 * @param pVM VM Handle.
1444 * @param pPool The pool.
1445 * @param idxSlot Dirty array slot index
1446 * @param fForceRemoval Force removal from the dirty page list
1447 */
1448static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fForceRemoval = false)
1449{
1450 PPGMPOOLPAGE pPage;
1451 unsigned idxPage;
1452
1453 Assert(idxSlot < RT_ELEMENTS(pPool->aIdxDirtyPages));
1454 if (pPool->aIdxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
1455 return;
1456
1457 idxPage = pPool->aIdxDirtyPages[idxSlot];
1458 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1459 pPage = &pPool->aPages[idxPage];
1460 Assert(pPage->idx == idxPage);
1461 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1462
1463 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1464 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1465
1466 /* Flush those PTEs that have changed. */
1467 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1468 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1469 void *pvGst;
1470 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1471 unsigned cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst, (PCX86PTPAE)&pPool->aDirtyPages[idxSlot][0]);
1472 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1473
1474 /** Note: we might want to consider keeping the dirty page active in case there were many changes. */
1475
1476 /* Write protect the page again to catch all write accesses. */
1477 rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys);
1478 Assert(rc == VINF_SUCCESS);
1479 pPage->fDirty = false;
1480
1481#ifdef VBOX_STRICT
1482 uint64_t fFlags = 0;
1483 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, NULL);
1484 AssertMsg( (rc == VINF_SUCCESS && !(fFlags & X86_PTE_RW))
1485 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1486 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1487 || rc == VERR_PAGE_NOT_PRESENT,
1488 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1489#endif
1490
1491 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1492 Assert(pPage->cModifications);
1493 if (cChanges < 4)
1494 pPage->cModifications = 1; /* must use > 0 here */
1495 else
1496 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1497
1498 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1499 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages))
1500 pPool->idxFreeDirtyPage = idxSlot;
1501
1502 pPool->cDirtyPages--;
1503 pPool->aIdxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1504 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1505 Log(("Removed dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1506}
1507
1508# ifndef IN_RING3
1509/**
1510 * Adds a new dirty page.
1511 *
1512 * @param pVM VM Handle.
1513 * @param pPool The pool.
1514 * @param pPage The page.
1515 */
1516void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1517{
1518 unsigned idxFree;
1519
1520 Assert(PGMIsLocked(pVM));
1521 AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
1522 Assert(!pPage->fDirty);
1523
1524 idxFree = pPool->idxFreeDirtyPage;
1525 Assert(idxFree < RT_ELEMENTS(pPool->aIdxDirtyPages));
1526 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1527
1528 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aIdxDirtyPages))
1529 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* force removal */);
1530 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages));
1531 AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1532
1533 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1534
1535 /* Make a copy of the guest page table as we require valid GCPhys addresses when removing
1536 * references to physical pages. (the HCPhys linear lookup is *extremely* expensive!)
1537 */
1538 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1539 void *pvGst;
1540 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1541 memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
1542 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1543
1544 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1545 pPage->fDirty = true;
1546 pPage->idxDirty = idxFree;
1547 pPool->aIdxDirtyPages[idxFree] = pPage->idx;
1548 pPool->cDirtyPages++;
1549
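 /* Advance the free slot hint (the array size is a power of two) and, if that slot is taken, search for any free one. */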
1550 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1551 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages)
1552 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1553 {
1554 unsigned i;
1555 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1556 {
1557 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1558 if (pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1559 {
1560 pPool->idxFreeDirtyPage = idxFree;
1561 break;
1562 }
1563 }
1564 Assert(i != RT_ELEMENTS(pPool->aIdxDirtyPages));
1565 }
1566
1567 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages) || pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1568 return;
1569}
1570# endif /* !IN_RING3 */
1571
1572/**
1573 * Reset all dirty pages by reinstating page monitoring.
1574 *
1575 * @param pVM VM Handle.
1576 * @param fForceRemoval Force removal of all dirty pages
1577 */
1578void pgmPoolResetDirtyPages(PVM pVM, bool fForceRemoval)
1579{
1580 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1581 Assert(PGMIsLocked(pVM));
1582 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1583
1584 if (!pPool->cDirtyPages)
1585 return;
1586
1587 Log(("pgmPoolResetDirtyPages\n"));
1588 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1589 pgmPoolFlushDirtyPage(pVM, pPool, i, fForceRemoval);
1590
1591 pPool->idxFreeDirtyPage = 0;
1592 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1593 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1594 {
1595 unsigned i;
1596 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1597 {
1598 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1599 {
1600 pPool->idxFreeDirtyPage = i;
1601 break;
1602 }
1603 }
1604 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1605 }
1606
1607 Assert(pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages));
1608 return;
1609}
1610# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1611#endif /* PGMPOOL_WITH_MONITORING */
1612
1613#ifdef PGMPOOL_WITH_CACHE
1614
1615/**
1616 * Inserts a page into the GCPhys hash table.
1617 *
1618 * @param pPool The pool.
1619 * @param pPage The page.
1620 */
1621DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1622{
1623 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1624 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
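    /* Each hash bucket is a singly linked chain of pool page indices (iNext); new pages are pushed at the head of the chain. */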
1625 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1626 pPage->iNext = pPool->aiHash[iHash];
1627 pPool->aiHash[iHash] = pPage->idx;
1628}
1629
1630
1631/**
1632 * Removes a page from the GCPhys hash table.
1633 *
1634 * @param pPool The pool.
1635 * @param pPage The page.
1636 */
1637DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1638{
1639 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1640 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1641 if (pPool->aiHash[iHash] == pPage->idx)
1642 pPool->aiHash[iHash] = pPage->iNext;
1643 else
1644 {
1645 uint16_t iPrev = pPool->aiHash[iHash];
1646 for (;;)
1647 {
1648 const int16_t i = pPool->aPages[iPrev].iNext;
1649 if (i == pPage->idx)
1650 {
1651 pPool->aPages[iPrev].iNext = pPage->iNext;
1652 break;
1653 }
1654 if (i == NIL_PGMPOOL_IDX)
1655 {
1656 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1657 break;
1658 }
1659 iPrev = i;
1660 }
1661 }
1662 pPage->iNext = NIL_PGMPOOL_IDX;
1663}
1664
1665
1666/**
1667 * Frees up one cache page.
1668 *
1669 * @returns VBox status code.
1670 * @retval VINF_SUCCESS on success.
1671 * @param pPool The pool.
1672 * @param iUser The user index.
1673 */
1674static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1675{
1676#ifndef IN_RC
1677 const PVM pVM = pPool->CTX_SUFF(pVM);
1678#endif
1679 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1680 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1681
1682 /*
1683 * Select one page from the tail of the age list.
1684 */
1685 PPGMPOOLPAGE pPage;
1686 for (unsigned iLoop = 0; ; iLoop++)
1687 {
1688 uint16_t iToFree = pPool->iAgeTail;
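        /* The age list tail is the least recently used page; if it happens to be the caller's own user table, take the next oldest page instead. */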
1689 if (iToFree == iUser)
1690 iToFree = pPool->aPages[iToFree].iAgePrev;
1691/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1692 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1693 {
1694 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1695 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1696 {
1697 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1698 continue;
1699 iToFree = i;
1700 break;
1701 }
1702 }
1703*/
1704 Assert(iToFree != iUser);
1705 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1706 pPage = &pPool->aPages[iToFree];
1707
1708 /*
1709 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1710 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1711 */
1712 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1713 break;
1714 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1715 pgmPoolCacheUsed(pPool, pPage);
1716 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1717 }
1718
1719 /*
1720 * Found a usable page, flush it and return.
1721 */
1722 int rc = pgmPoolFlushPage(pPool, pPage);
1723 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1724 if (rc == VINF_SUCCESS)
1725 PGM_INVL_ALL_VCPU_TLBS(pVM);
1726 return rc;
1727}
1728
1729
1730/**
1731 * Checks if a kind mismatch is really a page being reused
1732 * or just a normal remapping.
1733 *
1734 * @returns true if reused and the cached page (enmKind1) should be flushed
1735 * @returns false if not reused.
1736 * @param enmKind1 The kind of the cached page.
1737 * @param enmKind2 The kind of the requested page.
1738 */
1739static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1740{
1741 switch (enmKind1)
1742 {
1743 /*
1744 * Never reuse them. There is no remapping in non-paging mode.
1745 */
1746 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1747 case PGMPOOLKIND_32BIT_PD_PHYS:
1748 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1749 case PGMPOOLKIND_PAE_PD_PHYS:
1750 case PGMPOOLKIND_PAE_PDPT_PHYS:
1751 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1752 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1753 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1754 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1755 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1756 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1757 return false;
1758
1759 /*
1760 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1761 */
1762 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1763 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1764 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1765 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1766 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1767 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1768 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1769 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1770 case PGMPOOLKIND_32BIT_PD:
1771 case PGMPOOLKIND_PAE_PDPT:
1772 switch (enmKind2)
1773 {
1774 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1775 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1776 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1777 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1778 case PGMPOOLKIND_64BIT_PML4:
1779 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1780 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1781 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1782 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1783 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1784 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1785 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1786 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1787 return true;
1788 default:
1789 return false;
1790 }
1791
1792 /*
1793 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1794 */
1795 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1796 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1797 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1798 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1799 case PGMPOOLKIND_64BIT_PML4:
1800 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1801 switch (enmKind2)
1802 {
1803 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1804 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1805 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1806 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1807 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1808 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1809 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1810 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1811 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1812 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1813 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1814 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1815 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1816 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1817 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1818 return true;
1819 default:
1820 return false;
1821 }
1822
1823 /*
1824 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1825 */
1826 case PGMPOOLKIND_ROOT_NESTED:
1827 return false;
1828
1829 default:
1830 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1831 }
1832}
1833
1834
1835/**
1836 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1837 *
1838 * @returns VBox status code.
1839 * @retval VINF_PGM_CACHED_PAGE on success.
1840 * @retval VERR_FILE_NOT_FOUND if not found.
1841 * @param pPool The pool.
1842 * @param GCPhys The GC physical address of the page we're gonna shadow.
1843 * @param enmKind The kind of mapping.
1844 * @param enmAccess Access type for the mapping (only relevant for big pages)
1845 * @param iUser The shadow page pool index of the user table.
1846 * @param iUserTable The index into the user table (shadowed).
1847 * @param ppPage Where to store the pointer to the page.
1848 */
1849static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1850{
1851#ifndef IN_RC
1852 const PVM pVM = pPool->CTX_SUFF(pVM);
1853#endif
1854 /*
1855 * Look up the GCPhys in the hash.
1856 */
1857 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1858 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
1859 if (i != NIL_PGMPOOL_IDX)
1860 {
1861 do
1862 {
1863 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1864 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1865 if (pPage->GCPhys == GCPhys)
1866 {
1867 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
1868 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
1869 {
1870 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
1871 * doesn't flush it in case there are no more free use records.
1872 */
1873 pgmPoolCacheUsed(pPool, pPage);
1874
1875 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1876 if (RT_SUCCESS(rc))
1877 {
1878 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
1879 *ppPage = pPage;
1880 if (pPage->cModifications)
1881 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
1882 STAM_COUNTER_INC(&pPool->StatCacheHits);
1883 return VINF_PGM_CACHED_PAGE;
1884 }
1885 return rc;
1886 }
1887
1888 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
1889 {
1890 /*
1891 * The kind is different. In some cases we should now flush the page
1892 * as it has been reused, but in most cases this is normal remapping
1893 * of PDs as PT or big pages using the GCPhys field in a slightly
1894 * different way than the other kinds.
1895 */
1896 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1897 {
1898 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1899 pgmPoolFlushPage(pPool, pPage);
1900 PGM_INVL_VCPU_TLBS(VMMGetCpu(pVM)); /* see PT handler. */
1901 break;
1902 }
1903 }
1904 }
1905
1906 /* next */
1907 i = pPage->iNext;
1908 } while (i != NIL_PGMPOOL_IDX);
1909 }
1910
1911 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
1912 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1913 return VERR_FILE_NOT_FOUND;
1914}
1915
1916
1917/**
1918 * Inserts a page into the cache.
1919 *
1920 * @param pPool The pool.
1921 * @param pPage The cached page.
1922 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1923 */
1924static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1925{
1926 /*
1927 * Insert into the GCPhys hash if the page is fit for that.
1928 */
1929 Assert(!pPage->fCached);
1930 if (fCanBeCached)
1931 {
1932 pPage->fCached = true;
1933 pgmPoolHashInsert(pPool, pPage);
1934 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1935 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1936 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1937 }
1938 else
1939 {
1940 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1941 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1942 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1943 }
1944
1945 /*
1946 * Insert at the head of the age list.
1947 */
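    /* The age list is a doubly linked LRU list: the head is the most recently used page and pgmPoolCacheFreeOne evicts from the tail. */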
1948 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1949 pPage->iAgeNext = pPool->iAgeHead;
1950 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1951 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1952 else
1953 pPool->iAgeTail = pPage->idx;
1954 pPool->iAgeHead = pPage->idx;
1955}
1956
1957
1958/**
1959 * Flushes a cached page.
1960 *
1961 * @param pPool The pool.
1962 * @param pPage The cached page.
1963 */
1964static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1965{
1966 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1967
1968 /*
1969 * Remove the page from the hash.
1970 */
1971 if (pPage->fCached)
1972 {
1973 pPage->fCached = false;
1974 pgmPoolHashRemove(pPool, pPage);
1975 }
1976 else
1977 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1978
1979 /*
1980 * Remove it from the age list.
1981 */
1982 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1983 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1984 else
1985 pPool->iAgeTail = pPage->iAgePrev;
1986 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1987 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1988 else
1989 pPool->iAgeHead = pPage->iAgeNext;
1990 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1991 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1992}
1993
1994#endif /* PGMPOOL_WITH_CACHE */
1995#ifdef PGMPOOL_WITH_MONITORING
1996
1997/**
1998 * Looks for pages sharing the monitor.
1999 *
2000 * @returns Pointer to the head page.
2001 * @returns NULL if not found.
2002 * @param pPool The pool.
2003 * @param pNewPage The page which is going to be monitored.
2004 */
2005static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2006{
2007#ifdef PGMPOOL_WITH_CACHE
2008 /*
2009 * Look up the GCPhys in the hash.
2010 */
2011 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2012 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2013 if (i == NIL_PGMPOOL_IDX)
2014 return NULL;
2015 do
2016 {
2017 PPGMPOOLPAGE pPage = &pPool->aPages[i];
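        /* Unsigned compare trick: this matches any pool page whose GCPhys lies within the same guest page as GCPhys (masked to the page start above). */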
2018 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2019 && pPage != pNewPage)
2020 {
2021 switch (pPage->enmKind)
2022 {
2023 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2024 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2025 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2026 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2027 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2028 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2029 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2030 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2031 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2032 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2033 case PGMPOOLKIND_64BIT_PML4:
2034 case PGMPOOLKIND_32BIT_PD:
2035 case PGMPOOLKIND_PAE_PDPT:
2036 {
2037 /* find the head */
2038 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2039 {
2040 Assert(pPage->iMonitoredPrev != pPage->idx);
2041 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2042 }
2043 return pPage;
2044 }
2045
2046 /* ignore, no monitoring. */
2047 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2048 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2049 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2050 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2051 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2052 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2053 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2054 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2055 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2056 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2057 case PGMPOOLKIND_ROOT_NESTED:
2058 case PGMPOOLKIND_PAE_PD_PHYS:
2059 case PGMPOOLKIND_PAE_PDPT_PHYS:
2060 case PGMPOOLKIND_32BIT_PD_PHYS:
2061 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2062 break;
2063 default:
2064 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2065 }
2066 }
2067
2068 /* next */
2069 i = pPage->iNext;
2070 } while (i != NIL_PGMPOOL_IDX);
2071#endif
2072 return NULL;
2073}
2074
2075
2076/**
2077 * Enables write monitoring of a guest page.
2078 *
2079 * @returns VBox status code.
2080 * @retval VINF_SUCCESS on success.
2081 * @param pPool The pool.
2082 * @param pPage The cached page.
2083 */
2084static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2085{
2086 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
2087
2088 /*
2089 * Filter out the relevant kinds.
2090 */
2091 switch (pPage->enmKind)
2092 {
2093 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2094 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2095 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2096 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2097 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2098 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2099 case PGMPOOLKIND_64BIT_PML4:
2100 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2101 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2102 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2103 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2104 case PGMPOOLKIND_32BIT_PD:
2105 case PGMPOOLKIND_PAE_PDPT:
2106 break;
2107
2108 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2109 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2110 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2111 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2112 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2113 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2114 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2115 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2116 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2117 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2118 case PGMPOOLKIND_ROOT_NESTED:
2119 /* Nothing to monitor here. */
2120 return VINF_SUCCESS;
2121
2122 case PGMPOOLKIND_32BIT_PD_PHYS:
2123 case PGMPOOLKIND_PAE_PDPT_PHYS:
2124 case PGMPOOLKIND_PAE_PD_PHYS:
2125 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2126 /* Nothing to monitor here. */
2127 return VINF_SUCCESS;
2128#ifdef PGMPOOL_WITH_MIXED_PT_CR3
2129 break;
2130#else
2131 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2132#endif
2133 default:
2134 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2135 }
2136
2137 /*
2138 * Install handler.
2139 */
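    /* If another pool page already monitors this guest page we merely chain onto its monitor list;
       otherwise a physical write access handler is registered for the whole guest page. */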
2140 int rc;
2141 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2142 if (pPageHead)
2143 {
2144 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2145 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2146
2147#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2148 if (pPageHead->fDirty)
2149 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, true /* force removal */);
2150#endif
2151
2152 pPage->iMonitoredPrev = pPageHead->idx;
2153 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2154 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2155 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2156 pPageHead->iMonitoredNext = pPage->idx;
2157 rc = VINF_SUCCESS;
2158 }
2159 else
2160 {
2161 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2162 PVM pVM = pPool->CTX_SUFF(pVM);
2163 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2164 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2165 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
2166 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2167 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2168 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2169 pPool->pszAccessHandler);
2170 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2171 * the heap size should suffice. */
2172 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2173 Assert(!(VMMGetCpu(pVM)->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(VMMGetCpu(pVM), VMCPU_FF_PGM_SYNC_CR3));
2174 }
2175 pPage->fMonitored = true;
2176 return rc;
2177}
2178
2179
2180/**
2181 * Disables write monitoring of a guest page.
2182 *
2183 * @returns VBox status code.
2184 * @retval VINF_SUCCESS on success.
2185 * @param pPool The pool.
2186 * @param pPage The cached page.
2187 */
2188static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2189{
2190 /*
2191 * Filter out the relevant kinds.
2192 */
2193 switch (pPage->enmKind)
2194 {
2195 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2196 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2197 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2198 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2199 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2200 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2201 case PGMPOOLKIND_64BIT_PML4:
2202 case PGMPOOLKIND_32BIT_PD:
2203 case PGMPOOLKIND_PAE_PDPT:
2204 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2205 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2206 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2207 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2208 break;
2209
2210 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2211 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2212 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2213 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2214 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2215 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2216 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2217 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2218 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2219 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2220 case PGMPOOLKIND_ROOT_NESTED:
2221 case PGMPOOLKIND_PAE_PD_PHYS:
2222 case PGMPOOLKIND_PAE_PDPT_PHYS:
2223 case PGMPOOLKIND_32BIT_PD_PHYS:
2224 /* Nothing to monitor here. */
2225 return VINF_SUCCESS;
2226
2227#ifdef PGMPOOL_WITH_MIXED_PT_CR3
2228 break;
2229#endif
2230 default:
2231 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2232 }
2233
2234 /*
2235 * Remove the page from the monitored list or uninstall it if last.
2236 */
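    /* If this page heads a shared monitor chain, the handler callbacks are re-pointed at the new head;
       the physical handler is only deregistered when the last monitoring page goes away. */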
2237 const PVM pVM = pPool->CTX_SUFF(pVM);
2238 int rc;
2239 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2240 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2241 {
2242 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2243 {
2244 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2245 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2246 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2247 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2248 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2249 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2250 pPool->pszAccessHandler);
2251 AssertFatalRCSuccess(rc);
2252 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2253 }
2254 else
2255 {
2256 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2257 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2258 {
2259 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2260 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2261 }
2262 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2263 rc = VINF_SUCCESS;
2264 }
2265 }
2266 else
2267 {
2268 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2269 AssertFatalRC(rc);
2270#ifdef VBOX_STRICT
2271 PVMCPU pVCpu = VMMGetCpu(pVM);
2272#endif
2273 AssertMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2274 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2275 }
2276 pPage->fMonitored = false;
2277
2278 /*
2279 * Remove it from the list of modified pages (if in it).
2280 */
2281 pgmPoolMonitorModifiedRemove(pPool, pPage);
2282
2283 return rc;
2284}
2285
2286
2287/**
2288 * Inserts the page into the list of modified pages.
2289 *
2290 * @param pPool The pool.
2291 * @param pPage The page.
2292 */
2293void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2294{
2295 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2296 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2297 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2298 && pPool->iModifiedHead != pPage->idx,
2299 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2300 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2301 pPool->iModifiedHead, pPool->cModifiedPages));
2302
2303 pPage->iModifiedNext = pPool->iModifiedHead;
2304 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2305 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2306 pPool->iModifiedHead = pPage->idx;
2307 pPool->cModifiedPages++;
2308#ifdef VBOX_WITH_STATISTICS
2309 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2310 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2311#endif
2312}
2313
2314
2315/**
2316 * Removes the page from the list of modified pages and resets the
2317 * modification counter.
2318 *
2319 * @param pPool The pool.
2320 * @param pPage The page which is believed to be in the list of modified pages.
2321 */
2322static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2323{
2324 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2325 if (pPool->iModifiedHead == pPage->idx)
2326 {
2327 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2328 pPool->iModifiedHead = pPage->iModifiedNext;
2329 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2330 {
2331 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2332 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2333 }
2334 pPool->cModifiedPages--;
2335 }
2336 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2337 {
2338 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2339 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2340 {
2341 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2342 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2343 }
2344 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2345 pPool->cModifiedPages--;
2346 }
2347 else
2348 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2349 pPage->cModifications = 0;
2350}
2351
2352
2353/**
2354 * Zaps the list of modified pages, resetting their modification counters in the process.
2355 *
2356 * @param pVM The VM handle.
2357 */
2358static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2359{
2360 pgmLock(pVM);
2361 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2362 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2363
2364 unsigned cPages = 0; NOREF(cPages);
2365
2366#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2367 pgmPoolResetDirtyPages(pVM, true /* force removal. */);
2368#endif
2369
2370 uint16_t idx = pPool->iModifiedHead;
2371 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2372 while (idx != NIL_PGMPOOL_IDX)
2373 {
2374 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2375 idx = pPage->iModifiedNext;
2376 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2377 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2378 pPage->cModifications = 0;
2379 Assert(++cPages);
2380 }
2381 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2382 pPool->cModifiedPages = 0;
2383 pgmUnlock(pVM);
2384}
2385
2386
2387#ifdef IN_RING3
2388/**
2389 * Callback to clear all shadow pages and clear all modification counters.
2390 *
2391 * @returns VBox status code.
2392 * @param pVM The VM handle.
2393 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
2394 * @param pvUser Unused parameter.
2395 *
2396 * @remark Should only be used when monitoring is available, thus placed in
2397 * the PGMPOOL_WITH_MONITORING \#ifdef.
2398 */
2399DECLCALLBACK(int) pgmPoolClearAll(PVM pVM, PVMCPU pVCpu, void *pvUser)
2400{
2401 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2402 STAM_PROFILE_START(&pPool->StatClearAll, c);
2403 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2404 NOREF(pvUser); NOREF(pVCpu);
2405
2406 pgmLock(pVM);
2407
2408 /*
2409 * Iterate all the pages until we've encountered all that are in use.
2410 * This is a simple but not quite optimal solution.
2411 */
2412 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2413 unsigned cLeft = pPool->cUsedPages;
2414 unsigned iPage = pPool->cCurPages;
2415 while (--iPage >= PGMPOOL_IDX_FIRST)
2416 {
2417 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2418 if (pPage->GCPhys != NIL_RTGCPHYS)
2419 {
2420 switch (pPage->enmKind)
2421 {
2422 /*
2423 * We only care about shadow page tables.
2424 */
2425 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2426 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2427 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2428 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2429 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2430 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2431 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2432 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2433 {
2434#ifdef PGMPOOL_WITH_USER_TRACKING
2435 if (pPage->cPresent)
2436#endif
2437 {
2438 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2439 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2440 ASMMemZeroPage(pvShw);
2441 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2442#ifdef PGMPOOL_WITH_USER_TRACKING
2443 pPage->cPresent = 0;
2444 pPage->iFirstPresent = ~0;
2445#endif
2446 }
2447 }
2448 /* fall thru */
2449
2450 default:
2451 Assert(!pPage->cModifications || ++cModifiedPages);
2452 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2453 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2454 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2455 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2456 pPage->cModifications = 0;
2457 break;
2458
2459 }
2460 if (!--cLeft)
2461 break;
2462 }
2463 }
2464
2465 /* sweep the special pages too. */
2466 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2467 {
2468 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2469 if (pPage->GCPhys != NIL_RTGCPHYS)
2470 {
2471 Assert(!pPage->cModifications || ++cModifiedPages);
2472 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2473 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2474 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2475 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2476 pPage->cModifications = 0;
2477 }
2478 }
2479
2480#ifndef DEBUG_michael
2481 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2482#endif
2483 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2484 pPool->cModifiedPages = 0;
2485
2486#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2487 /*
2488 * Clear all the GCPhys links and rebuild the phys ext free list.
2489 */
2490 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2491 pRam;
2492 pRam = pRam->CTX_SUFF(pNext))
2493 {
2494 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2495 while (iPage-- > 0)
2496 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
2497 }
2498
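    /* Rebuild the phys ext free list as one forward-linked chain spanning the whole array. */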
2499 pPool->iPhysExtFreeHead = 0;
2500 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2501 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2502 for (unsigned i = 0; i < cMaxPhysExts; i++)
2503 {
2504 paPhysExts[i].iNext = i + 1;
2505 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2506 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2507 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2508 }
2509 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2510#endif
2511
2512#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2513 /* Clear all dirty pages. */
2514 pPool->idxFreeDirtyPage = 0;
2515 pPool->cDirtyPages = 0;
2516 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
2517 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
2518#endif
2519
2520 /* Clear the PGM_SYNC_CLEAR_PGM_POOL flag on all VCPUs to prevent redundant flushes. */
2521 for (unsigned idCpu = 0; idCpu < pVM->cCPUs; idCpu++)
2522 {
2523 PVMCPU pVCpu = &pVM->aCpus[idCpu];
2524
2525 pVCpu->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2526 }
2527
2528 pPool->cPresent = 0;
2529 pgmUnlock(pVM);
2530 PGM_INVL_ALL_VCPU_TLBS(pVM);
2531 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2532 return VINF_SUCCESS;
2533}
2534#endif /* IN_RING3 */
2535
2536
2537/**
2538 * Handles SyncCR3 pool tasks.
2539 *
2540 * @returns VBox status code.
2541 * @retval VINF_SUCCESS on success.
2542 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2543 * @param pVCpu The VMCPU handle.
2544 * @remark Should only be used when monitoring is available, thus placed in
2545 * the PGMPOOL_WITH_MONITORING #ifdef.
2546 */
2547int pgmPoolSyncCR3(PVMCPU pVCpu)
2548{
2549 PVM pVM = pVCpu->CTX_SUFF(pVM);
2550 LogFlow(("pgmPoolSyncCR3\n"));
2551
2552 /*
2553 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2554 * Occasionally we will have to clear all the shadow page tables because we wanted
2555 * to monitor a page which was mapped by too many shadowed page tables. This operation
2556 * is sometimes referred to as a 'lightweight flush'.
2557 */
2558# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2559 if (ASMBitTestAndClear(&pVCpu->pgm.s.fSyncFlags, PGM_SYNC_CLEAR_PGM_POOL_BIT))
2560 {
2561 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmPoolClearAll, NULL);
2562 AssertRC(rc);
2563 }
2564# else /* !IN_RING3 */
2565 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2566 {
2567 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2568 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2569 return VINF_PGM_SYNC_CR3;
2570 }
2571# endif /* !IN_RING3 */
2572 else
2573 pgmPoolMonitorModifiedClearAll(pVM);
2574
2575 return VINF_SUCCESS;
2576}
2577
2578#endif /* PGMPOOL_WITH_MONITORING */
2579#ifdef PGMPOOL_WITH_USER_TRACKING
2580
2581/**
2582 * Frees up at least one user entry.
2583 *
2584 * @returns VBox status code.
2585 * @retval VINF_SUCCESS on success.
2586 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2587 * @param pPool The pool.
2588 * @param iUser The user index.
2589 */
2590static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2591{
2592 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2593#ifdef PGMPOOL_WITH_CACHE
2594 /*
2595 * Just free cached pages in a braindead fashion.
2596 */
2597 /** @todo walk the age list backwards and free the first with usage. */
2598 int rc = VINF_SUCCESS;
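    /* Keep flushing cached pages until the user record free list is no longer empty. */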
2599 do
2600 {
2601 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2602 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2603 rc = rc2;
2604 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2605 return rc;
2606#else
2607 /*
2608 * Lazy approach.
2609 */
2610 /* @todo This path no longer works (CR3 root pages will be flushed)!! */
2611 AssertCompileFailed();
2612 Assert(!CPUMIsGuestInLongMode(pVM));
2613 pgmPoolFlushAllInt(pPool);
2614 return VERR_PGM_POOL_FLUSHED;
2615#endif
2616}
2617
2618
2619/**
2620 * Inserts a page into the cache.
2621 *
2622 * This will create a user node for the page, insert it into the GCPhys
2623 * hash, and insert it into the age list.
2624 *
2625 * @returns VBox status code.
2626 * @retval VINF_SUCCESS if successfully added.
2627 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2628 * @param pPool The pool.
2629 * @param pPage The cached page.
2630 * @param GCPhys The GC physical address of the page we're gonna shadow.
2631 * @param iUser The user index.
2632 * @param iUserTable The user table index.
2633 */
2634DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2635{
2636 int rc = VINF_SUCCESS;
2637 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2638
2639 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2640
2641#ifdef VBOX_STRICT
2642 /*
2643 * Check that the entry doesn't already exist.
2644 */
2645 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2646 {
2647 uint16_t i = pPage->iUserHead;
2648 do
2649 {
2650 Assert(i < pPool->cMaxUsers);
2651 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2652 i = paUsers[i].iNext;
2653 } while (i != NIL_PGMPOOL_USER_INDEX);
2654 }
2655#endif
2656
2657 /*
2658 * Find a free user node.
2659 */
2660 uint16_t i = pPool->iUserFreeHead;
2661 if (i == NIL_PGMPOOL_USER_INDEX)
2662 {
2663 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2664 if (RT_FAILURE(rc))
2665 return rc;
2666 i = pPool->iUserFreeHead;
2667 }
2668
2669 /*
2670 * Unlink the user node from the free list,
2671 * initialize and insert it into the user list.
2672 */
2673 pPool->iUserFreeHead = paUsers[i].iNext;
2674 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2675 paUsers[i].iUser = iUser;
2676 paUsers[i].iUserTable = iUserTable;
2677 pPage->iUserHead = i;
2678
2679 /*
2680 * Insert into cache and enable monitoring of the guest page if enabled.
2681 *
2682 * Until we implement caching of all levels, including the CR3 one, we'll
2683 * have to make sure we don't try monitor & cache any recursive reuse of
2684 * a monitored CR3 page. Because all windows versions are doing this we'll
2685 * have to be able to do combined access monitoring, CR3 + PT and
2686 * PD + PT (guest PAE).
2687 *
2688 * Update:
2689 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2690 */
2691#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2692# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2693 const bool fCanBeMonitored = true;
2694# else
2695 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2696 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2697 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2698# endif
2699# ifdef PGMPOOL_WITH_CACHE
2700 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2701# endif
2702 if (fCanBeMonitored)
2703 {
2704# ifdef PGMPOOL_WITH_MONITORING
2705 rc = pgmPoolMonitorInsert(pPool, pPage);
2706 AssertRC(rc);
2707 # endif
2708     }
2709 #endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2710 return rc;
2711}
2712
2713
2714# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2715/**
2716 * Adds a user reference to a page.
2717 *
2718 * This will move the page to the head of the age list.
2719 *
2720 * @returns VBox status code.
2721 * @retval VINF_SUCCESS if successfully added.
2722 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2723 * @param pPool The pool.
2724 * @param pPage The cached page.
2725 * @param iUser The user index.
2726 * @param iUserTable The user table.
2727 */
2728static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2729{
2730 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2731
2732 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2733
2734# ifdef VBOX_STRICT
2735 /*
2736 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2737 */
2738 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2739 {
2740 uint16_t i = pPage->iUserHead;
2741 do
2742 {
2743 Assert(i < pPool->cMaxUsers);
2744 AssertMsg(iUser == PGMPOOL_IDX_PD || iUser == PGMPOOL_IDX_PDPT || iUser == PGMPOOL_IDX_NESTED_ROOT || iUser == PGMPOOL_IDX_AMD64_CR3 ||
2745 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2746 i = paUsers[i].iNext;
2747 } while (i != NIL_PGMPOOL_USER_INDEX);
2748 }
2749# endif
2750
2751 /*
2752 * Allocate a user node.
2753 */
2754 uint16_t i = pPool->iUserFreeHead;
2755 if (i == NIL_PGMPOOL_USER_INDEX)
2756 {
2757 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2758 if (RT_FAILURE(rc))
2759 return rc;
2760 i = pPool->iUserFreeHead;
2761 }
2762 pPool->iUserFreeHead = paUsers[i].iNext;
2763
2764 /*
2765 * Initialize the user node and insert it.
2766 */
2767 paUsers[i].iNext = pPage->iUserHead;
2768 paUsers[i].iUser = iUser;
2769 paUsers[i].iUserTable = iUserTable;
2770 pPage->iUserHead = i;
2771
2772# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2773 if (pPage->fDirty)
2774 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, true /* force removal */);
2775# endif
2776
2777# ifdef PGMPOOL_WITH_CACHE
2778 /*
2779 * Tell the cache to update its replacement stats for this page.
2780 */
2781 pgmPoolCacheUsed(pPool, pPage);
2782# endif
2783 return VINF_SUCCESS;
2784}
2785# endif /* PGMPOOL_WITH_CACHE */
2786
2787
2788/**
2789 * Frees a user record associated with a page.
2790 *
2791 * This does not clear the entry in the user table, it simply returns the
2792 * user record to the chain of free records.
2793 *
2794 * @param pPool The pool.
2795 * @param pPage The shadow pool page.
2796 * @param iUser The shadow page pool index of the user table.
2797 * @param iUserTable The index into the user table (shadowed).
2798 */
2799static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2800{
2801 /*
2802 * Unlink and free the specified user entry.
2803 */
2804 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2805
2806 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2807 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2808 uint16_t i = pPage->iUserHead;
2809 if ( i != NIL_PGMPOOL_USER_INDEX
2810 && paUsers[i].iUser == iUser
2811 && paUsers[i].iUserTable == iUserTable)
2812 {
2813 pPage->iUserHead = paUsers[i].iNext;
2814
2815 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2816 paUsers[i].iNext = pPool->iUserFreeHead;
2817 pPool->iUserFreeHead = i;
2818 return;
2819 }
2820
2821 /* General: Linear search. */
2822 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2823 while (i != NIL_PGMPOOL_USER_INDEX)
2824 {
2825 if ( paUsers[i].iUser == iUser
2826 && paUsers[i].iUserTable == iUserTable)
2827 {
2828 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2829 paUsers[iPrev].iNext = paUsers[i].iNext;
2830 else
2831 pPage->iUserHead = paUsers[i].iNext;
2832
2833 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2834 paUsers[i].iNext = pPool->iUserFreeHead;
2835 pPool->iUserFreeHead = i;
2836 return;
2837 }
2838 iPrev = i;
2839 i = paUsers[i].iNext;
2840 }
2841
2842 /* Fatal: didn't find it */
2843 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2844 iUser, iUserTable, pPage->GCPhys));
2845}
2846
2847
2848/**
2849 * Gets the entry size of a shadow table.
2850 *
2851 * @param enmKind The kind of page.
2852 *
2853 * @returns The size of the entry in bytes. That is, 4 or 8.
2854 * @returns If the kind is not for a table, an assertion is raised and 0 is
2855 * returned.
2856 */
2857DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2858{
2859 switch (enmKind)
2860 {
2861 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2862 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2863 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2864 case PGMPOOLKIND_32BIT_PD:
2865 case PGMPOOLKIND_32BIT_PD_PHYS:
2866 return 4;
2867
2868 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2869 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2870 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2871 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2872 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2873 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2874 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2875 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2876 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2877 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2878 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2879 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2880 case PGMPOOLKIND_64BIT_PML4:
2881 case PGMPOOLKIND_PAE_PDPT:
2882 case PGMPOOLKIND_ROOT_NESTED:
2883 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2884 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2885 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2886 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2887 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2888 case PGMPOOLKIND_PAE_PD_PHYS:
2889 case PGMPOOLKIND_PAE_PDPT_PHYS:
2890 return 8;
2891
2892 default:
2893 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2894 }
2895}
2896
2897
2898/**
2899 * Gets the entry size of a guest table.
2900 *
2901 * @param enmKind The kind of page.
2902 *
2903 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2904 * @returns If the kind is not for a table, an assertion is raised and 0 is
2905 * returned.
2906 */
2907DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2908{
2909 switch (enmKind)
2910 {
2911 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2912 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2913 case PGMPOOLKIND_32BIT_PD:
2914 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2915 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2916 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2917 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2918 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2919 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2920 return 4;
2921
2922 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2923 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2924 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2925 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2926 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2927 case PGMPOOLKIND_64BIT_PML4:
2928 case PGMPOOLKIND_PAE_PDPT:
2929 return 8;
2930
2931 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2932 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2933 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2934 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2935 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2936 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2937 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2938 case PGMPOOLKIND_ROOT_NESTED:
2939 case PGMPOOLKIND_PAE_PD_PHYS:
2940 case PGMPOOLKIND_PAE_PDPT_PHYS:
2941 case PGMPOOLKIND_32BIT_PD_PHYS:
2942 /** @todo can we return 0? (nobody is calling this...) */
2943 AssertFailed();
2944 return 0;
2945
2946 default:
2947 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2948 }
2949}
2950
2951#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2952
2953/**
2954 * Scans one shadow page table for mappings of a physical page.
2955 *
2956 * @param pVM The VM handle.
2957 * @param pPhysPage The guest page in question.
2958 * @param iShw The shadow page table.
2959 * @param cRefs The number of references made in that PT.
2960 */
2961static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2962{
2963 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
2964 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2965
2966 /*
2967 * Assert sanity.
2968 */
2969 Assert(cRefs == 1);
2970 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2971 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2972
2973 /*
2974 * Then, clear the actual mappings to the page in the shadow PT.
2975 */
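    /* A shadow PTE is considered to map this page iff its physical address bits plus the present bit
       equal HCPhys | X86_PTE_P; the remaining attribute bits are ignored by the comparison. */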
2976 switch (pPage->enmKind)
2977 {
2978 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2979 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2980 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2981 {
2982 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2983 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2984 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2985 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2986 {
2987 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2988 pPT->a[i].u = 0;
2989 cRefs--;
2990 if (!cRefs)
2991 return;
2992 }
2993#ifdef LOG_ENABLED
2994 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2995 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2996 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2997 {
2998 Log(("i=%d cRefs=%d\n", i, cRefs--));
2999 }
3000#endif
3001 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3002 break;
3003 }
3004
3005 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3006 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3007 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3008 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3009 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3010 {
3011 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3012 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3013 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3014 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3015 {
3016 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3017 pPT->a[i].u = 0;
3018 cRefs--;
3019 if (!cRefs)
3020 return;
3021 }
3022#ifdef LOG_ENABLED
3023 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3024 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3025 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3026 {
3027 Log(("i=%d cRefs=%d\n", i, cRefs--));
3028 }
3029#endif
3030 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
3031 break;
3032 }
3033
3034 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3035 {
3036 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3037 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3038 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3039 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3040 {
3041 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3042 pPT->a[i].u = 0;
3043 cRefs--;
3044 if (!cRefs)
3045 return;
3046 }
3047#ifdef LOG_ENABLED
3048 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3049 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3050 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3051 {
3052 Log(("i=%d cRefs=%d\n", i, cRefs--));
3053 }
3054#endif
3055 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3056 break;
3057 }
3058
3059 default:
3060 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3061 }
3062}
3063
3064
3065/**
3066 * Scans one shadow page table for mappings of a physical page.
3067 *
3068 * @param pVM The VM handle.
3069 * @param pPhysPage The guest page in question.
3070 * @param iShw The shadow page table.
3071 * @param cRefs The number of references made in that PT.
3072 */
3073void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
3074{
3075 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3076 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
3077 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3078 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
3079 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3080 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3081}
3082
3083
3084/**
3085 * Flushes a list of shadow page tables mapping the same physical page.
3086 *
3087 * @param pVM The VM handle.
3088 * @param pPhysPage The guest page in question.
3089 * @param iPhysExt The physical cross reference extent list to flush.
3090 */
3091void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
3092{
3093 Assert(PGMIsLockOwner(pVM));
3094 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3095 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3096 LogFlow(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%R[pgmpage] iPhysExt=%d\n", pPhysPage, iPhysExt));
3097
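    /* Walk the phys ext chain; each node holds a small array (aidx) of shadow page table indices referencing this physical page. */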
3098 const uint16_t iPhysExtStart = iPhysExt;
3099 PPGMPOOLPHYSEXT pPhysExt;
3100 do
3101 {
3102 Assert(iPhysExt < pPool->cMaxPhysExts);
3103 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3104 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3105 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3106 {
3107 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
3108 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3109 }
3110
3111 /* next */
3112 iPhysExt = pPhysExt->iNext;
3113 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3114
3115 /* insert the list into the free list and clear the ram range entry. */
3116 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3117 pPool->iPhysExtFreeHead = iPhysExtStart;
3118 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3119
3120 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3121}
3122
3123#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3124
3125/**
3126 * Flushes all shadow page table mappings of the given guest page.
3127 *
3128 * This is typically called when the host page backing the guest one has been
3129 * replaced or when the page protection was changed due to an access handler.
3130 *
3131 * @returns VBox status code.
3132 * @retval VINF_SUCCESS if all references have been successfully cleared.
3133 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3134 * pool cleaning. FF and sync flags are set.
3135 *
3136 * @param pVM The VM handle.
3137 * @param pPhysPage The guest page in question.
3138 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3139 * flushed; it is NOT touched if this isn't necessary.
3140 * The caller MUST initialize this to @a false.
3141 */
3142int pgmPoolTrackFlushGCPhys(PVM pVM, PPGMPAGE pPhysPage, bool *pfFlushTLBs)
3143{
3144 PVMCPU pVCpu = VMMGetCpu(pVM);
3145 pgmLock(pVM);
3146 int rc = VINF_SUCCESS;
3147#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3148 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3149 if (u16)
3150 {
3151 /*
3152 * The zero page is currently screwing up the tracking and we'll
3153 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3154 * is defined, zero pages won't normally be mapped. Some kind of solution
3155 * will be needed for this problem of course, but it will have to wait...
3156 */
3157 if (PGM_PAGE_IS_ZERO(pPhysPage))
3158 rc = VINF_PGM_GCPHYS_ALIASED;
3159 else
3160 {
3161# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3162 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3163 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3164 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3165# endif
3166
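            /* The tracking word either names a single shadow page table with a small reference count,
               refers to a phys ext chain, or carries the 'overflowed' marker forcing the slow full scan. */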
3167 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3168 pgmPoolTrackFlushGCPhysPT(pVM,
3169 pPhysPage,
3170 PGMPOOL_TD_GET_IDX(u16),
3171 PGMPOOL_TD_GET_CREFS(u16));
3172 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3173 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, PGMPOOL_TD_GET_IDX(u16));
3174 else
3175 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3176 *pfFlushTLBs = true;
3177
3178# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3179 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3180# endif
3181 }
3182 }
3183
3184#elif defined(PGMPOOL_WITH_CACHE)
3185 if (PGM_PAGE_IS_ZERO(pPhysPage))
3186 rc = VINF_PGM_GCPHYS_ALIASED;
3187 else
3188 {
3189# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3190 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow kills the pool otherwise. */
3191 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3192# endif
3193 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3194 if (rc == VINF_SUCCESS)
3195 *pfFlushTLBs = true;
3196 # ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3197         /* iPrevSubset is scoped to this block, so pop the subset before leaving it. */
3198         PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3199 # endif
3200     }
3201
3202#else
3203 rc = VINF_PGM_GCPHYS_ALIASED;
3204#endif
3205
3206 if (rc == VINF_PGM_GCPHYS_ALIASED)
3207 {
3208 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3209 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3210 rc = VINF_PGM_SYNC_CR3;
3211 }
3212 pgmUnlock(pVM);
3213 return rc;
3214}
3215
3216
3217/**
3218 * Scans all shadow page tables for mappings of a physical page.
3219 *
3220 * This may be slow, but it's most likely more efficient than cleaning
3221 * out the entire page pool / cache.
3222 *
3223 * @returns VBox status code.
3224 * @retval VINF_SUCCESS if all references have been successfully cleared.
3225 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3226 * a page pool cleaning.
3227 *
3228 * @param pVM The VM handle.
3229 * @param pPhysPage The guest page in question.
3230 */
3231int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3232{
3233 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3234 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3235 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3236 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3237
3238#if 1
3239 /*
3240 * There is a limit to what makes sense.
3241 */
3242 if (pPool->cPresent > 1024)
3243 {
3244 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3245 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3246 return VINF_PGM_GCPHYS_ALIASED;
3247 }
3248#endif
3249
3250 /*
3251 * Iterate all the pages until we've encountered all that are in use.
3252 * This is a simple but not quite optimal solution.
3253 */
3254 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3255 const uint32_t u32 = u64;
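    /* u32 is the same match value truncated for legacy 32-bit PTEs; presumably safe since a non-PAE PTE cannot reference a page above 4 GB anyway. */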
3256 unsigned cLeft = pPool->cUsedPages;
3257 unsigned iPage = pPool->cCurPages;
3258 while (--iPage >= PGMPOOL_IDX_FIRST)
3259 {
3260 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3261 if (pPage->GCPhys != NIL_RTGCPHYS)
3262 {
3263 switch (pPage->enmKind)
3264 {
3265 /*
3266 * We only care about shadow page tables.
3267 */
3268 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3269 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3270 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3271 {
3272 unsigned cPresent = pPage->cPresent;
3273 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3274 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3275 if (pPT->a[i].n.u1Present)
3276 {
3277 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3278 {
3279 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3280 pPT->a[i].u = 0;
3281 }
3282 if (!--cPresent)
3283 break;
3284 }
3285 break;
3286 }
3287
3288 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3289 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3290 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3291 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3292 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3293 {
3294 unsigned cPresent = pPage->cPresent;
3295 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3296 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3297 if (pPT->a[i].n.u1Present)
3298 {
3299 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3300 {
3301 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3302 pPT->a[i].u = 0;
3303 }
3304 if (!--cPresent)
3305 break;
3306 }
3307 break;
3308 }
3309 }
3310 if (!--cLeft)
3311 break;
3312 }
3313 }
3314
3315 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3316 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3317 return VINF_SUCCESS;
3318}
3319
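The scan above precomputes a comparison value from the host physical address plus the present bit, then masks each PTE down to the same bits before comparing. Below is a small self-contained sketch of that mask-compare-zero idea on a toy page table; PTE_P and PTE_PG_MASK are simplified stand-ins for the real X86_PTE_P / X86_PTE_PAE_PG_MASK definitions and the numbers are made up.

#include <stdint.h>
#include <stdio.h>

#define PTE_P        UINT64_C(0x1)                 /* present bit (bit 0 on x86) */
#define PTE_PG_MASK  UINT64_C(0x000ffffffffff000)  /* address bits, simplified stand-in */

/* Zero every present entry that maps HCPhys; returns how many were cleared. */
static unsigned ZapMappingsOf(uint64_t *paPTEs, unsigned cEntries, uint64_t HCPhys)
{
    const uint64_t u64Match = (HCPhys & PTE_PG_MASK) | PTE_P;
    unsigned cZapped = 0;
    for (unsigned i = 0; i < cEntries; i++)
        if (   (paPTEs[i] & PTE_P)
            && (paPTEs[i] & (PTE_PG_MASK | PTE_P)) == u64Match)
        {
            paPTEs[i] = 0;
            cZapped++;
        }
    return cZapped;
}

int main(void)
{
    uint64_t aPT[4] =
    {
        0x0000000012345000 | PTE_P | 0x60,  /* maps the page we're after, extra attribute bits set */
        0x0000000055555000 | PTE_P,         /* maps something else */
        0,                                  /* not present */
        0x0000000012345000 | PTE_P,         /* maps it again (an alias) */
    };
    printf("zapped %u entries\n", ZapMappingsOf(aPT, 4, 0x0000000012345000));
    return 0;
}
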
3320
3321/**
3322 * Clears the user entry in a user table.
3323 *
3324 * This is used to remove all references to a page when flushing it.
3325 */
3326static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3327{
3328 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3329 Assert(pUser->iUser < pPool->cCurPages);
3330 uint32_t iUserTable = pUser->iUserTable;
3331
3332 /*
3333 * Map the user page.
3334 */
3335 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3336 union
3337 {
3338 uint64_t *pau64;
3339 uint32_t *pau32;
3340 } u;
3341 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3342
3343 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3344
3345 /* Safety precaution in case we change the paging for other modes too in the future. */
3346 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3347
3348#ifdef VBOX_STRICT
3349 /*
3350 * Some sanity checks.
3351 */
3352 switch (pUserPage->enmKind)
3353 {
3354 case PGMPOOLKIND_32BIT_PD:
3355 case PGMPOOLKIND_32BIT_PD_PHYS:
3356 Assert(iUserTable < X86_PG_ENTRIES);
3357 break;
3358 case PGMPOOLKIND_PAE_PDPT:
3359 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3360 case PGMPOOLKIND_PAE_PDPT_PHYS:
3361 Assert(iUserTable < 4);
3362 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3363 break;
3364 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3365 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3366 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3367 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3368 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3369 case PGMPOOLKIND_PAE_PD_PHYS:
3370 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3371 break;
3372 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3373 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3374 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3375 break;
3376 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3377 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3378 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3379 break;
3380 case PGMPOOLKIND_64BIT_PML4:
3381 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3382 /* GCPhys >> PAGE_SHIFT is the index here */
3383 break;
3384 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3385 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3386 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3387 break;
3388
3389 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3390 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3391 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3392 break;
3393
3394 case PGMPOOLKIND_ROOT_NESTED:
3395 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3396 break;
3397
3398 default:
3399 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3400 break;
3401 }
3402#endif /* VBOX_STRICT */
3403
3404 /*
3405 * Clear the entry in the user page.
3406 */
3407 switch (pUserPage->enmKind)
3408 {
3409 /* 32-bit entries */
3410 case PGMPOOLKIND_32BIT_PD:
3411 case PGMPOOLKIND_32BIT_PD_PHYS:
3412 u.pau32[iUserTable] = 0;
3413 break;
3414
3415 /* 64-bit entries */
3416 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3417 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3418 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3419 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3420 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3421#if defined(IN_RC)
3422 /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during cr3 load, so any
3423 * non-present PDPT will continue to cause page faults.
3424 */
3425 ASMReloadCR3();
3426#endif
3427 /* no break */
3428 case PGMPOOLKIND_PAE_PD_PHYS:
3429 case PGMPOOLKIND_PAE_PDPT_PHYS:
3430 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3431 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3432 case PGMPOOLKIND_64BIT_PML4:
3433 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3434 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3435 case PGMPOOLKIND_PAE_PDPT:
3436 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3437 case PGMPOOLKIND_ROOT_NESTED:
3438 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3439 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3440 u.pau64[iUserTable] = 0;
3441 break;
3442
3443 default:
3444 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3445 }
3446}
3447
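pgmPoolTrackClearPageUser maps the owning page once and then views it either as 32-bit or as 64-bit entries depending on the owner's kind, via a small union of pointers. A tiny illustration of the same trick on a plain buffer follows; nothing here is VBox-specific, and the buffer and flag are invented for the example.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
    uint64_t au64Page[8];                   /* stand-in for the mapped user page */
    memset(au64Page, 0xff, sizeof(au64Page));

    union
    {
        uint64_t *pau64;
        uint32_t *pau32;
        void     *pv;
    } u;
    u.pv = au64Page;

    int f32BitTable = 1;                    /* decided by the owner's kind in the code above */
    unsigned iEntry = 3;
    if (f32BitTable)
        u.pau32[iEntry] = 0;                /* clear a 32-bit PDE slot */
    else
        u.pau64[iEntry] = 0;                /* clear a PAE/long-mode slot */

    printf("entry %u is now %#x\n", iEntry, u.pau32[iEntry]);
    return 0;
}
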
3448
3449/**
3450 * Clears all users of a page.
3451 */
3452static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3453{
3454 /*
3455 * Free all the user records.
3456 */
3457 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3458
3459 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3460 uint16_t i = pPage->iUserHead;
3461 while (i != NIL_PGMPOOL_USER_INDEX)
3462 {
3463 /* Clear the entry in the user table. */
3464 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3465
3466 /* Free it. */
3467 const uint16_t iNext = paUsers[i].iNext;
3468 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3469 paUsers[i].iNext = pPool->iUserFreeHead;
3470 pPool->iUserFreeHead = i;
3471
3472 /* Next. */
3473 i = iNext;
3474 }
3475 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3476}
3477
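The user records walked above live in a flat array and are chained by 16-bit indices instead of pointers, so the same structures work unchanged in ring-3, ring-0 and raw-mode context. Here is a minimal sketch of returning such records to an index-based free list, as the loop above does; the structure, field names and NIL sentinel are made up for the example.

#include <stdint.h>
#include <stdio.h>

#define MY_NIL_INDEX UINT16_MAX

typedef struct MYUSERREC
{
    uint16_t iNext;     /* next record in whichever list this record is on */
    uint16_t iUser;     /* owning table index, MY_NIL_INDEX when free */
} MYUSERREC;

/* Walk the page's user chain, returning every record to the free list. */
static void FreeUserChain(MYUSERREC *paRecs, uint16_t *piFreeHead, uint16_t iHead)
{
    uint16_t i = iHead;
    while (i != MY_NIL_INDEX)
    {
        const uint16_t iNext = paRecs[i].iNext;
        paRecs[i].iUser = MY_NIL_INDEX;     /* mark the record free */
        paRecs[i].iNext = *piFreeHead;      /* push it onto the free list */
        *piFreeHead     = i;
        i = iNext;
    }
}

int main(void)
{
    MYUSERREC aRecs[4] = { { 1, 7 }, { 2, 7 }, { MY_NIL_INDEX, 7 }, { MY_NIL_INDEX, MY_NIL_INDEX } };
    uint16_t iFreeHead = 3;                 /* record 3 is the only free one initially */
    FreeUserChain(aRecs, &iFreeHead, 0);    /* free the chain 0 -> 1 -> 2 */
    printf("free head is now %d\n", iFreeHead);
    return 0;
}
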
3478#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3479
3480/**
3481 * Allocates a new physical cross reference extent.
3482 *
3483 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3484 * @param pVM The VM handle.
3485 * @param piPhysExt Where to store the phys ext index.
3486 */
3487PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3488{
3489 Assert(PGMIsLockOwner(pVM));
3490 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3491 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3492 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3493 {
3494 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3495 return NULL;
3496 }
3497 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3498 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3499 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3500 *piPhysExt = iPhysExt;
3501 return pPhysExt;
3502}
3503
3504
3505/**
3506 * Frees a physical cross reference extent.
3507 *
3508 * @param pVM The VM handle.
3509 * @param iPhysExt The extent to free.
3510 */
3511void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3512{
3513 Assert(PGMIsLockOwner(pVM));
3514 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3515 Assert(iPhysExt < pPool->cMaxPhysExts);
3516 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3517 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3518 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3519 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3520 pPool->iPhysExtFreeHead = iPhysExt;
3521}
3522
3523
3524/**
3525 * Frees a list of physical cross reference extents.
3526 *
3527 * @param pVM The VM handle.
3528 * @param iPhysExt The index of the head of the extent list to free.
3529 */
3530void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3531{
3532 Assert(PGMIsLockOwner(pVM));
3533 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3534
3535 const uint16_t iPhysExtStart = iPhysExt;
3536 PPGMPOOLPHYSEXT pPhysExt;
3537 do
3538 {
3539 Assert(iPhysExt < pPool->cMaxPhysExts);
3540 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3541 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3542 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3543
3544 /* next */
3545 iPhysExt = pPhysExt->iNext;
3546 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3547
3548 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3549 pPool->iPhysExtFreeHead = iPhysExtStart;
3550}
3551
3552
3553/**
3554 * Insert a reference into a list of physical cross reference extents.
3555 *
3556 * @returns The new tracking data for PGMPAGE.
3557 *
3558 * @param pVM The VM handle.
3559 * @param iPhysExt The physical extent index of the list head.
3560 * @param iShwPT The shadow page table index.
3561 *
3562 */
3563static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3564{
3565 Assert(PGMIsLockOwner(pVM));
3566 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3567 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3568
3569 /* special common case. */
3570 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3571 {
3572 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3573 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3574 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d}\n", iPhysExt, iShwPT));
3575 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3576 }
3577
3578 /* general treatment. */
3579 const uint16_t iPhysExtStart = iPhysExt;
3580 unsigned cMax = 15;
3581 for (;;)
3582 {
3583 Assert(iPhysExt < pPool->cMaxPhysExts);
3584 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3585 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3586 {
3587 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3588 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3589 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3590 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3591 }
3592 if (!--cMax)
3593 {
3594 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3595 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3596 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3597 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3598 }
3599 }
3600
3601 /* add another extent to the list. */
3602 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3603 if (!pNew)
3604 {
3605 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3606 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3607 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3608 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3609 }
3610 pNew->iNext = iPhysExtStart;
3611 pNew->aidx[0] = iShwPT;
3612 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3613 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3614}
3615
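The insert path above looks for a free slot in the extents already chained to the page, bails out after a bounded walk, and otherwise links a freshly allocated extent in front of the list; when nothing works the page is marked overflowed so later flushes fall back to the slow scan. Below is a compact standalone sketch of that policy with fixed three-slot extents; all names, sizes and sentinels are invented for the illustration, and the real code additionally updates statistics and frees the old list on overflow.

#include <stdint.h>
#include <stdio.h>

#define NIL_IDX        UINT16_MAX
#define IDX_OVERFLOWED 0x3fff
#define MAX_EXTENTS    8

typedef struct MYEXT { uint16_t aidx[3]; uint16_t iNext; } MYEXT;

static MYEXT    g_aExts[MAX_EXTENTS];
static uint16_t g_iFreeHead;

static void InitExts(void)
{
    for (uint16_t i = 0; i < MAX_EXTENTS; i++)
    {
        g_aExts[i].aidx[0] = g_aExts[i].aidx[1] = g_aExts[i].aidx[2] = NIL_IDX;
        g_aExts[i].iNext = (uint16_t)(i + 1 < MAX_EXTENTS ? i + 1 : NIL_IDX);
    }
    g_iFreeHead = 0;
}

/* Returns the index of the list head, or IDX_OVERFLOWED if we had to give up. */
static uint16_t ExtInsert(uint16_t iHead, uint16_t iShwPT)
{
    unsigned cMax = 4;                              /* bounded walk, like cMax = 15 above */
    for (uint16_t i = iHead; i != NIL_IDX; i = g_aExts[i].iNext)
    {
        for (unsigned j = 0; j < 3; j++)
            if (g_aExts[i].aidx[j] == NIL_IDX)
            {
                g_aExts[i].aidx[j] = iShwPT;        /* free slot found */
                return iHead;
            }
        if (!--cMax)
            return IDX_OVERFLOWED;                  /* list too long, mark the page overflowed */
    }
    if (g_iFreeHead == NIL_IDX)
        return IDX_OVERFLOWED;                      /* out of extents */
    uint16_t iNew = g_iFreeHead;                    /* link a fresh extent in front of the list */
    g_iFreeHead = g_aExts[iNew].iNext;
    g_aExts[iNew].iNext = iHead;
    g_aExts[iNew].aidx[0] = iShwPT;
    return iNew;
}

int main(void)
{
    InitExts();
    uint16_t iHead = g_iFreeHead;                   /* start with one (empty) extent */
    g_iFreeHead = g_aExts[iHead].iNext;
    g_aExts[iHead].iNext = NIL_IDX;
    for (uint16_t iPT = 0; iPT < 5; iPT++)
        iHead = ExtInsert(iHead, iPT);              /* the fourth insert forces a new extent */
    printf("head extent: %d, first slot: %d\n", iHead, g_aExts[iHead].aidx[0]);
    return 0;
}
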
3616
3617/**
3618 * Add a reference to a guest physical page where extents are in use.
3619 *
3620 * @returns The new tracking data for PGMPAGE.
3621 *
3622 * @param pVM The VM handle.
3623 * @param u16 The ram range flags (top 16-bits).
3624 * @param iShwPT The shadow page table index.
3625 */
3626uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3627{
3628 pgmLock(pVM);
3629 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3630 {
3631 /*
3632 * Convert to extent list.
3633 */
3634 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3635 uint16_t iPhysExt;
3636 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3637 if (pPhysExt)
3638 {
3639 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3640 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3641 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3642 pPhysExt->aidx[1] = iShwPT;
3643 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3644 }
3645 else
3646 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3647 }
3648 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3649 {
3650 /*
3651 * Insert into the extent list.
3652 */
3653 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3654 }
3655 else
3656 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3657 pgmUnlock(pVM);
3658 return u16;
3659}
3660
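The addref path only ever moves the tracking word forward: a single direct reference becomes a two-entry extent, an existing extent list gets another slot, and once the overflow marker is set it stays set. The following is a stripped-down sketch of that decision ladder, reusing the assumed 2-bit/14-bit packing from the earlier sketch and reducing the extent helpers to stubs so the control flow stands out; none of these names are the real ones.

#include <stdint.h>
#include <stdio.h>

#define TD_CREFS_SHIFT    14
#define TD_CREFS_MASK     0x3
#define TD_IDX_MASK       0x3fff
#define TD_CREFS_PHYSEXT  TD_CREFS_MASK
#define TD_IDX_OVERFLOWED TD_IDX_MASK
#define TD_MAKE(c, i)     ((uint16_t)(((c) << TD_CREFS_SHIFT) | (i)))
#define TD_GET_CREFS(u)   (((u) >> TD_CREFS_SHIFT) & TD_CREFS_MASK)
#define TD_GET_IDX(u)     ((u) & TD_IDX_MASK)

/* Stubs standing in for the real extent allocator / insert helpers. */
static int      ExtAlloc(uint16_t *piExt)                 { *piExt = 0; return 1; }
static uint16_t ExtInsert(uint16_t iExt, uint16_t iShwPT) { (void)iShwPT; return TD_MAKE(TD_CREFS_PHYSEXT, iExt); }

static uint16_t TrackAddref(uint16_t u16, uint16_t iShwPT)
{
    if (TD_GET_CREFS(u16) != TD_CREFS_PHYSEXT)
    {
        uint16_t iExt;
        if (ExtAlloc(&iExt))                           /* one direct ref -> two-entry extent */
            return TD_MAKE(TD_CREFS_PHYSEXT, iExt);
        return TD_MAKE(TD_CREFS_PHYSEXT, TD_IDX_OVERFLOWED);
    }
    if (u16 != TD_MAKE(TD_CREFS_PHYSEXT, TD_IDX_OVERFLOWED))
        return ExtInsert(TD_GET_IDX(u16), iShwPT);     /* already a list -> add another slot */
    return u16;                                        /* already overflowed -> stay there */
}

int main(void)
{
    uint16_t u16 = TD_MAKE(1, 42);                     /* one shadow PT referencing the page */
    u16 = TrackAddref(u16, 99);                        /* a second PT shows up */
    printf("now an extent list: %d\n", TD_GET_CREFS(u16) == TD_CREFS_PHYSEXT);
    return 0;
}
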
3661
3662/**
3663 * Clear references to guest physical memory.
3664 *
3665 * @param pPool The pool.
3666 * @param pPage The page.
3667 * @param pPhysPage Pointer to the aPages entry in the ram range.
3668 */
3669void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3670{
3671 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3672 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3673
3674 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3675 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3676 {
3677 PVM pVM = pPool->CTX_SUFF(pVM);
3678 pgmLock(pVM);
3679
3680 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3681 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3682 do
3683 {
3684 Assert(iPhysExt < pPool->cMaxPhysExts);
3685
3686 /*
3687 * Look for the shadow page and check if it's all freed.
3688 */
3689 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3690 {
3691 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3692 {
3693 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3694
3695 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3696 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3697 {
3698 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3699 pgmUnlock(pVM);
3700 return;
3701 }
3702
3703 /* we can free the node. */
3704 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3705 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3706 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3707 {
3708 /* lonely node */
3709 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3710 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3711 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3712 }
3713 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3714 {
3715 /* head */
3716 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3717 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3718 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3719 }
3720 else
3721 {
3722 /* in list */
3723 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3724 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3725 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3726 }
3727 iPhysExt = iPhysExtNext;
3728 pgmUnlock(pVM);
3729 return;
3730 }
3731 }
3732
3733 /* next */
3734 iPhysExtPrev = iPhysExt;
3735 iPhysExt = paPhysExts[iPhysExt].iNext;
3736 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3737
3738 pgmUnlock(pVM);
3739 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3740 }
3741 else /* nothing to do */
3742 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3743}
3744
3745
3746/**
3747 * Clear references to guest physical memory.
3748 *
3749 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3750 * is assumed to be correct, so the linear search can be skipped and we can assert
3751 * at an earlier point.
3752 *
3753 * @param pPool The pool.
3754 * @param pPage The page.
3755 * @param HCPhys The host physical address corresponding to the guest page.
3756 * @param GCPhys The guest physical address corresponding to HCPhys.
3757 */
3758static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3759{
3760 /*
3761 * Walk range list.
3762 */
3763 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3764 while (pRam)
3765 {
3766 RTGCPHYS off = GCPhys - pRam->GCPhys;
3767 if (off < pRam->cb)
3768 {
3769 /* does it match? */
3770 const unsigned iPage = off >> PAGE_SHIFT;
3771 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3772#ifdef LOG_ENABLED
3773RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3774Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3775#endif
3776 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3777 {
3778 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3779 return;
3780 }
3781 break;
3782 }
3783 pRam = pRam->CTX_SUFF(pNext);
3784 }
3785 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3786}
3787
3788
3789/**
3790 * Clear references to guest physical memory.
3791 *
3792 * @param pPool The pool.
3793 * @param pPage The page.
3794 * @param HCPhys The host physical address corresponding to the guest page.
3795 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3796 */
3797void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3798{
3799 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
3800
3801 /*
3802 * Walk range list.
3803 */
3804 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3805 while (pRam)
3806 {
3807 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3808 if (off < pRam->cb)
3809 {
3810 /* does it match? */
3811 const unsigned iPage = off >> PAGE_SHIFT;
3812 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3813 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3814 {
3815 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3816 return;
3817 }
3818 break;
3819 }
3820 pRam = pRam->CTX_SUFF(pNext);
3821 }
3822
3823 /*
3824 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3825 */
3826 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3827 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3828 while (pRam)
3829 {
3830 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3831 while (iPage-- > 0)
3832 {
3833 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3834 {
3835 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3836 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3837 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3838 return;
3839 }
3840 }
3841 pRam = pRam->CTX_SUFF(pNext);
3842 }
3843
3844 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3845}
3846
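The hint variant above first tries the RAM range the hinted address falls into and only falls back to a full linear sweep of every range when the host page is not where the hint claimed. Here is a small standalone sketch of that cheap-guess-first lookup over toy ranges; the structure and field names are invented for the example.

#include <stdint.h>
#include <stdio.h>

typedef struct MYRANGE
{
    uint64_t        GCPhys;     /* guest-physical start of the range */
    uint64_t        cb;         /* size in bytes */
    const uint64_t *paHCPhys;   /* host-physical address of each 4K page in the range */
    struct MYRANGE *pNext;
} MYRANGE;

/* Find the page whose host address is HCPhys, trying GCPhysHint before sweeping everything. */
static int FindPage(MYRANGE *pHead, uint64_t HCPhys, uint64_t GCPhysHint, uint64_t *pGCPhysReal)
{
    /* 1. the hinted range */
    for (MYRANGE *pRam = pHead; pRam; pRam = pRam->pNext)
    {
        uint64_t off = GCPhysHint - pRam->GCPhys;   /* wraps to a huge value when below the range */
        if (off < pRam->cb)
        {
            if (pRam->paHCPhys[off >> 12] == HCPhys)
            {
                *pGCPhysReal = GCPhysHint;
                return 1;
            }
            break;                                  /* hint was wrong, stop guessing */
        }
    }
    /* 2. the expensive linear sweep */
    for (MYRANGE *pRam = pHead; pRam; pRam = pRam->pNext)
        for (uint64_t iPage = pRam->cb >> 12; iPage-- > 0; )
            if (pRam->paHCPhys[iPage] == HCPhys)
            {
                *pGCPhysReal = pRam->GCPhys + (iPage << 12);
                return 1;
            }
    return 0;
}

int main(void)
{
    static const uint64_t aHC[4] = { 0x1000, 0x2000, 0x3000, 0x4000 };
    MYRANGE Ram = { 0x100000, 4 * 4096, aHC, NULL };
    uint64_t GCPhysReal;
    if (FindPage(&Ram, 0x3000, 0x100000 /* stale hint: page 0 */, &GCPhysReal))
        printf("found at %#llx\n", (unsigned long long)GCPhysReal);
    return 0;
}
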
3847
3848/**
3849 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3850 *
3851 * @param pPool The pool.
3852 * @param pPage The page.
3853 * @param pShwPT The shadow page table (mapping of the page).
3854 * @param pGstPT The guest page table.
3855 */
3856DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3857{
3858 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3859 if (pShwPT->a[i].n.u1Present)
3860 {
3861 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3862 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3863 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3864 if (!--pPage->cPresent)
3865 break;
3866 }
3867}
3868
3869
3870/**
3871 * Clear references to guest physical memory in a PAE / 32-bit page table.
3872 *
3873 * @param pPool The pool.
3874 * @param pPage The page.
3875 * @param pShwPT The shadow page table (mapping of the page).
3876 * @param pGstPT The guest page table (just a half one).
3877 */
3878DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3879{
3880 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3881 if (pShwPT->a[i].n.u1Present)
3882 {
3883 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3884 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3885 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3886 }
3887}
3888
3889
3890/**
3891 * Clear references to guest physical memory in a PAE / PAE page table.
3892 *
3893 * @param pPool The pool.
3894 * @param pPage The page.
3895 * @param pShwPT The shadow page table (mapping of the page).
3896 * @param pGstPT The guest page table.
3897 */
3898DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3899{
3900 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3901 if (pShwPT->a[i].n.u1Present)
3902 {
3903 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3904 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3905 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3906 }
3907}
3908
3909
3910/**
3911 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3912 *
3913 * @param pPool The pool.
3914 * @param pPage The page.
3915 * @param pShwPT The shadow page table (mapping of the page).
3916 */
3917DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3918{
3919 RTGCPHYS GCPhys = pPage->GCPhys;
3920 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3921 if (pShwPT->a[i].n.u1Present)
3922 {
3923 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3924 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3925 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3926 }
3927}
3928
3929
3930/**
3931 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3932 *
3933 * @param pPool The pool.
3934 * @param pPage The page.
3935 * @param pShwPT The shadow page table (mapping of the page).
3936 */
3937DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3938{
3939 RTGCPHYS GCPhys = pPage->GCPhys;
3940 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3941 if (pShwPT->a[i].n.u1Present)
3942 {
3943 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3944 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3945 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3946 }
3947}
3948
3949#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3950
3951
3952/**
3953 * Clear references to shadowed pages in a 32-bit page directory.
3954 *
3955 * @param pPool The pool.
3956 * @param pPage The page.
3957 * @param pShwPD The shadow page directory (mapping of the page).
3958 */
3959DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
3960{
3961 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3962 {
3963 if ( pShwPD->a[i].n.u1Present
3964 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3965 )
3966 {
3967 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
3968 if (pSubPage)
3969 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3970 else
3971 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
3972 }
3973 }
3974}
3975
3976/**
3977 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
3978 *
3979 * @param pPool The pool.
3980 * @param pPage The page.
3981 * @param pShwPD The shadow page directory (mapping of the page).
3982 */
3983DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3984{
3985 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3986 {
3987 if ( pShwPD->a[i].n.u1Present
3988 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3989 )
3990 {
3991 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3992 if (pSubPage)
3993 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3994 else
3995 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3996 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3997 }
3998 }
3999}
4000
4001/**
4002 * Clear references to shadowed pages in a PAE page directory pointer table.
4003 *
4004 * @param pPool The pool.
4005 * @param pPage The page.
4006 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4007 */
4008DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4009{
4010 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4011 {
4012 if ( pShwPDPT->a[i].n.u1Present
4013 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4014 )
4015 {
4016 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4017 if (pSubPage)
4018 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4019 else
4020 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4021 }
4022 }
4023}
4024
4025
4026/**
4027 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4028 *
4029 * @param pPool The pool.
4030 * @param pPage The page.
4031 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4032 */
4033DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4034{
4035 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4036 {
4037 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
4038 if (pShwPDPT->a[i].n.u1Present)
4039 {
4040 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4041 if (pSubPage)
4042 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4043 else
4044 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4045 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4046 }
4047 }
4048}
4049
4050
4051/**
4052 * Clear references to shadowed pages in a 64-bit level 4 page table.
4053 *
4054 * @param pPool The pool.
4055 * @param pPage The page.
4056 * @param pShwPML4 The shadow PML4 (mapping of the page).
4057 */
4058DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4059{
4060 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4061 {
4062 if (pShwPML4->a[i].n.u1Present)
4063 {
4064 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4065 if (pSubPage)
4066 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4067 else
4068 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4069 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4070 }
4071 }
4072}
4073
4074
4075/**
4076 * Clear references to shadowed pages in an EPT page table.
4077 *
4078 * @param pPool The pool.
4079 * @param pPage The page.
4080 * @param pShwPT The shadow page table (mapping of the page).
4081 */
4082DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4083{
4084 RTGCPHYS GCPhys = pPage->GCPhys;
4085 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4086 if (pShwPT->a[i].n.u1Present)
4087 {
4088 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4089 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4090 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
4091 }
4092}
4093
4094
4095/**
4096 * Clear references to shadowed pages in an EPT page directory.
4097 *
4098 * @param pPool The pool.
4099 * @param pPage The page.
4100 * @param pShwPD The shadow page directory (mapping of the page).
4101 */
4102DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4103{
4104 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4105 {
4106 if (pShwPD->a[i].n.u1Present)
4107 {
4108 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4109 if (pSubPage)
4110 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4111 else
4112 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4113 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4114 }
4115 }
4116}
4117
4118
4119/**
4120 * Clear references to shadowed pages in an EPT page directory pointer table.
4121 *
4122 * @param pPool The pool.
4123 * @param pPage The page.
4124 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4125 */
4126DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4127{
4128 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4129 {
4130 if (pShwPDPT->a[i].n.u1Present)
4131 {
4132 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4133 if (pSubPage)
4134 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4135 else
4136 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4137 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4138 }
4139 }
4140}
4141
4142
4143/**
4144 * Clears all references made by this page.
4145 *
4146 * This includes other shadow pages and GC physical addresses.
4147 *
4148 * @param pPool The pool.
4149 * @param pPage The page.
4150 */
4151static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4152{
4153 /*
4154 * Map the shadow page and take action according to the page kind.
4155 */
4156 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
4157 switch (pPage->enmKind)
4158 {
4159#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4160 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4161 {
4162 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4163 void *pvGst;
4164 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4165 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4166 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4167 break;
4168 }
4169
4170 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4171 {
4172 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4173 void *pvGst;
4174 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4175 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
4176 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4177 break;
4178 }
4179
4180 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4181 {
4182 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4183 void *pvGst;
4184 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4185 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
4186 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4187 break;
4188 }
4189
4190 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4191 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4192 {
4193 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4194 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4195 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4196 break;
4197 }
4198
4199 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4200 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4201 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4202 {
4203 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4204 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4205 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4206 break;
4207 }
4208
4209#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4210 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4211 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4212 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4213 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4214 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4215 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4216 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4217 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4218 break;
4219#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4220
4221 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4222 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4223 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4224 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4225 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4226 case PGMPOOLKIND_PAE_PD_PHYS:
4227 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4228 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4229 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4230 break;
4231
4232 case PGMPOOLKIND_32BIT_PD_PHYS:
4233 case PGMPOOLKIND_32BIT_PD:
4234 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4235 break;
4236
4237 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4238 case PGMPOOLKIND_PAE_PDPT:
4239 case PGMPOOLKIND_PAE_PDPT_PHYS:
4240 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4241 break;
4242
4243 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4244 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4245 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4246 break;
4247
4248 case PGMPOOLKIND_64BIT_PML4:
4249 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4250 break;
4251
4252 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4253 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4254 break;
4255
4256 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4257 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4258 break;
4259
4260 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4261 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4262 break;
4263
4264 default:
4265 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4266 }
4267
4268 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4269 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4270 ASMMemZeroPage(pvShw);
4271 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4272 pPage->fZeroed = true;
4273 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4274}
4275#endif /* PGMPOOL_WITH_USER_TRACKING */
4276
4277/**
4278 * Flushes a pool page.
4279 *
4280 * This moves the page to the free list after removing all user references to it.
4281 *
4282 * @returns VBox status code.
4283 * @retval VINF_SUCCESS on success.
4284 * @param pPool The pool.
4285 * @param pPage The shadow page.
4286 */
4287int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4288{
4289 PVM pVM = pPool->CTX_SUFF(pVM);
4290
4291 int rc = VINF_SUCCESS;
4292 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4293 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4294 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4295
4296 /*
4297 * Quietly reject any attempts at flushing any of the special root pages.
4298 */
4299 if (pPage->idx < PGMPOOL_IDX_FIRST)
4300 {
4301 AssertFailed(); /* can no longer happen */
4302 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4303 return VINF_SUCCESS;
4304 }
4305
4306 pgmLock(pVM);
4307
4308 /*
4309 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4310 */
4311 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4312 {
4313 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4314 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4315 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4316 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4317 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4318 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4319 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4320 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4321 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4322 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4323 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4324 pgmUnlock(pVM);
4325 return VINF_SUCCESS;
4326 }
4327
4328#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4329 /* Start a subset so we won't run out of mapping space. */
4330 PVMCPU pVCpu = VMMGetCpu(pVM);
4331 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4332#endif
4333
4334 /*
4335 * Mark the page as being in need of an ASMMemZeroPage().
4336 */
4337 pPage->fZeroed = false;
4338
4339#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4340 if (pPage->fDirty)
4341 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, true /* force removal */);
4342#endif
4343
4344#ifdef PGMPOOL_WITH_USER_TRACKING
4345 /*
4346 * Clear the page.
4347 */
4348 pgmPoolTrackClearPageUsers(pPool, pPage);
4349 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4350 pgmPoolTrackDeref(pPool, pPage);
4351 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4352#endif
4353
4354#ifdef PGMPOOL_WITH_CACHE
4355 /*
4356 * Flush it from the cache.
4357 */
4358 pgmPoolCacheFlushPage(pPool, pPage);
4359#endif /* PGMPOOL_WITH_CACHE */
4360
4361#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4362 /* Heavy stuff done. */
4363 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4364#endif
4365
4366#ifdef PGMPOOL_WITH_MONITORING
4367 /*
4368 * Deregister the monitoring.
4369 */
4370 if (pPage->fMonitored)
4371 rc = pgmPoolMonitorFlush(pPool, pPage);
4372#endif
4373
4374 /*
4375 * Free the page.
4376 */
4377 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4378 pPage->iNext = pPool->iFreeHead;
4379 pPool->iFreeHead = pPage->idx;
4380 pPage->enmKind = PGMPOOLKIND_FREE;
4381 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4382 pPage->GCPhys = NIL_RTGCPHYS;
4383 pPage->fReusedFlushPending = false;
4384
4385 pPool->cUsedPages--;
4386 pgmUnlock(pVM);
4387 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4388 return rc;
4389}
4390
4391
4392/**
4393 * Frees a usage of a pool page.
4394 *
4395 * The caller is responsible for updating the user table so that it no longer
4396 * references the shadow page.
4397 *
4398 * @param pPool The pool.
4399 * @param pPage The shadow page.
4400 * @param iUser The shadow page pool index of the user table.
4401 * @param iUserTable The index into the user table (shadowed).
4402 */
4403void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4404{
4405 PVM pVM = pPool->CTX_SUFF(pVM);
4406
4407 STAM_PROFILE_START(&pPool->StatFree, a);
4408 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4409 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4410 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4411 pgmLock(pVM);
4412#ifdef PGMPOOL_WITH_USER_TRACKING
4413 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4414#endif
4415#ifdef PGMPOOL_WITH_CACHE
4416 if (!pPage->fCached)
4417#endif
4418 pgmPoolFlushPage(pPool, pPage);
4419 pgmUnlock(pVM);
4420 STAM_PROFILE_STOP(&pPool->StatFree, a);
4421}
4422
4423
4424/**
4425 * Makes one or more pages free.
4426 *
4427 * @returns VBox status code.
4428 * @retval VINF_SUCCESS on success.
4429 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4430 *
4431 * @param pPool The pool.
4432 * @param enmKind Page table kind
4433 * @param iUser The user of the page.
4434 */
4435static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4436{
4437 PVM pVM = pPool->CTX_SUFF(pVM);
4438
4439 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4440
4441 /*
4442 * If the pool isn't full grown yet, expand it.
4443 */
4444 if ( pPool->cCurPages < pPool->cMaxPages
4445#if defined(IN_RC)
4446 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4447 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4448 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4449#endif
4450 )
4451 {
4452 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4453#ifdef IN_RING3
4454 int rc = PGMR3PoolGrow(pVM);
4455#else
4456 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4457#endif
4458 if (RT_FAILURE(rc))
4459 return rc;
4460 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4461 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4462 return VINF_SUCCESS;
4463 }
4464
4465#ifdef PGMPOOL_WITH_CACHE
4466 /*
4467 * Free one cached page.
4468 */
4469 return pgmPoolCacheFreeOne(pPool, iUser);
4470#else
4471 /*
4472 * Flush the pool.
4473 *
4474 * If we have tracking enabled, it should be possible to come up with
4475 * a cheap replacement strategy...
4476 */
4477 /* @todo This path no longer works (CR3 root pages will be flushed)!! */
4478 AssertCompileFailed();
4479 Assert(!CPUMIsGuestInLongMode(pVM));
4480 pgmPoolFlushAllInt(pPool);
4481 return VERR_PGM_POOL_FLUSHED;
4482#endif
4483}
4484
4485/**
4486 * Allocates a page from the pool.
4487 *
4488 * This page may actually be a cached page and not in need of any processing
4489 * on the callers part.
4490 * on the caller's part.
4491 * @returns VBox status code.
4492 * @retval VINF_SUCCESS if a NEW page was allocated.
4493 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4494 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4495 * @param pVM The VM handle.
4496 * @param GCPhys The GC physical address of the page we're gonna shadow.
4497 * @param GCPhys The GC physical address of the page we're going to shadow.
4498 * shadow PT is covering.
4499 * @param enmKind The kind of mapping.
4500 * @param enmAccess Access type for the mapping (only relevant for big pages)
4501 * @param iUser The shadow page pool index of the user table.
4502 * @param iUserTable The index into the user table (shadowed).
4503 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4504 * @param fLockPage Lock the page
4505 */
4506int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4507{
4508 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4509 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4510 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4511 *ppPage = NULL;
4512 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4513 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4514 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4515
4516 pgmLock(pVM);
4517
4518#ifdef PGMPOOL_WITH_CACHE
4519 if (pPool->fCacheEnabled)
4520 {
4521 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4522 if (RT_SUCCESS(rc2))
4523 {
4524 if (fLockPage)
4525 pgmPoolLockPage(pPool, *ppPage);
4526 pgmUnlock(pVM);
4527 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4528 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4529 return rc2;
4530 }
4531 }
4532#endif
4533
4534 /*
4535 * Allocate a new one.
4536 */
4537 int rc = VINF_SUCCESS;
4538 uint16_t iNew = pPool->iFreeHead;
4539 if (iNew == NIL_PGMPOOL_IDX)
4540 {
4541 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4542 if (RT_FAILURE(rc))
4543 {
4544 pgmUnlock(pVM);
4545 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4546 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4547 return rc;
4548 }
4549 iNew = pPool->iFreeHead;
4550 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4551 }
4552
4553 /* unlink the free head */
4554 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4555 pPool->iFreeHead = pPage->iNext;
4556 pPage->iNext = NIL_PGMPOOL_IDX;
4557
4558 /*
4559 * Initialize it.
4560 */
4561 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4562 pPage->enmKind = enmKind;
4563 pPage->enmAccess = enmAccess;
4564 pPage->GCPhys = GCPhys;
4565 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4566 pPage->fMonitored = false;
4567 pPage->fCached = false;
4568#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4569 pPage->fDirty = false;
4570#endif
4571 pPage->fReusedFlushPending = false;
4572#ifdef PGMPOOL_WITH_MONITORING
4573 pPage->cModifications = 0;
4574 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4575 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4576#else
4577 pPage->fCR3Mix = false;
4578#endif
4579#ifdef PGMPOOL_WITH_USER_TRACKING
4580 pPage->cPresent = 0;
4581 pPage->iFirstPresent = ~0;
4582 pPage->pvLastAccessHandlerFault = 0;
4583 pPage->cLastAccessHandlerCount = 0;
4584 pPage->pvLastAccessHandlerRip = 0;
4585
4586 /*
4587 * Insert into the tracking and cache. If this fails, free the page.
4588 */
4589 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4590 if (RT_FAILURE(rc3))
4591 {
4592 pPool->cUsedPages--;
4593 pPage->enmKind = PGMPOOLKIND_FREE;
4594 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4595 pPage->GCPhys = NIL_RTGCPHYS;
4596 pPage->iNext = pPool->iFreeHead;
4597 pPool->iFreeHead = pPage->idx;
4598 pgmUnlock(pVM);
4599 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4600 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4601 return rc3;
4602 }
4603#endif /* PGMPOOL_WITH_USER_TRACKING */
4604
4605 /*
4606 * Commit the allocation, clear the page and return.
4607 */
4608#ifdef VBOX_WITH_STATISTICS
4609 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4610 pPool->cUsedPagesHigh = pPool->cUsedPages;
4611#endif
4612
4613 if (!pPage->fZeroed)
4614 {
4615 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4616 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4617 ASMMemZeroPage(pv);
4618 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4619 }
4620
4621 *ppPage = pPage;
4622 if (fLockPage)
4623 pgmPoolLockPage(pPool, pPage);
4624 pgmUnlock(pVM);
4625 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4626 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4627 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4628 return rc;
4629}
4630
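pgmPoolAllocEx works through a short ladder: try the cache, then the free list, and only then make more pages free by growing the pool (or, in the real code, evicting a cached page). Below is a compact sketch of that ladder with toy stand-ins for the cache lookup and the grow step; everything here is invented for illustration, and the real function additionally handles locking, user tracking and page zeroing.

#include <stdint.h>
#include <stdio.h>

#define NIL_IDX UINT16_MAX

typedef struct MYPOOL
{
    uint16_t iFreeHead;
    uint16_t aiNext[16];
    unsigned cCurPages, cMaxPages;
} MYPOOL;

static int CacheLookup(MYPOOL *pPool, uint64_t GCPhys, uint16_t *piPage)
{
    (void)pPool; (void)GCPhys; (void)piPage;
    return 0;                                   /* pretend nothing is cached */
}

static void GrowPool(MYPOOL *pPool)
{
    uint16_t iNew = (uint16_t)pPool->cCurPages++;
    pPool->aiNext[iNew] = pPool->iFreeHead;     /* new page goes onto the free list */
    pPool->iFreeHead = iNew;
}

static int PoolAlloc(MYPOOL *pPool, uint64_t GCPhys, uint16_t *piPage)
{
    if (CacheLookup(pPool, GCPhys, piPage))
        return 1;                               /* cached page, caller reuses it as-is */
    if (pPool->iFreeHead == NIL_IDX)
    {
        if (pPool->cCurPages >= pPool->cMaxPages)
            return 0;                           /* would have to evict or flush here */
        GrowPool(pPool);
    }
    *piPage = pPool->iFreeHead;                 /* pop the free-list head */
    pPool->iFreeHead = pPool->aiNext[*piPage];
    pPool->aiNext[*piPage] = NIL_IDX;
    return 1;
}

int main(void)
{
    MYPOOL Pool = { NIL_IDX, {0}, 0, 16 };
    uint16_t iPage;
    if (PoolAlloc(&Pool, 0x1000, &iPage))
        printf("allocated pool page %d\n", iPage);
    return 0;
}
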
4631
4632/**
4633 * Frees a usage of a pool page.
4634 *
4635 * @param pVM The VM handle.
4636 * @param HCPhys The HC physical address of the shadow page.
4637 * @param iUser The shadow page pool index of the user table.
4638 * @param iUserTable The index into the user table (shadowed).
4639 */
4640void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4641{
4642 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4643 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4644 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4645}
4646
4647/**
4648 * Internal worker for finding an 'in-use' shadow page given by its physical address.
4649 *
4650 * @returns Pointer to the shadow page structure.
4651 * @param pPool The pool.
4652 * @param HCPhys The HC physical address of the shadow page.
4653 */
4654PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4655{
4656 PVM pVM = pPool->CTX_SUFF(pVM);
4657
4658 Assert(PGMIsLockOwner(pVM));
4659
4660 /*
4661 * Look up the page.
4662 */
4663 pgmLock(pVM);
4664 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4665 pgmUnlock(pVM);
4666
4667 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4668 return pPage;
4669}
4670
4671
4672#ifdef IN_RING3
4673/**
4674 * Flushes the entire cache.
4675 *
4676 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4677 * and will execute this CR3 flush.
4678 *
4679 * @param pVM The VM handle.
4680 */
4681void pgmR3PoolReset(PVM pVM)
4682{
4683 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4684
4685 Assert(PGMIsLockOwner(pVM));
4686 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
4687 LogFlow(("pgmPoolFlushAllInt:\n"));
4688
4689 /*
4690 * If there are no pages in the pool, there is nothing to do.
4691 */
4692 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4693 {
4694 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4695 return;
4696 }
4697
4698 /*
4699 * Exit the shadow mode since we're going to clear everything,
4700 * including the root page.
4701 */
4702 for (unsigned i=0;i<pVM->cCPUs;i++)
4703 {
4704 PVMCPU pVCpu = &pVM->aCpus[i];
4705 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
4706 }
4707
4708 /*
4709 * Nuke the free list and reinsert all pages into it.
4710 */
4711 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4712 {
4713 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4714
4715 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
4716#ifdef PGMPOOL_WITH_MONITORING
4717 if (pPage->fMonitored)
4718 pgmPoolMonitorFlush(pPool, pPage);
4719 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4720 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4721 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4722 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4723 pPage->cModifications = 0;
4724#endif
4725 pPage->GCPhys = NIL_RTGCPHYS;
4726 pPage->enmKind = PGMPOOLKIND_FREE;
4727 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4728 Assert(pPage->idx == i);
4729 pPage->iNext = i + 1;
4730 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4731 pPage->fSeenNonGlobal = false;
4732 pPage->fMonitored = false;
4733#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4734 pPage->fDirty = false;
4735#endif
4736 pPage->fCached = false;
4737 pPage->fReusedFlushPending = false;
4738#ifdef PGMPOOL_WITH_USER_TRACKING
4739 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4740#else
4741 pPage->fCR3Mix = false;
4742#endif
4743#ifdef PGMPOOL_WITH_CACHE
4744 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4745 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4746#endif
4747 pPage->cLocked = 0;
4748 }
4749 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4750 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4751 pPool->cUsedPages = 0;
4752
4753#ifdef PGMPOOL_WITH_USER_TRACKING
4754 /*
4755 * Zap and reinitialize the user records.
4756 */
4757 pPool->cPresent = 0;
4758 pPool->iUserFreeHead = 0;
4759 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4760 const unsigned cMaxUsers = pPool->cMaxUsers;
4761 for (unsigned i = 0; i < cMaxUsers; i++)
4762 {
4763 paUsers[i].iNext = i + 1;
4764 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4765 paUsers[i].iUserTable = 0xfffffffe;
4766 }
4767 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4768#endif
4769
4770#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4771 /*
4772 * Clear all the GCPhys links and rebuild the phys ext free list.
4773 */
4774 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
4775 pRam;
4776 pRam = pRam->CTX_SUFF(pNext))
4777 {
4778 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4779 while (iPage-- > 0)
4780 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
4781 }
4782
4783 pPool->iPhysExtFreeHead = 0;
4784 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4785 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4786 for (unsigned i = 0; i < cMaxPhysExts; i++)
4787 {
4788 paPhysExts[i].iNext = i + 1;
4789 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4790 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4791 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4792 }
4793 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4794#endif
4795
4796#ifdef PGMPOOL_WITH_MONITORING
4797 /*
4798 * Just zap the modified list.
4799 */
4800 pPool->cModifiedPages = 0;
4801 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4802#endif
4803
4804#ifdef PGMPOOL_WITH_CACHE
4805 /*
4806 * Clear the GCPhys hash and the age list.
4807 */
4808 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4809 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4810 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4811 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4812#endif
4813
4814#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4815 /* Clear all dirty pages. */
4816 pPool->idxFreeDirtyPage = 0;
4817 pPool->cDirtyPages = 0;
4818 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
4819 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
4820#endif
4821
4822 /*
4823 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4824 */
4825 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4826 {
4827 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4828 pPage->iNext = NIL_PGMPOOL_IDX;
4829#ifdef PGMPOOL_WITH_MONITORING
4830 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4831 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4832 pPage->cModifications = 0;
4833 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4834 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4835 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4836 if (pPage->fMonitored)
4837 {
4838 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4839 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4840 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4841 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4842 pPool->pszAccessHandler);
4843 AssertFatalRCSuccess(rc);
4844# ifdef PGMPOOL_WITH_CACHE
4845 pgmPoolHashInsert(pPool, pPage);
4846# endif
4847 }
4848#endif
4849#ifdef PGMPOOL_WITH_USER_TRACKING
4850 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4851#endif
4852#ifdef PGMPOOL_WITH_CACHE
4853 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4854 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4855#endif
4856 }
4857
4858 for (unsigned i=0;i<pVM->cCPUs;i++)
4859 {
4860 PVMCPU pVCpu = &pVM->aCpus[i];
4861 /*
4862 * Re-enter the shadowing mode and assert Sync CR3 FF.
4863 */
4864 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
4865 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
4866 }
4867
4868 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4869}
4870#endif /* IN_RING3 */
4871
4872#ifdef LOG_ENABLED
4873static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
4874{
4875 switch(enmKind)
4876 {
4877 case PGMPOOLKIND_INVALID:
4878 return "PGMPOOLKIND_INVALID";
4879 case PGMPOOLKIND_FREE:
4880 return "PGMPOOLKIND_FREE";
4881 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4882 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4883 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4884 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4885 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4886 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4887 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4888 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4889 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4890 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4891 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4892 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
4893 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4894 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
4895 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4896 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
4897 case PGMPOOLKIND_32BIT_PD:
4898 return "PGMPOOLKIND_32BIT_PD";
4899 case PGMPOOLKIND_32BIT_PD_PHYS:
4900 return "PGMPOOLKIND_32BIT_PD_PHYS";
4901 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4902 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
4903 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4904 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
4905 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4906 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
4907 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4908 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
4909 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4910 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
4911 case PGMPOOLKIND_PAE_PD_PHYS:
4912 return "PGMPOOLKIND_PAE_PD_PHYS";
4913 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4914 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
4915 case PGMPOOLKIND_PAE_PDPT:
4916 return "PGMPOOLKIND_PAE_PDPT";
4917 case PGMPOOLKIND_PAE_PDPT_PHYS:
4918 return "PGMPOOLKIND_PAE_PDPT_PHYS";
4919 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4920 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
4921 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4922 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
4923 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4924 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
4925 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4926 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
4927 case PGMPOOLKIND_64BIT_PML4:
4928 return "PGMPOOLKIND_64BIT_PML4";
4929 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4930 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
4931 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4932 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
4933 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4934 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
4935 case PGMPOOLKIND_ROOT_NESTED:
4936 return "PGMPOOLKIND_ROOT_NESTED";
4937 }
4938 return "Unknown kind!";
4939}
4940#endif /* LOG_ENABLED*/