VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@22696

Last change on this file since 22696 was 22696, checked in by vboxsync, 15 years ago

Removed debugging code

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 185.4 KB
1/* $Id: PGMAllPool.cpp 22696 2009-09-02 08:48:49Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42#include <iprt/string.h>
43
44
45/*******************************************************************************
46* Internal Functions *
47*******************************************************************************/
48RT_C_DECLS_BEGIN
49static void pgmPoolFlushAllInt(PPGMPOOL pPool);
50#ifdef PGMPOOL_WITH_USER_TRACKING
51DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
52DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
53static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
54#endif
55#ifdef PGMPOOL_WITH_CACHE
56static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
57#endif
58#ifdef PGMPOOL_WITH_MONITORING
59static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
60#endif
61#ifndef IN_RING3
62DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
63#endif
64#ifdef LOG_ENABLED
65static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
66#endif
67
68void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs);
69void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt);
70int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
71PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
72void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
73void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
74
75RT_C_DECLS_END
76
77
78/**
79 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
80 *
81 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
82 * @param enmKind The page kind.
83 */
84DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
85{
86 switch (enmKind)
87 {
88 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
89 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
90 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
91 return true;
92 default:
93 return false;
94 }
95}
96
97/** @def PGMPOOL_PAGE_2_LOCKED_PTR
98 * Maps a pool page into the current context and locks it (RC only).
99 *
100 * @returns VBox status code.
101 * @param pVM The VM handle.
102 * @param pPage The pool page.
103 *
104 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
105 * small page window employed by that function. Be careful.
106 * @remark There is no need to assert on the result.
107 */
108#if defined(IN_RC)
109DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
110{
111 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
112
113 /* Make sure the dynamic mapping will not be reused. */
114 if (pv)
115 PGMDynLockHCPage(pVM, (uint8_t *)pv);
116
117 return pv;
118}
119#else
120# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
121#endif
122
123/** @def PGMPOOL_UNLOCK_PTR
124 * Unlocks a previously locked dynamic mapping (RC only).
125 *
126 * @returns VBox status code.
127 * @param pVM The VM handle.
128 * @param pPage The pool page.
129 *
130 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
131 * small page window employed by that function. Be careful.
132 * @remark There is no need to assert on the result.
133 */
134#if defined(IN_RC)
135DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
136{
137 if (pvPage)
138 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
139}
140#else
141# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
142#endif
143
144
145#ifdef PGMPOOL_WITH_MONITORING
146/**
147 * Determines the size of a write instruction.
148 * @returns number of bytes written.
149 * @param pDis The disassembler state.
150 */
151static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
152{
153 /*
154 * This is very crude and possibly wrong for some opcodes,
155 * but since it's not really supposed to be called we can
156 * probably live with that.
157 */
158 return DISGetParamSize(pDis, &pDis->param1);
159}
160
161
162/**
163 * Flushes a chain of pages sharing the same access monitor.
164 *
165 * @returns VBox status code suitable for scheduling.
166 * @param pPool The pool.
167 * @param pPage A page in the chain.
168 */
169int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
170{
171 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
172
173 /*
174 * Find the list head.
175 */
176 uint16_t idx = pPage->idx;
177 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
178 {
179 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
180 {
181 idx = pPage->iMonitoredPrev;
182 Assert(idx != pPage->idx);
183 pPage = &pPool->aPages[idx];
184 }
185 }
186
187 /*
188 * Iterate the list flushing each shadow page.
189 */
190 int rc = VINF_SUCCESS;
191 for (;;)
192 {
193 idx = pPage->iMonitoredNext;
194 Assert(idx != pPage->idx);
195 if (pPage->idx >= PGMPOOL_IDX_FIRST)
196 {
197 int rc2 = pgmPoolFlushPage(pPool, pPage);
198 AssertRC(rc2);
199 }
200 /* next */
201 if (idx == NIL_PGMPOOL_IDX)
202 break;
203 pPage = &pPool->aPages[idx];
204 }
205 return rc;
206}
207
208
209/**
210 * Wrapper for reading the guest entry being modified in the current context.
211 *
212 * @returns VBox status code suitable for scheduling.
213 * @param pVM VM Handle.
214 * @param pvDst Destination address
215 * @param pvSrc Source guest virtual address.
216 * @param GCPhysSrc The source guest physical address.
217 * @param cb Size of data to read
218 */
219DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
220{
221#if defined(IN_RING3)
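    /* Ring-3 gets a host context pointer; align it down to the entry size so the
       whole (possibly partially updated) entry is read in one go. */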
222 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
223 return VINF_SUCCESS;
224#else
225 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
226 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
227#endif
228}
229
230/**
231 * Process shadow entries before they are changed by the guest.
232 *
233 * For PT entries we will clear them. For PD entries, we'll simply check
234 * for mapping conflicts and set the SyncCR3 FF if found.
235 *
236 * @param pVCpu VMCPU handle
237 * @param pPool The pool.
238 * @param pPage The head page.
239 * @param GCPhysFault The guest physical fault address.
240 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
241 * In R3 this is the host context 'fault' address.
242 * @param pDis The disassembler state for figuring out the write size.
243 * This need not be specified if the caller knows we won't do cross entry accesses.
244 */
245void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, PDISCPUSTATE pDis)
246{
247 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
248 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
249 const unsigned cbWrite = pDis ? pgmPoolDisasWriteSize(pDis) : 0;
250 PVM pVM = pPool->CTX_SUFF(pVM);
251
252 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)pvAddress, GCPhysFault, cbWrite));
253
254 for (;;)
255 {
256 union
257 {
258 void *pv;
259 PX86PT pPT;
260 PX86PTPAE pPTPae;
261 PX86PD pPD;
262 PX86PDPAE pPDPae;
263 PX86PDPT pPDPT;
264 PX86PML4 pPML4;
265 } uShw;
266
267 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s cbWrite=%d\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
268
269 uShw.pv = NULL;
270 switch (pPage->enmKind)
271 {
272 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
273 {
274 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
275 const unsigned iShw = off / sizeof(X86PTE);
276 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
277 if (uShw.pPT->a[iShw].n.u1Present)
278 {
279# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
280 X86PTE GstPte;
281
282 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
283 AssertRC(rc);
284 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
285 pgmPoolTracDerefGCPhysHint(pPool, pPage,
286 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
287 GstPte.u & X86_PTE_PG_MASK);
288# endif
289 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
290 }
291 break;
292 }
293
294 /* page/2 sized */
295 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
296 {
297 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
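            /* A PAE shadow PT covers only half (2 KB) of the 4 KB 32-bit guest page table;
               pPage->GCPhys records which half, so writes to the other half are ignored here. */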
298 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
299 {
300 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
301 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
302 if (uShw.pPTPae->a[iShw].n.u1Present)
303 {
304# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
305 X86PTE GstPte;
306 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
307 AssertRC(rc);
308
309 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
310 pgmPoolTracDerefGCPhysHint(pPool, pPage,
311 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
312 GstPte.u & X86_PTE_PG_MASK);
313# endif
314 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
315 }
316 }
317 break;
318 }
319
320 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
321 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
322 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
323 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
324 {
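            /* A 32-bit guest PD (1024 entries) is shadowed by four PAE PDs of 512 entries each;
               every guest PDE maps 4 MB and thus corresponds to two 2 MB shadow PDEs. iShwPdpt
               selects which of the four shadow pages is affected, iShw the first of the two entries. */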
325 unsigned iGst = off / sizeof(X86PDE);
326 unsigned iShwPdpt = iGst / 256;
327 unsigned iShw = (iGst % 256) * 2;
328 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
329
330 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
331 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
332 {
333 for (unsigned i = 0; i < 2; i++)
334 {
335# ifndef IN_RING0
336 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
337 {
338 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
339 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
340 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
341 break;
342 }
343 else
344# endif /* !IN_RING0 */
345 if (uShw.pPDPae->a[iShw+i].n.u1Present)
346 {
347 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
348 pgmPoolFree(pVM,
349 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
350 pPage->idx,
351 iShw + i);
352 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
353 }
354
355 /* paranoia / a bit assumptive. */
356 if ( pDis
357 && (off & 3)
358 && (off & 3) + cbWrite > 4)
359 {
360 const unsigned iShw2 = iShw + 2 + i;
361 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
362 {
363# ifndef IN_RING0
364 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
365 {
366 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
367 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
368 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
369 break;
370 }
371 else
372# endif /* !IN_RING0 */
373 if (uShw.pPDPae->a[iShw2].n.u1Present)
374 {
375 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
376 pgmPoolFree(pVM,
377 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
378 pPage->idx,
379 iShw2);
380 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
381 }
382 }
383 }
384 }
385 }
386 break;
387 }
388
389 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
390 {
391 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
392 const unsigned iShw = off / sizeof(X86PTEPAE);
393 if (uShw.pPTPae->a[iShw].n.u1Present)
394 {
395# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
396 X86PTEPAE GstPte;
397 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
398 AssertRC(rc);
399
400 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
401 pgmPoolTracDerefGCPhysHint(pPool, pPage,
402 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
403 GstPte.u & X86_PTE_PAE_PG_MASK);
404# endif
405 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
406 }
407
408 /* paranoia / a bit assumptive. */
409 if ( pDis
410 && (off & 7)
411 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
412 {
413 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
414 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
415
416 if (uShw.pPTPae->a[iShw2].n.u1Present)
417 {
418# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
419 X86PTEPAE GstPte;
420# ifdef IN_RING3
421 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
422# else
423 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
424# endif
425 AssertRC(rc);
426 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
427 pgmPoolTracDerefGCPhysHint(pPool, pPage,
428 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
429 GstPte.u & X86_PTE_PAE_PG_MASK);
430# endif
431 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u, 0);
432 }
433 }
434 break;
435 }
436
437 case PGMPOOLKIND_32BIT_PD:
438 {
439 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
440 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
441
442 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
443# ifndef IN_RING0
444 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
445 {
446 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
447 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
448 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
449 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
450 break;
451 }
452# endif /* !IN_RING0 */
453# ifndef IN_RING0
454 else
455# endif /* !IN_RING0 */
456 {
457 if (uShw.pPD->a[iShw].n.u1Present)
458 {
459 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
460 pgmPoolFree(pVM,
461 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
462 pPage->idx,
463 iShw);
464 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
465 }
466 }
467 /* paranoia / a bit assumptive. */
468 if ( pDis
469 && (off & 3)
470 && (off & 3) + cbWrite > sizeof(X86PTE))
471 {
472 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
473 if ( iShw2 != iShw
474 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
475 {
476# ifndef IN_RING0
477 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
478 {
479 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
480 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
481 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
482 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
483 break;
484 }
485# endif /* !IN_RING0 */
486# ifndef IN_RING0
487 else
488# endif /* !IN_RING0 */
489 {
490 if (uShw.pPD->a[iShw2].n.u1Present)
491 {
492 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
493 pgmPoolFree(pVM,
494 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
495 pPage->idx,
496 iShw2);
497 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
498 }
499 }
500 }
501 }
502#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
503 if ( uShw.pPD->a[iShw].n.u1Present
504 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
505 {
506 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
507# ifdef IN_RC /* TLB load - we're pushing things a bit... */
508 ASMProbeReadByte(pvAddress);
509# endif
510 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
511 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
512 }
513#endif
514 break;
515 }
516
517 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
518 {
519 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
520 const unsigned iShw = off / sizeof(X86PDEPAE);
521#ifndef IN_RING0
522 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
523 {
524 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
525 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
526 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
527 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
528 break;
529 }
530#endif /* !IN_RING0 */
531 /*
532 * Causes trouble when the guest uses a PDE to refer to the whole page table level
533 * structure. (Invalidate here; faults later on when it tries to change the page
534 * table entries -> recheck; probably only applies to the RC case.)
535 */
536# ifndef IN_RING0
537 else
538# endif /* !IN_RING0 */
539 {
540 if (uShw.pPDPae->a[iShw].n.u1Present)
541 {
542 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
543 pgmPoolFree(pVM,
544 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
545 pPage->idx,
546 iShw);
547 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
548 }
549 }
550 /* paranoia / a bit assumptive. */
551 if ( pDis
552 && (off & 7)
553 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
554 {
555 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
556 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
557
558#ifndef IN_RING0
559 if ( iShw2 != iShw
560 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
561 {
562 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
563 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
564 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
565 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
566 break;
567 }
568#endif /* !IN_RING0 */
569# ifndef IN_RING0
570 else
571# endif /* !IN_RING0 */
572 if (uShw.pPDPae->a[iShw2].n.u1Present)
573 {
574 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
575 pgmPoolFree(pVM,
576 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
577 pPage->idx,
578 iShw2);
579 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
580 }
581 }
582 break;
583 }
584
585 case PGMPOOLKIND_PAE_PDPT:
586 {
587 /*
588 * Hopefully this doesn't happen very often:
589 * - touching unused parts of the page
590 * - messing with the bits of pd pointers without changing the physical address
591 */
592 /* PDPT roots are not page aligned; 32 byte only! */
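            /* Because the PDPT occupies only 32 bytes and is not page aligned, the offset is
               computed relative to the page's GCPhys instead of using the fault's page-offset bits. */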
593 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
594
595 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
596 const unsigned iShw = offPdpt / sizeof(X86PDPE);
597 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
598 {
599# ifndef IN_RING0
600 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
601 {
602 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
603 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
604 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
605 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
606 break;
607 }
608# endif /* !IN_RING0 */
609# ifndef IN_RING0
610 else
611# endif /* !IN_RING0 */
612 if (uShw.pPDPT->a[iShw].n.u1Present)
613 {
614 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
615 pgmPoolFree(pVM,
616 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
617 pPage->idx,
618 iShw);
619 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
620 }
621
622 /* paranoia / a bit assumptive. */
623 if ( pDis
624 && (offPdpt & 7)
625 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
626 {
627 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
628 if ( iShw2 != iShw
629 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
630 {
631# ifndef IN_RING0
632 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
633 {
634 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
635 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
636 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
637 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
638 break;
639 }
640# endif /* !IN_RING0 */
641# ifndef IN_RING0
642 else
643# endif /* !IN_RING0 */
644 if (uShw.pPDPT->a[iShw2].n.u1Present)
645 {
646 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
647 pgmPoolFree(pVM,
648 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
649 pPage->idx,
650 iShw2);
651 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
652 }
653 }
654 }
655 }
656 break;
657 }
658
659#ifndef IN_RC
660 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
661 {
662 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
663 const unsigned iShw = off / sizeof(X86PDEPAE);
664 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
665 if (uShw.pPDPae->a[iShw].n.u1Present)
666 {
667 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
668 pgmPoolFree(pVM,
669 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
670 pPage->idx,
671 iShw);
672 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
673 }
674 /* paranoia / a bit assumptive. */
675 if ( pDis
676 && (off & 7)
677 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
678 {
679 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
680 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
681
682 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
683 if (uShw.pPDPae->a[iShw2].n.u1Present)
684 {
685 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
686 pgmPoolFree(pVM,
687 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
688 pPage->idx,
689 iShw2);
690 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
691 }
692 }
693 break;
694 }
695
696 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
697 {
698 /*
699 * Hopefully this doesn't happen very often:
700 * - messing with the bits of pd pointers without changing the physical address
701 */
702 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
703 {
704 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
705 const unsigned iShw = off / sizeof(X86PDPE);
706 if (uShw.pPDPT->a[iShw].n.u1Present)
707 {
708 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
709 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
710 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
711 }
712 /* paranoia / a bit assumptive. */
713 if ( pDis
714 && (off & 7)
715 && (off & 7) + cbWrite > sizeof(X86PDPE))
716 {
717 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
718 if (uShw.pPDPT->a[iShw2].n.u1Present)
719 {
720 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
721 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
722 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
723 }
724 }
725 }
726 break;
727 }
728
729 case PGMPOOLKIND_64BIT_PML4:
730 {
731 /*
732 * Hopefully this doesn't happen very often:
733 * - messing with the bits of pd pointers without changing the physical address
734 */
735 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
736 {
737 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
738 const unsigned iShw = off / sizeof(X86PDPE);
739 if (uShw.pPML4->a[iShw].n.u1Present)
740 {
741 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
742 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
743 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
744 }
745 /* paranoia / a bit assumptive. */
746 if ( pDis
747 && (off & 7)
748 && (off & 7) + cbWrite > sizeof(X86PDPE))
749 {
750 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
751 if (uShw.pPML4->a[iShw2].n.u1Present)
752 {
753 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
754 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
755 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
756 }
757 }
758 }
759 break;
760 }
761#endif /* !IN_RC */
762
763 default:
764 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
765 }
766 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
767
768 /* next */
769 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
770 return;
771 pPage = &pPool->aPages[pPage->iMonitoredNext];
772 }
773}
774
775# ifndef IN_RING3
776/**
777 * Checks if an access could be a fork operation in progress.
778 *
779 * Meaning that the guest is setting up the parent process for Copy-On-Write.
780 *
781 * @returns true if it's likely that we're forking, otherwise false.
782 * @param pPool The pool.
783 * @param pDis The disassembled instruction.
784 * @param offFault The access offset.
785 */
786DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
787{
788 /*
789 * i386 Linux uses btr to clear X86_PTE_RW.
790 * The functions involved are (2.6.16 source inspection):
791 * clear_bit
792 * ptep_set_wrprotect
793 * copy_one_pte
794 * copy_pte_range
795 * copy_pmd_range
796 * copy_pud_range
797 * copy_page_range
798 * dup_mmap
799 * dup_mm
800 * copy_mm
801 * copy_process
802 * do_fork
803 */
804 if ( pDis->pCurInstr->opcode == OP_BTR
805 && !(offFault & 4)
806 /** @todo Validate that the bit index is X86_PTE_RW. */
807 )
808 {
809 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
810 return true;
811 }
812 return false;
813}
814
815
816/**
817 * Determine whether the page is likely to have been reused.
818 *
819 * @returns true if we consider the page as being reused for a different purpose.
820 * @returns false if we consider it to still be a paging page.
821 * @param pVM VM Handle.
822 * @param pVCpu VMCPU Handle.
823 * @param pRegFrame Trap register frame.
824 * @param pDis The disassembly info for the faulting instruction.
825 * @param pvFault The fault address.
826 *
827 * @remark The REP prefix check is left to the caller because of STOSD/W.
828 */
829DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
830{
831#ifndef IN_RC
832 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
833 if ( HWACCMHasPendingIrq(pVM)
834 && (pRegFrame->rsp - pvFault) < 32)
835 {
836 /* Fault caused by stack writes while trying to inject an interrupt event. */
837 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
838 return true;
839 }
840#else
841 NOREF(pVM); NOREF(pvFault);
842#endif
843
844 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
845
846 /* Non-supervisor mode write means it's used for something else. */
847 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
848 return true;
849
850 switch (pDis->pCurInstr->opcode)
851 {
852 /* call implies the actual push of the return address faulted */
853 case OP_CALL:
854 Log4(("pgmPoolMonitorIsReused: CALL\n"));
855 return true;
856 case OP_PUSH:
857 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
858 return true;
859 case OP_PUSHF:
860 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
861 return true;
862 case OP_PUSHA:
863 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
864 return true;
865 case OP_FXSAVE:
866 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
867 return true;
868 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
869 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
870 return true;
871 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
872 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
873 return true;
874 case OP_MOVSWD:
875 case OP_STOSWD:
876 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
877 && pRegFrame->rcx >= 0x40
878 )
879 {
880 Assert(pDis->mode == CPUMODE_64BIT);
881
882 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
883 return true;
884 }
885 return false;
886 }
887 if ( ( (pDis->param1.flags & USE_REG_GEN32)
888 || (pDis->param1.flags & USE_REG_GEN64))
889 && (pDis->param1.base.reg_gen == USE_REG_ESP))
890 {
891 Log4(("pgmPoolMonitorIsReused: ESP\n"));
892 return true;
893 }
894
895 return false;
896}
897
898
899/**
900 * Flushes the page being accessed.
901 *
902 * @returns VBox status code suitable for scheduling.
903 * @param pVM The VM handle.
904 * @param pVCpu The VMCPU handle.
905 * @param pPool The pool.
906 * @param pPage The pool page (head).
907 * @param pDis The disassembly of the write instruction.
908 * @param pRegFrame The trap register frame.
909 * @param GCPhysFault The fault address as guest physical address.
910 * @param pvFault The fault address.
911 */
912static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
913 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
914{
915 /*
916 * First, do the flushing.
917 */
918 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
919
920 /*
921 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
922 * @todo: why is this necessary? an instruction restart would be sufficient, wouldn't it?
923 */
924 uint32_t cbWritten;
925 int rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten);
926 if (RT_SUCCESS(rc2))
927 pRegFrame->rip += pDis->opsize;
928 else if (rc2 == VERR_EM_INTERPRETER)
929 {
930#ifdef IN_RC
931 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
932 {
933 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
934 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
935 rc = VINF_SUCCESS;
936 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
937 }
938 else
939#endif
940 {
941 rc = VINF_EM_RAW_EMULATE_INSTR;
942 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
943 }
944 }
945 else
946 rc = rc2;
947
948 /* See use in pgmPoolAccessHandlerSimple(). */
949 PGM_INVL_VCPU_TLBS(pVCpu);
950
951 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
952 return rc;
953
954}
955
956
957/**
958 * Handles the STOSD write accesses.
959 *
960 * @returns VBox status code suitable for scheduling.
961 * @param pVM The VM handle.
962 * @param pPool The pool.
963 * @param pPage The pool page (head).
964 * @param pDis The disassembly of the write instruction.
965 * @param pRegFrame The trap register frame.
966 * @param GCPhysFault The fault address as guest physical address.
967 * @param pvFault The fault address.
968 */
969DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
970 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
971{
972 unsigned uIncrement = pDis->param1.size;
973
974 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
975 Assert(pRegFrame->rcx <= 0x20);
976
977#ifdef VBOX_STRICT
978 if (pDis->opmode == CPUMODE_32BIT)
979 Assert(uIncrement == 4);
980 else
981 Assert(uIncrement == 8);
982#endif
983
984 Log3(("pgmPoolAccessHandlerSTOSD\n"));
985
986 /*
987 * Increment the modification counter and insert it into the list
988 * of modified pages the first time.
989 */
990 if (!pPage->cModifications++)
991 pgmPoolMonitorModifiedInsert(pPool, pPage);
992
993 /*
994 * Execute REP STOSD.
995 *
996 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
997 * write situation, meaning that it's safe to write here.
998 */
999 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1000 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
1001 while (pRegFrame->rcx)
1002 {
1003#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1004 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1005 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1006 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1007#else
1008 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1009#endif
1010#ifdef IN_RC
1011 *(uint32_t *)pu32 = pRegFrame->eax;
1012#else
1013 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
1014#endif
1015 pu32 += uIncrement;
1016 GCPhysFault += uIncrement;
1017 pRegFrame->rdi += uIncrement;
1018 pRegFrame->rcx--;
1019 }
1020 pRegFrame->rip += pDis->opsize;
1021
1022#ifdef IN_RC
1023 /* See use in pgmPoolAccessHandlerSimple(). */
1024 PGM_INVL_VCPU_TLBS(pVCpu);
1025#endif
1026
1027 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
1028 return VINF_SUCCESS;
1029}
1030
1031
1032/**
1033 * Handles the simple write accesses.
1034 *
1035 * @returns VBox status code suitable for scheduling.
1036 * @param pVM The VM handle.
1037 * @param pVCpu The VMCPU handle.
1038 * @param pPool The pool.
1039 * @param pPage The pool page (head).
1040 * @param pDis The disassembly of the write instruction.
1041 * @param pRegFrame The trap register frame.
1042 * @param GCPhysFault The fault address as guest physical address.
1043 * @param pvFault The fault address.
1044 */
1045DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1046 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1047{
1048 Log3(("pgmPoolAccessHandlerSimple\n"));
1049 /*
1050 * Increment the modification counter and insert it into the list
1051 * of modified pages the first time.
1052 */
1053 if (!pPage->cModifications++)
1054 pgmPoolMonitorModifiedInsert(pPool, pPage);
1055
1056 /*
1057 * Clear all the pages. ASSUMES that pvFault is readable.
1058 */
1059#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1060 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1061 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, pDis);
1062 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1063#else
1064 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, pDis);
1065#endif
1066
1067 /*
1068 * Interpret the instruction.
1069 */
1070 uint32_t cb;
1071 int rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb);
1072 if (RT_SUCCESS(rc))
1073 pRegFrame->rip += pDis->opsize;
1074 else if (rc == VERR_EM_INTERPRETER)
1075 {
1076 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1077 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1078 rc = VINF_EM_RAW_EMULATE_INSTR;
1079 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1080 }
1081
1082#ifdef IN_RC
1083 /*
1084 * Quick hack, with logging enabled we're getting stale
1085 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
1086 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
1087 * have to be fixed to support this. But that'll have to wait till next week.
1088 *
1089 * An alternative is to keep track of the changed PTEs together with the
1090 * GCPhys from the guest PT. This may prove expensive though.
1091 *
1092 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
1093 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1094 */
1095 PGM_INVL_VCPU_TLBS(pVCpu);
1096#endif
1097
1098 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1099 return rc;
1100}
1101
1102/**
1103 * \#PF Handler callback for PT write accesses.
1104 *
1105 * @returns VBox status code (appropriate for GC return).
1106 * @param pVM VM Handle.
1107 * @param uErrorCode CPU Error code.
1108 * @param pRegFrame Trap register frame.
1109 * NULL on DMA and other non CPU access.
1110 * @param pvFault The fault address (cr2).
1111 * @param GCPhysFault The GC physical address corresponding to pvFault.
1112 * @param pvUser User argument.
1113 */
1114DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1115{
1116 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1117 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1118 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1119 PVMCPU pVCpu = VMMGetCpu(pVM);
1120 unsigned cMaxModifications;
1121 bool fForcedFlush = false;
1122
1123 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1124
1125 pgmLock(pVM);
1126 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1127 {
1128 /* Pool page changed while we were waiting for the lock; ignore. */
1129 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1130 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1131 pgmUnlock(pVM);
1132 return VINF_SUCCESS;
1133 }
1134
1135 /*
1136 * Disassemble the faulting instruction.
1137 */
1138 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1139 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1140 AssertReturnStmt(rc == VINF_SUCCESS, pgmUnlock(pVM), rc);
1141
1142 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1143
1144 /*
1145 * We should ALWAYS have the list head as user parameter. This
1146 * is because we use that page to record the changes.
1147 */
1148 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1149#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1150 Assert(!pPage->fDirty);
1151#endif
1152
1153 /* Maximum number of modifications depends on the guest mode. */
1154 if (pDis->mode == CPUMODE_32BIT)
1155 cMaxModifications = 48;
1156 else
1157 cMaxModifications = 24;
1158
1159 /*
1160 * Incremental page table updates should weigh more than random ones.
1161 * (Only applies when started from offset 0)
1162 */
1163 pVCpu->pgm.s.cPoolAccessHandler++;
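    /* Heuristic: treat the write as part of a sequential page table update when it comes from
       roughly the same code (RIP within +/- 0x40 of the last monitored write), hits the entry
       right after the previous fault address, and is the very next access seen by this handler. */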
1164 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1165 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1166 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1167 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1168 {
1169 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1170 pPage->cModifications = pPage->cModifications * 2;
1171 pPage->pvLastAccessHandlerFault = pvFault;
1172 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1173 if (pPage->cModifications >= cMaxModifications)
1174 {
1175 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1176 fForcedFlush = true;
1177 }
1178 }
1179
1180 if (pPage->cModifications >= cMaxModifications)
1181 Log(("Mod overflow %VGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1182
1183 /*
1184 * Check if it's worth dealing with.
1185 */
1186 bool fReused = false;
1187 bool fNotReusedNotForking = false;
1188 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1189 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1190 )
1191 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1192 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1193 {
1194 /*
1195 * Simple instructions, no REP prefix.
1196 */
1197 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1198 {
1199 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1200
1201 /* A mov instruction to change the first page table entry will be remembered so we can detect
1202 * full page table changes early on. This will reduce the number of unnecessary traps we'll take.
1203 */
1204 if ( rc == VINF_SUCCESS
1205 && pDis->pCurInstr->opcode == OP_MOV
1206 && (pvFault & PAGE_OFFSET_MASK) == 0)
1207 {
1208 pPage->pvLastAccessHandlerFault = pvFault;
1209 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1210 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1211 /* Make sure we don't kick out a page too quickly. */
1212 if (pPage->cModifications > 8)
1213 pPage->cModifications = 2;
1214 }
1215 else
1216 if (pPage->pvLastAccessHandlerFault == pvFault)
1217 {
1218 /* ignore the 2nd write to this page table entry. */
1219 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1220 }
1221 else
1222 {
1223 pPage->pvLastAccessHandlerFault = 0;
1224 pPage->pvLastAccessHandlerRip = 0;
1225 }
1226
1227 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1228 pgmUnlock(pVM);
1229 return rc;
1230 }
1231
1232 /*
1233 * Windows is frequently doing small memset() operations (netio test 4k+).
1234 * We have to deal with these or we'll kill the cache and performance.
1235 */
1236 if ( pDis->pCurInstr->opcode == OP_STOSWD
1237 && !pRegFrame->eflags.Bits.u1DF
1238 && pDis->opmode == pDis->mode
1239 && pDis->addrmode == pDis->mode)
1240 {
1241 bool fValidStosd = false;
1242
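            /* Only treat the REP STOS as a valid page table memset when the count is small,
               the store stays within the faulting page, the destination is operand-size aligned,
               and the value written matches what guest PT initialization code has been observed to use. */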
1243 if ( pDis->mode == CPUMODE_32BIT
1244 && pDis->prefix == PREFIX_REP
1245 && pRegFrame->ecx <= 0x20
1246 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1247 && !((uintptr_t)pvFault & 3)
1248 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1249 )
1250 {
1251 fValidStosd = true;
1252 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1253 }
1254 else
1255 if ( pDis->mode == CPUMODE_64BIT
1256 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1257 && pRegFrame->rcx <= 0x20
1258 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1259 && !((uintptr_t)pvFault & 7)
1260 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1261 )
1262 {
1263 fValidStosd = true;
1264 }
1265
1266 if (fValidStosd)
1267 {
1268 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1269 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1270 pgmUnlock(pVM);
1271 return rc;
1272 }
1273 }
1274
1275 /* REP prefix, don't bother. */
1276 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1277 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1278 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1279 fNotReusedNotForking = true;
1280 }
1281
1282#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1283 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1284 * leads to pgm pool thrashing and an excessive number of write faults due to page monitoring.
1285 */
1286 if ( pPage->cModifications >= cMaxModifications
1287 && !fForcedFlush
1288 && pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1289 && ( fNotReusedNotForking
1290 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1291 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1292 )
1293 )
1294 {
1295 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1296 Assert(pPage->fDirty == false);
1297
1298 /* Flush any monitored duplicates as we will disable write protection. */
1299 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1300 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1301 {
1302 PPGMPOOLPAGE pPageHead = pPage;
1303
1304 /* Find the monitor head. */
1305 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1306 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1307
1308 while (pPageHead)
1309 {
1310 unsigned idxNext = pPageHead->iMonitoredNext;
1311
1312 if (pPageHead != pPage)
1313 {
1314 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1315 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1316 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1317 AssertRC(rc2);
1318 }
1319
1320 if (idxNext == NIL_PGMPOOL_IDX)
1321 break;
1322
1323 pPageHead = &pPool->aPages[idxNext];
1324 }
1325 }
1326
1327 /* The flushing above might fail for locked pages, so double check. */
1328 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1329 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1330 {
1331 /* Temporarily allow write access to the page table again. */
1332 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
1333 if (rc == VINF_SUCCESS)
1334 {
1335 rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
1336 AssertMsg(rc == VINF_SUCCESS
1337 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1338 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1339 || rc == VERR_PAGE_NOT_PRESENT,
1340 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1341
1342 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1343 pPage->pvDirtyFault = pvFault;
1344
1345 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1346 pgmUnlock(pVM);
1347 return rc;
1348 }
1349 }
1350 }
1351#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1352
1353 /*
1354 * Not worth it, so flush it.
1355 *
1356 * If we considered it to be reused, don't go back to ring-3
1357 * to emulate failed instructions since we usually cannot
1358 * interpret them. This may be a bit risky, in which case
1359 * the reuse detection must be fixed.
1360 */
1361 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1362 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1363 rc = VINF_SUCCESS;
1364 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1365 pgmUnlock(pVM);
1366 return rc;
1367}
1368
1369# endif /* !IN_RING3 */
1370
1371# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1372/**
1373 * Check references to guest physical memory in a PAE / PAE page table.
1374 *
1375 * @param pPool The pool.
1376 * @param pPage The page.
1377 * @param pShwPT The shadow page table (mapping of the page).
1378 * @param pGstPT The guest page table.
1379 */
1380DECLINLINE(void) pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
1381{
1382 unsigned cErrors = 0;
1383 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
1384 {
1385 if (pShwPT->a[i].n.u1Present)
1386 {
1387 RTHCPHYS HCPhys = -1;
1388 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1389 if ( rc != VINF_SUCCESS
1390 || (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) != HCPhys)
1391 {
1392 RTHCPHYS HCPhysPT = -1;
1393 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1394 cErrors++;
1395
1396 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pPage->GCPhys, &HCPhysPT);
1397 AssertRC(rc);
1398
1399 for (unsigned i = 0; i < pPool->cCurPages; i++)
1400 {
1401 PPGMPOOLPAGE pTempPage = &pPool->aPages[i];
1402
1403 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1404 {
1405 PX86PTPAE pShwPT2 = (PX86PTPAE)PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pTempPage);
1406
1407 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1408 {
1409 if ( pShwPT2->a[j].n.u1Present
1410 && pShwPT2->a[j].n.u1Write
1411 && ((pShwPT2->a[j].u & X86_PTE_PAE_PG_MASK) == HCPhysPT))
1412 {
1413 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, pShwPT->a[j].u, pShwPT2->a[j].u));
1414 }
1415 }
1416 }
1417 }
1418 }
1419 }
1420 }
1421 Assert(!cErrors);
1422}
1423
1424/**
1425 * Clear references to guest physical memory in a PAE / PAE page table.
1426 *
1427 * @returns Number of changed PTEs.
1428 * @param pPool The pool.
1429 * @param pPage The page.
1430 * @param pShwPT The shadow page table (mapping of the page).
1431 * @param pGstPT The guest page table.
1432 * @param pOldGstPT The old cached guest page table.
1433 */
1434DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT, PCX86PTPAE pOldGstPT)
1435{
1436 unsigned cChanged = 0;
1437
1438 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
1439 {
1440 if (pShwPT->a[i].n.u1Present)
1441 {
1442 /* If the old cached PTE is identical, there's no need to flush the shadow copy. */
1443 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1444 {
1445#ifdef VBOX_STRICT
1446 RTHCPHYS HCPhys = -1;
1447 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1448 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1449#endif
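            /* Same physical address: only skip the flush if the attribute bits match too and the
               shadow entry is not more permissive than the guest entry (the shadow may be read-only
               for monitoring purposes, but must never be writable when the guest entry isn't). */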
1450 uint64_t uHostAttr = pShwPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1451 bool fHostRW = !!(pShwPT->a[i].u & X86_PTE_RW);
1452 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1453 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1454
1455 if ( uHostAttr == uGuestAttr
1456 && fHostRW <= fGuestRW)
1457 continue;
1458 }
1459 cChanged++;
1460 /* Something was changed, so flush it. */
1461 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1462 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1463 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
1464 ASMAtomicWriteSize(&pShwPT->a[i].u, 0);
1465 }
1466 }
1467 return cChanged;
1468}
1469
1470
1471/**
1472 * Flushes a dirty page.
1473 *
1474 * @param pVM VM Handle.
1475 * @param pPool The pool.
1476 * @param idxSlot Dirty array slot index.
1477 * @param fForceRemoval Force removal from the dirty page list.
1478 */
1479static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fForceRemoval = false)
1480{
1481 PPGMPOOLPAGE pPage;
1482 unsigned idxPage;
1483
1484 Assert(idxSlot < RT_ELEMENTS(pPool->aIdxDirtyPages));
1485 if (pPool->aIdxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
1486 return;
1487
1488 idxPage = pPool->aIdxDirtyPages[idxSlot];
1489 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1490 pPage = &pPool->aPages[idxPage];
1491 Assert(pPage->idx == idxPage);
1492 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1493
1494 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1495 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1496
1497 /* Flush those PTEs that have changed. */
1498 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1499 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1500 void *pvGst;
1501 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1502 unsigned cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst, (PCX86PTPAE)&pPool->aDirtyPages[idxSlot][0]);
1503 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1504
1505 /** Note: we might want to consider keeping the dirty page active in case there were many changes. */
1506
1507 /* Write protect the page again to catch all write accesses. */
1508 rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys);
1509 Assert(rc == VINF_SUCCESS);
1510 pPage->fDirty = false;
1511
1512#ifdef VBOX_STRICT
1513 uint64_t fFlags = 0;
1514 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, NULL);
1515 AssertMsg( (rc == VINF_SUCCESS && !(fFlags & X86_PTE_RW))
1516 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1517 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1518 || rc == VERR_PAGE_NOT_PRESENT,
1519 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1520#endif
1521
1522 /* This page is likely to be modified again, so reduce the number of modifications just a bit here. */
1523 Assert(pPage->cModifications);
1524 if (cChanges < 4)
1525 pPage->cModifications = 1; /* must use > 0 here */
1526 else
1527 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1528
1529 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1530 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages))
1531 pPool->idxFreeDirtyPage = idxSlot;
1532
1533 pPool->cDirtyPages--;
1534 pPool->aIdxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1535 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1536 Log(("Removed dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1537}
1538
1539# ifndef IN_RING3
1540/**
1541 * Adds a new dirty page.
1542 *
1543 * @param pVM VM Handle.
1544 * @param pPool The pool.
1545 * @param pPage The page.
1546 */
1547void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1548{
1549 unsigned idxFree;
1550
1551 Assert(PGMIsLocked(pVM));
1552 AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
1553 Assert(!pPage->fDirty);
1554
1555 idxFree = pPool->idxFreeDirtyPage;
1556 Assert(idxFree < RT_ELEMENTS(pPool->aIdxDirtyPages));
1557 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1558
1559 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aIdxDirtyPages))
1560 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* force removal */);
1561 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages));
1562 AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1563
1564 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1565
1566 /* Make a copy of the guest page table as we require valid GCPhys addresses when removing
1567 * references to physical pages. (the HCPhys linear lookup is *extremely* expensive!)
1568 */
1569 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1570 void *pvGst;
1571 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1572 memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
1573 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1574
1575 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1576 pPage->fDirty = true;
1577 pPage->idxDirty = idxFree;
1578 pPool->aIdxDirtyPages[idxFree] = pPage->idx;
1579 pPool->cDirtyPages++;
1580
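    /* Advance the free slot index; the wrap-around mask works because the dirty page array
       size is a power of two (see the AssertCompile above). */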
1581 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1582 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages)
1583 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1584 {
1585 unsigned i;
1586 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1587 {
1588 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1589 if (pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1590 {
1591 pPool->idxFreeDirtyPage = idxFree;
1592 break;
1593 }
1594 }
1595 Assert(i != RT_ELEMENTS(pPool->aIdxDirtyPages));
1596 }
1597
1598 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages) || pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1599 return;
1600}
1601# endif /* !IN_RING3 */
1602
1603/**
1604 * Resets all dirty pages by reinstating page monitoring.
1605 *
1606 * @param pVM VM Handle.
1607 * @param fForceRemoval Force removal of all dirty pages
1608 */
1609void pgmPoolResetDirtyPages(PVM pVM, bool fForceRemoval)
1610{
1611 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1612 Assert(PGMIsLocked(pVM));
1613 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1614
1615 if (!pPool->cDirtyPages)
1616 return;
1617
1618 Log(("pgmPoolResetDirtyPages\n"));
1619 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1620 pgmPoolFlushDirtyPage(pVM, pPool, i, fForceRemoval);
1621
1622 pPool->idxFreeDirtyPage = 0;
1623 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1624 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1625 {
1626 unsigned i;
1627 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1628 {
1629 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1630 {
1631 pPool->idxFreeDirtyPage = i;
1632 break;
1633 }
1634 }
1635 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1636 }
1637
1638 Assert(pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages));
1639 return;
1640}
1641# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1642#endif /* PGMPOOL_WITH_MONITORING */
1643
1644#ifdef PGMPOOL_WITH_CACHE
1645
1646/**
1647 * Inserts a page into the GCPhys hash table.
1648 *
1649 * @param pPool The pool.
1650 * @param pPage The page.
1651 */
1652DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1653{
1654 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1655 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1656 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1657 pPage->iNext = pPool->aiHash[iHash];
1658 pPool->aiHash[iHash] = pPage->idx;
1659}
1660
1661
1662/**
1663 * Removes a page from the GCPhys hash table.
1664 *
1665 * @param pPool The pool.
1666 * @param pPage The page.
1667 */
1668DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1669{
1670 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1671 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1672 if (pPool->aiHash[iHash] == pPage->idx)
1673 pPool->aiHash[iHash] = pPage->iNext;
1674 else
1675 {
1676 uint16_t iPrev = pPool->aiHash[iHash];
1677 for (;;)
1678 {
1679 const int16_t i = pPool->aPages[iPrev].iNext;
1680 if (i == pPage->idx)
1681 {
1682 pPool->aPages[iPrev].iNext = pPage->iNext;
1683 break;
1684 }
1685 if (i == NIL_PGMPOOL_IDX)
1686 {
1687 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1688 break;
1689 }
1690 iPrev = i;
1691 }
1692 }
1693 pPage->iNext = NIL_PGMPOOL_IDX;
1694}
1695
1696
1697/**
1698 * Frees up one cache page.
1699 *
1700 * @returns VBox status code.
1701 * @retval VINF_SUCCESS on success.
1702 * @param pPool The pool.
1703 * @param iUser The user index.
1704 */
1705static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1706{
1707#ifndef IN_RC
1708 const PVM pVM = pPool->CTX_SUFF(pVM);
1709#endif
1710 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1711 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1712
1713 /*
1714 * Select one page from the tail of the age list.
1715 */
1716 PPGMPOOLPAGE pPage;
1717 for (unsigned iLoop = 0; ; iLoop++)
1718 {
1719 uint16_t iToFree = pPool->iAgeTail;
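 /* Never pick the caller's own page (iUser); it is about to gain a reference to the
  * new allocation, so take the next oldest page from the age list instead. */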
1720 if (iToFree == iUser)
1721 iToFree = pPool->aPages[iToFree].iAgePrev;
1722/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1723 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1724 {
1725 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1726 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1727 {
1728 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1729 continue;
1730 iToFree = i;
1731 break;
1732 }
1733 }
1734*/
1735 Assert(iToFree != iUser);
1736 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1737 pPage = &pPool->aPages[iToFree];
1738
1739 /*
1740 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1741 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1742 */
1743 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1744 break;
1745 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1746 pgmPoolCacheUsed(pPool, pPage);
1747 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1748 }
1749
1750 /*
1751 * Found a usable page, flush it and return.
1752 */
1753 int rc = pgmPoolFlushPage(pPool, pPage);
1754 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1755 if (rc == VINF_SUCCESS)
1756 PGM_INVL_ALL_VCPU_TLBS(pVM);
1757 return rc;
1758}
1759
1760
1761/**
1762 * Checks if a kind mismatch is really a page being reused
1763 * or just a normal remapping.
1764 *
1765 * @returns true if reused and the cached page (enmKind1) should be flushed
1766 * @returns false if not reused.
1767 * @param enmKind1 The kind of the cached page.
1768 * @param enmKind2 The kind of the requested page.
1769 */
1770static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1771{
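 /* Informal example: a page cached as the shadow of a 32-bit guest page table that is
  * now requested as the shadow of a PAE page table most likely means the guest switched
  * paging mode or reused the page for something else, so the stale shadow is flushed.
  * Kinds shadowing fixed physical ranges are never treated as reused. */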
1772 switch (enmKind1)
1773 {
1774 /*
1775 * Never reuse them. There is no remapping in non-paging mode.
1776 */
1777 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1778 case PGMPOOLKIND_32BIT_PD_PHYS:
1779 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1780 case PGMPOOLKIND_PAE_PD_PHYS:
1781 case PGMPOOLKIND_PAE_PDPT_PHYS:
1782 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1783 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1784 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1785 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1786 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1787 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1788 return false;
1789
1790 /*
1791 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1792 */
1793 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1794 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1795 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1796 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1797 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1798 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1799 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1800 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1801 case PGMPOOLKIND_32BIT_PD:
1802 case PGMPOOLKIND_PAE_PDPT:
1803 switch (enmKind2)
1804 {
1805 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1806 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1807 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1808 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1809 case PGMPOOLKIND_64BIT_PML4:
1810 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1811 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1812 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1813 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1814 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1815 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1816 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1817 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1818 return true;
1819 default:
1820 return false;
1821 }
1822
1823 /*
1824 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1825 */
1826 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1827 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1828 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1829 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1830 case PGMPOOLKIND_64BIT_PML4:
1831 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1832 switch (enmKind2)
1833 {
1834 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1835 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1836 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1837 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1838 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1839 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1840 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1841 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1842 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1843 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1844 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1845 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1846 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1847 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1848 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1849 return true;
1850 default:
1851 return false;
1852 }
1853
1854 /*
1855 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1856 */
1857 case PGMPOOLKIND_ROOT_NESTED:
1858 return false;
1859
1860 default:
1861 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1862 }
1863}
1864
1865
1866/**
1867 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1868 *
1869 * @returns VBox status code.
1870 * @retval VINF_PGM_CACHED_PAGE on success.
1871 * @retval VERR_FILE_NOT_FOUND if not found.
1872 * @param pPool The pool.
1873 * @param GCPhys The GC physical address of the page we're gonna shadow.
1874 * @param enmKind The kind of mapping.
1875 * @param enmAccess Access type for the mapping (only relevant for big pages)
1876 * @param iUser The shadow page pool index of the user table.
1877 * @param iUserTable The index into the user table (shadowed).
1878 * @param ppPage Where to store the pointer to the page.
1879 */
1880static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1881{
1882#ifndef IN_RC
1883 const PVM pVM = pPool->CTX_SUFF(pVM);
1884#endif
1885 /*
1886 * Look up the GCPhys in the hash.
1887 */
1888 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1889 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
1890 if (i != NIL_PGMPOOL_IDX)
1891 {
1892 do
1893 {
1894 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1895 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1896 if (pPage->GCPhys == GCPhys)
1897 {
1898 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
1899 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
1900 {
1901 /* Put it at the start of the age list to make sure pgmPoolTrackAddUser
1902 * doesn't flush it in case there are no more free user records.
1903 */
1904 pgmPoolCacheUsed(pPool, pPage);
1905
1906 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1907 if (RT_SUCCESS(rc))
1908 {
1909 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
1910 *ppPage = pPage;
1911 if (pPage->cModifications)
1912 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
1913 STAM_COUNTER_INC(&pPool->StatCacheHits);
1914 return VINF_PGM_CACHED_PAGE;
1915 }
1916 return rc;
1917 }
1918
1919 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
1920 {
1921 /*
1922 * The kind is different. In some cases we should now flush the page
1923 * as it has been reused, but in most cases this is normal remapping
1924 * of PDs as PT or big pages using the GCPhys field in a slightly
1925 * different way than the other kinds.
1926 */
1927 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1928 {
1929 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1930 pgmPoolFlushPage(pPool, pPage);
1931 PGM_INVL_VCPU_TLBS(VMMGetCpu(pVM)); /* see PT handler. */
1932 break;
1933 }
1934 }
1935 }
1936
1937 /* next */
1938 i = pPage->iNext;
1939 } while (i != NIL_PGMPOOL_IDX);
1940 }
1941
1942 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
1943 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1944 return VERR_FILE_NOT_FOUND;
1945}
1946
1947
1948/**
1949 * Inserts a page into the cache.
1950 *
1951 * @param pPool The pool.
1952 * @param pPage The cached page.
1953 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1954 */
1955static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1956{
1957 /*
1958 * Insert into the GCPhys hash if the page is fit for that.
1959 */
1960 Assert(!pPage->fCached);
1961 if (fCanBeCached)
1962 {
1963 pPage->fCached = true;
1964 pgmPoolHashInsert(pPool, pPage);
1965 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1966 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1967 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1968 }
1969 else
1970 {
1971 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1972 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1973 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1974 }
1975
1976 /*
1977 * Insert at the head of the age list.
1978 */
1979 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1980 pPage->iAgeNext = pPool->iAgeHead;
1981 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1982 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1983 else
1984 pPool->iAgeTail = pPage->idx;
1985 pPool->iAgeHead = pPage->idx;
1986}
1987
1988
1989/**
1990 * Flushes a cached page.
1991 *
1992 * @param pPool The pool.
1993 * @param pPage The cached page.
1994 */
1995static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1996{
1997 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1998
1999 /*
2000 * Remove the page from the hash.
2001 */
2002 if (pPage->fCached)
2003 {
2004 pPage->fCached = false;
2005 pgmPoolHashRemove(pPool, pPage);
2006 }
2007 else
2008 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2009
2010 /*
2011 * Remove it from the age list.
2012 */
2013 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2014 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2015 else
2016 pPool->iAgeTail = pPage->iAgePrev;
2017 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2018 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2019 else
2020 pPool->iAgeHead = pPage->iAgeNext;
2021 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2022 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2023}
2024
2025#endif /* PGMPOOL_WITH_CACHE */
2026#ifdef PGMPOOL_WITH_MONITORING
2027
2028/**
2029 * Looks for pages sharing the monitor.
2030 *
2031 * @returns Pointer to the head page.
2032 * @returns NULL if not found.
2033 * @param pPool The Pool
2034 * @param pNewPage The page which is going to be monitored.
2035 */
2036static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2037{
2038#ifdef PGMPOOL_WITH_CACHE
2039 /*
2040 * Look up the GCPhys in the hash.
2041 */
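 /* Mask down to the page boundary; some kinds (such as the four PAE PDs that each
  * shadow a quarter of a 32-bit guest PD) appear to keep a sub-page offset in GCPhys,
  * hence the 'pPage->GCPhys - GCPhys < PAGE_SIZE' match below. */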
2042 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2043 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2044 if (i == NIL_PGMPOOL_IDX)
2045 return NULL;
2046 do
2047 {
2048 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2049 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2050 && pPage != pNewPage)
2051 {
2052 switch (pPage->enmKind)
2053 {
2054 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2055 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2056 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2057 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2058 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2059 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2060 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2061 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2062 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2063 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2064 case PGMPOOLKIND_64BIT_PML4:
2065 case PGMPOOLKIND_32BIT_PD:
2066 case PGMPOOLKIND_PAE_PDPT:
2067 {
2068 /* find the head */
2069 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2070 {
2071 Assert(pPage->iMonitoredPrev != pPage->idx);
2072 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2073 }
2074 return pPage;
2075 }
2076
2077 /* ignore, no monitoring. */
2078 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2079 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2080 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2081 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2082 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2083 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2084 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2085 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2086 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2087 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2088 case PGMPOOLKIND_ROOT_NESTED:
2089 case PGMPOOLKIND_PAE_PD_PHYS:
2090 case PGMPOOLKIND_PAE_PDPT_PHYS:
2091 case PGMPOOLKIND_32BIT_PD_PHYS:
2092 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2093 break;
2094 default:
2095 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2096 }
2097 }
2098
2099 /* next */
2100 i = pPage->iNext;
2101 } while (i != NIL_PGMPOOL_IDX);
2102#endif
2103 return NULL;
2104}
2105
2106
2107/**
2108 * Enables write monitoring of a guest page.
2109 *
2110 * @returns VBox status code.
2111 * @retval VINF_SUCCESS on success.
2112 * @param pPool The pool.
2113 * @param pPage The cached page.
2114 */
2115static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2116{
2117 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
2118
2119 /*
2120 * Filter out the relevant kinds.
2121 */
2122 switch (pPage->enmKind)
2123 {
2124 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2125 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2126 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2127 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2128 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2129 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2130 case PGMPOOLKIND_64BIT_PML4:
2131 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2132 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2133 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2134 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2135 case PGMPOOLKIND_32BIT_PD:
2136 case PGMPOOLKIND_PAE_PDPT:
2137 break;
2138
2139 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2140 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2141 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2142 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2143 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2144 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2145 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2146 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2147 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2148 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2149 case PGMPOOLKIND_ROOT_NESTED:
2150 /* Nothing to monitor here. */
2151 return VINF_SUCCESS;
2152
2153 case PGMPOOLKIND_32BIT_PD_PHYS:
2154 case PGMPOOLKIND_PAE_PDPT_PHYS:
2155 case PGMPOOLKIND_PAE_PD_PHYS:
2156 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2157 /* Nothing to monitor here. */
2158 return VINF_SUCCESS;
2159#ifdef PGMPOOL_WITH_MIXED_PT_CR3
2160 break;
2161#else
2162 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2163#endif
2164 default:
2165 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2166 }
2167
2168 /*
2169 * Install handler.
2170 */
2171 int rc;
2172 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2173 if (pPageHead)
2174 {
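 /* Another pool page already monitors this guest page. Only one physical access handler
  * can cover the range, so simply link this page into the existing monitor chain. */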
2175 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2176 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2177
2178#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2179 if (pPageHead->fDirty)
2180 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, true /* force removal */);
2181#endif
2182
2183 pPage->iMonitoredPrev = pPageHead->idx;
2184 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2185 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2186 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2187 pPageHead->iMonitoredNext = pPage->idx;
2188 rc = VINF_SUCCESS;
2189 }
2190 else
2191 {
2192 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2193 PVM pVM = pPool->CTX_SUFF(pVM);
2194 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2195 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2196 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
2197 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2198 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2199 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2200 pPool->pszAccessHandler);
2201 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2202 * the heap size should suffice. */
2203 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2204 Assert(!(VMMGetCpu(pVM)->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(VMMGetCpu(pVM), VMCPU_FF_PGM_SYNC_CR3));
2205 }
2206 pPage->fMonitored = true;
2207 return rc;
2208}
2209
2210
2211/**
2212 * Disables write monitoring of a guest page.
2213 *
2214 * @returns VBox status code.
2215 * @retval VINF_SUCCESS on success.
2216 * @param pPool The pool.
2217 * @param pPage The cached page.
2218 */
2219static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2220{
2221 /*
2222 * Filter out the relevant kinds.
2223 */
2224 switch (pPage->enmKind)
2225 {
2226 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2227 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2228 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2229 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2230 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2231 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2232 case PGMPOOLKIND_64BIT_PML4:
2233 case PGMPOOLKIND_32BIT_PD:
2234 case PGMPOOLKIND_PAE_PDPT:
2235 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2236 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2237 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2238 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2239 break;
2240
2241 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2242 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2243 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2244 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2245 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2246 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2247 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2248 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2249 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2250 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2251 case PGMPOOLKIND_ROOT_NESTED:
2252 case PGMPOOLKIND_PAE_PD_PHYS:
2253 case PGMPOOLKIND_PAE_PDPT_PHYS:
2254 case PGMPOOLKIND_32BIT_PD_PHYS:
2255 /* Nothing to monitor here. */
2256 return VINF_SUCCESS;
2257
2258#ifdef PGMPOOL_WITH_MIXED_PT_CR3
2259 break;
2260#endif
2261 default:
2262 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2263 }
2264
2265 /*
2266 * Remove the page from the monitored list or uninstall it if last.
2267 */
2268 const PVM pVM = pPool->CTX_SUFF(pVM);
2269 int rc;
2270 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2271 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2272 {
2273 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2274 {
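 /* This page is the head of the monitor chain: promote the next page to head and hand
  * the access handler callbacks over to it instead of deregistering the handler. */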
2275 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2276 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2277 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2278 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2279 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2280 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2281 pPool->pszAccessHandler);
2282 AssertFatalRCSuccess(rc);
2283 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2284 }
2285 else
2286 {
2287 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2288 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2289 {
2290 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2291 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2292 }
2293 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2294 rc = VINF_SUCCESS;
2295 }
2296 }
2297 else
2298 {
2299 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2300 AssertFatalRC(rc);
2301#ifdef VBOX_STRICT
2302 PVMCPU pVCpu = VMMGetCpu(pVM);
2303#endif
2304 AssertMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2305 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2306 }
2307 pPage->fMonitored = false;
2308
2309 /*
2310 * Remove it from the list of modified pages (if in it).
2311 */
2312 pgmPoolMonitorModifiedRemove(pPool, pPage);
2313
2314 return rc;
2315}
2316
2317
2318/**
2319 * Inserts the page into the list of modified pages.
2320 *
2321 * @param pPool The pool.
2322 * @param pPage The page.
2323 */
2324void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2325{
2326 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2327 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2328 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2329 && pPool->iModifiedHead != pPage->idx,
2330 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2331 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2332 pPool->iModifiedHead, pPool->cModifiedPages));
2333
2334 pPage->iModifiedNext = pPool->iModifiedHead;
2335 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2336 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2337 pPool->iModifiedHead = pPage->idx;
2338 pPool->cModifiedPages++;
2339#ifdef VBOX_WITH_STATISTICS
2340 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2341 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2342#endif
2343}
2344
2345
2346/**
2347 * Removes the page from the list of modified pages and resets the
2348 * modification counter.
2349 *
2350 * @param pPool The pool.
2351 * @param pPage The page which is believed to be in the list of modified pages.
2352 */
2353static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2354{
2355 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2356 if (pPool->iModifiedHead == pPage->idx)
2357 {
2358 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2359 pPool->iModifiedHead = pPage->iModifiedNext;
2360 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2361 {
2362 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2363 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2364 }
2365 pPool->cModifiedPages--;
2366 }
2367 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2368 {
2369 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2370 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2371 {
2372 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2373 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2374 }
2375 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2376 pPool->cModifiedPages--;
2377 }
2378 else
2379 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2380 pPage->cModifications = 0;
2381}
2382
2383
2384/**
2385 * Zaps the list of modified pages, resetting their modification counters in the process.
2386 *
2387 * @param pVM The VM handle.
2388 */
2389static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2390{
2391 pgmLock(pVM);
2392 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2393 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2394
2395 unsigned cPages = 0; NOREF(cPages);
2396
2397#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2398 pgmPoolResetDirtyPages(pVM, true /* force removal. */);
2399#endif
2400
2401 uint16_t idx = pPool->iModifiedHead;
2402 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2403 while (idx != NIL_PGMPOOL_IDX)
2404 {
2405 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2406 idx = pPage->iModifiedNext;
2407 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2408 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2409 pPage->cModifications = 0;
2410 Assert(++cPages);
2411 }
2412 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2413 pPool->cModifiedPages = 0;
2414 pgmUnlock(pVM);
2415}
2416
2417
2418#ifdef IN_RING3
2419/**
2420 * Callback to clear all shadow pages and clear all modification counters.
2421 *
2422 * @returns VBox status code.
2423 * @param pVM The VM handle.
2424 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
2425 * @param pvUser Unused parameter.
2426 *
2427 * @remark Should only be used when monitoring is available, thus placed in
2428 * the PGMPOOL_WITH_MONITORING \#ifdef.
2429 */
2430DECLCALLBACK(int) pgmPoolClearAll(PVM pVM, PVMCPU pVCpu, void *pvUser)
2431{
2432 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2433 STAM_PROFILE_START(&pPool->StatClearAll, c);
2434 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2435 NOREF(pvUser); NOREF(pVCpu);
2436
2437 pgmLock(pVM);
2438
2439 /*
2440 * Iterate all the pages until we've encountered all that are in use.
2441 * This is a simple but not quite optimal solution.
2442 */
2443 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2444 unsigned cLeft = pPool->cUsedPages;
2445 unsigned iPage = pPool->cCurPages;
2446 while (--iPage >= PGMPOOL_IDX_FIRST)
2447 {
2448 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2449 if (pPage->GCPhys != NIL_RTGCPHYS)
2450 {
2451 switch (pPage->enmKind)
2452 {
2453 /*
2454 * We only care about shadow page tables.
2455 */
2456 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2457 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2458 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2459 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2460 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2461 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2462 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2463 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2464 {
2465#ifdef PGMPOOL_WITH_USER_TRACKING
2466 if (pPage->cPresent)
2467#endif
2468 {
2469 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2470 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2471 ASMMemZeroPage(pvShw);
2472 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2473#ifdef PGMPOOL_WITH_USER_TRACKING
2474 pPage->cPresent = 0;
2475 pPage->iFirstPresent = ~0;
2476#endif
2477 }
2478 }
2479 /* fall thru */
2480
2481 default:
2482 Assert(!pPage->cModifications || ++cModifiedPages);
2483 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2484 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2485 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2486 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2487 pPage->cModifications = 0;
2488 break;
2489
2490 }
2491 if (!--cLeft)
2492 break;
2493 }
2494 }
2495
2496 /* sweep the special pages too. */
2497 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2498 {
2499 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2500 if (pPage->GCPhys != NIL_RTGCPHYS)
2501 {
2502 Assert(!pPage->cModifications || ++cModifiedPages);
2503 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2504 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2505 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2506 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2507 pPage->cModifications = 0;
2508 }
2509 }
2510
2511#ifndef DEBUG_michael
2512 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2513#endif
2514 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2515 pPool->cModifiedPages = 0;
2516
2517#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2518 /*
2519 * Clear all the GCPhys links and rebuild the phys ext free list.
2520 */
2521 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2522 pRam;
2523 pRam = pRam->CTX_SUFF(pNext))
2524 {
2525 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2526 while (iPage-- > 0)
2527 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
2528 }
2529
2530 pPool->iPhysExtFreeHead = 0;
2531 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2532 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2533 for (unsigned i = 0; i < cMaxPhysExts; i++)
2534 {
2535 paPhysExts[i].iNext = i + 1;
2536 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2537 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2538 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2539 }
2540 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2541#endif
2542
2543#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2544 /* Clear all dirty pages. */
2545 pPool->idxFreeDirtyPage = 0;
2546 pPool->cDirtyPages = 0;
2547 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
2548 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
2549#endif
2550
2551 /* Clear the PGM_SYNC_CLEAR_PGM_POOL flag on all VCPUs to prevent redundant flushes. */
2552 for (unsigned idCpu = 0; idCpu < pVM->cCPUs; idCpu++)
2553 {
2554 PVMCPU pVCpu = &pVM->aCpus[idCpu];
2555
2556 pVCpu->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2557 }
2558
2559 pPool->cPresent = 0;
2560 pgmUnlock(pVM);
2561 PGM_INVL_ALL_VCPU_TLBS(pVM);
2562 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2563 return VINF_SUCCESS;
2564}
2565#endif /* IN_RING3 */
2566
2567
2568/**
2569 * Handle SyncCR3 pool tasks
2570 * Handles SyncCR3 pool tasks.
2571 * @returns VBox status code.
2572 * @retval VINF_SUCCESS if successfully added.
2573 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2574 * @param pVCpu The VMCPU handle.
2575 * @remark Should only be used when monitoring is available, thus placed in
2576 * the PGMPOOL_WITH_MONITORING #ifdef.
2577 */
2578int pgmPoolSyncCR3(PVMCPU pVCpu)
2579{
2580 PVM pVM = pVCpu->CTX_SUFF(pVM);
2581 LogFlow(("pgmPoolSyncCR3\n"));
2582
2583 /*
2584 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2585 * Occasionally we will have to clear all the shadow page tables because we wanted
2586 * to monitor a page which was mapped by too many shadowed page tables. This operation
2587 * is sometimes referred to as a 'lightweight flush'.
2588 */
2589# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2590 if (ASMBitTestAndClear(&pVCpu->pgm.s.fSyncFlags, PGM_SYNC_CLEAR_PGM_POOL_BIT))
2591 {
2592 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmPoolClearAll, NULL);
2593 AssertRC(rc);
2594 }
2595# else /* !IN_RING3 */
2596 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2597 {
2598 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2599 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2600 return VINF_PGM_SYNC_CR3;
2601 }
2602# endif /* !IN_RING3 */
2603 else
2604 pgmPoolMonitorModifiedClearAll(pVM);
2605
2606 return VINF_SUCCESS;
2607}
2608
2609#endif /* PGMPOOL_WITH_MONITORING */
2610#ifdef PGMPOOL_WITH_USER_TRACKING
2611
2612/**
2613 * Frees up at least one user entry.
2614 *
2615 * @returns VBox status code.
2616 * @retval VINF_SUCCESS if successfully added.
2617 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2618 * @param pPool The pool.
2619 * @param iUser The user index.
2620 */
2621static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2622{
2623 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2624#ifdef PGMPOOL_WITH_CACHE
2625 /*
2626 * Just free cached pages in a braindead fashion.
2627 */
2628 /** @todo walk the age list backwards and free the first with usage. */
2629 int rc = VINF_SUCCESS;
2630 do
2631 {
2632 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2633 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2634 rc = rc2;
2635 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2636 return rc;
2637#else
2638 /*
2639 * Lazy approach.
2640 */
2641 /** @todo This path no longer works (CR3 root pages will be flushed)!! */
2642 AssertCompileFailed();
2643 Assert(!CPUMIsGuestInLongMode(pVM));
2644 pgmPoolFlushAllInt(pPool);
2645 return VERR_PGM_POOL_FLUSHED;
2646#endif
2647}
2648
2649
2650/**
2651 * Inserts a page into the cache.
2652 *
2653 * This will create a user node for the page, insert it into the GCPhys
2654 * hash, and insert it into the age list.
2655 *
2656 * @returns VBox status code.
2657 * @retval VINF_SUCCESS if successfully added.
2658 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2659 * @param pPool The pool.
2660 * @param pPage The cached page.
2661 * @param GCPhys The GC physical address of the page we're gonna shadow.
2662 * @param iUser The user index.
2663 * @param iUserTable The user table index.
2664 */
2665DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2666{
2667 int rc = VINF_SUCCESS;
2668 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2669
2670 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2671
2672#ifdef VBOX_STRICT
2673 /*
2674 * Check that the entry doesn't already exist.
2675 */
2676 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2677 {
2678 uint16_t i = pPage->iUserHead;
2679 do
2680 {
2681 Assert(i < pPool->cMaxUsers);
2682 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2683 i = paUsers[i].iNext;
2684 } while (i != NIL_PGMPOOL_USER_INDEX);
2685 }
2686#endif
2687
2688 /*
2689 * Find a free user node.
2690 */
2691 uint16_t i = pPool->iUserFreeHead;
2692 if (i == NIL_PGMPOOL_USER_INDEX)
2693 {
2694 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2695 if (RT_FAILURE(rc))
2696 return rc;
2697 i = pPool->iUserFreeHead;
2698 }
2699
2700 /*
2701 * Unlink the user node from the free list,
2702 * initialize and insert it into the user list.
2703 */
2704 pPool->iUserFreeHead = paUsers[i].iNext;
2705 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2706 paUsers[i].iUser = iUser;
2707 paUsers[i].iUserTable = iUserTable;
2708 pPage->iUserHead = i;
2709
2710 /*
2711 * Insert into cache and enable monitoring of the guest page if enabled.
2712 *
2713 * Until we implement caching of all levels, including the CR3 one, we'll
2714 * have to make sure we don't try to monitor & cache any recursive reuse of
2715 * a monitored CR3 page. Because all Windows versions are doing this we'll
2716 * have to be able to do combined access monitoring, CR3 + PT and
2717 * PD + PT (guest PAE).
2718 *
2719 * Update:
2720 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2721 */
2722#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2723# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2724 const bool fCanBeMonitored = true;
2725# else
2726 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2727 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2728 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2729# endif
2730# ifdef PGMPOOL_WITH_CACHE
2731 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2732# endif
2733 if (fCanBeMonitored)
2734 {
2735# ifdef PGMPOOL_WITH_MONITORING
2736 rc = pgmPoolMonitorInsert(pPool, pPage);
2737 AssertRC(rc);
2738# endif
2739 }
2740#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2741 return rc;
2742}
2743
2744
2745# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2746/**
2747 * Adds a user reference to a page.
2748 *
2749 * This will move the page to the head of the
2750 * This will move the page to the head of the age list.
2751 * @returns VBox status code.
2752 * @retval VINF_SUCCESS if successfully added.
2753 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2754 * @param pPool The pool.
2755 * @param pPage The cached page.
2756 * @param iUser The user index.
2757 * @param iUserTable The user table.
2758 */
2759static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2760{
2761 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2762
2763 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2764
2765# ifdef VBOX_STRICT
2766 /*
2767 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2768 */
2769 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2770 {
2771 uint16_t i = pPage->iUserHead;
2772 do
2773 {
2774 Assert(i < pPool->cMaxUsers);
2775 AssertMsg(iUser == PGMPOOL_IDX_PD || iUser == PGMPOOL_IDX_PDPT || iUser == PGMPOOL_IDX_NESTED_ROOT || iUser == PGMPOOL_IDX_AMD64_CR3 ||
2776 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2777 i = paUsers[i].iNext;
2778 } while (i != NIL_PGMPOOL_USER_INDEX);
2779 }
2780# endif
2781
2782 /*
2783 * Allocate a user node.
2784 */
2785 uint16_t i = pPool->iUserFreeHead;
2786 if (i == NIL_PGMPOOL_USER_INDEX)
2787 {
2788 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2789 if (RT_FAILURE(rc))
2790 return rc;
2791 i = pPool->iUserFreeHead;
2792 }
2793 pPool->iUserFreeHead = paUsers[i].iNext;
2794
2795 /*
2796 * Initialize the user node and insert it.
2797 */
2798 paUsers[i].iNext = pPage->iUserHead;
2799 paUsers[i].iUser = iUser;
2800 paUsers[i].iUserTable = iUserTable;
2801 pPage->iUserHead = i;
2802
2803# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2804 if (pPage->fDirty)
2805 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, true /* force removal */);
2806# endif
2807
2808# ifdef PGMPOOL_WITH_CACHE
2809 /*
2810 * Tell the cache to update its replacement stats for this page.
2811 */
2812 pgmPoolCacheUsed(pPool, pPage);
2813# endif
2814 return VINF_SUCCESS;
2815}
2816# endif /* PGMPOOL_WITH_CACHE */
2817
2818
2819/**
2820 * Frees a user record associated with a page.
2821 *
2822 * This does not clear the entry in the user table, it simply returns the
2823 * user record to the chain of free records.
2824 *
2825 * @param pPool The pool.
2826 * @param pPage The shadow page.
2827 * @param iUser The shadow page pool index of the user table.
2828 * @param iUserTable The index into the user table (shadowed).
2829 */
2830static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2831{
2832 /*
2833 * Unlink and free the specified user entry.
2834 */
2835 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2836
2837 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2838 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2839 uint16_t i = pPage->iUserHead;
2840 if ( i != NIL_PGMPOOL_USER_INDEX
2841 && paUsers[i].iUser == iUser
2842 && paUsers[i].iUserTable == iUserTable)
2843 {
2844 pPage->iUserHead = paUsers[i].iNext;
2845
2846 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2847 paUsers[i].iNext = pPool->iUserFreeHead;
2848 pPool->iUserFreeHead = i;
2849 return;
2850 }
2851
2852 /* General: Linear search. */
2853 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2854 while (i != NIL_PGMPOOL_USER_INDEX)
2855 {
2856 if ( paUsers[i].iUser == iUser
2857 && paUsers[i].iUserTable == iUserTable)
2858 {
2859 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2860 paUsers[iPrev].iNext = paUsers[i].iNext;
2861 else
2862 pPage->iUserHead = paUsers[i].iNext;
2863
2864 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2865 paUsers[i].iNext = pPool->iUserFreeHead;
2866 pPool->iUserFreeHead = i;
2867 return;
2868 }
2869 iPrev = i;
2870 i = paUsers[i].iNext;
2871 }
2872
2873 /* Fatal: didn't find it */
2874 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2875 iUser, iUserTable, pPage->GCPhys));
2876}
2877
2878
2879/**
2880 * Gets the entry size of a shadow table.
2881 *
2882 * @param enmKind The kind of page.
2883 *
2884 * @returns The size of the entry in bytes. That is, 4 or 8.
2885 * @returns If the kind is not for a table, an assertion is raised and 0 is
2886 * returned.
2887 */
2888DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2889{
2890 switch (enmKind)
2891 {
2892 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2893 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2894 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2895 case PGMPOOLKIND_32BIT_PD:
2896 case PGMPOOLKIND_32BIT_PD_PHYS:
2897 return 4;
2898
2899 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2900 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2901 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2902 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2903 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2904 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2905 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2906 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2907 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2908 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2909 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2910 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2911 case PGMPOOLKIND_64BIT_PML4:
2912 case PGMPOOLKIND_PAE_PDPT:
2913 case PGMPOOLKIND_ROOT_NESTED:
2914 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2915 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2916 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2917 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2918 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2919 case PGMPOOLKIND_PAE_PD_PHYS:
2920 case PGMPOOLKIND_PAE_PDPT_PHYS:
2921 return 8;
2922
2923 default:
2924 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2925 }
2926}
2927
2928
2929/**
2930 * Gets the entry size of a guest table.
2931 *
2932 * @param enmKind The kind of page.
2933 *
2934 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2935 * @returns If the kind is not for a table, an assertion is raised and 0 is
2936 * returned.
2937 */
2938DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2939{
2940 switch (enmKind)
2941 {
2942 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2943 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2944 case PGMPOOLKIND_32BIT_PD:
2945 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2946 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2947 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2948 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2949 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2950 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2951 return 4;
2952
2953 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2954 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2955 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2956 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2957 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2958 case PGMPOOLKIND_64BIT_PML4:
2959 case PGMPOOLKIND_PAE_PDPT:
2960 return 8;
2961
2962 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2963 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2964 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2965 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2966 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2967 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2968 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2969 case PGMPOOLKIND_ROOT_NESTED:
2970 case PGMPOOLKIND_PAE_PD_PHYS:
2971 case PGMPOOLKIND_PAE_PDPT_PHYS:
2972 case PGMPOOLKIND_32BIT_PD_PHYS:
2973 /** @todo can we return 0? (nobody is calling this...) */
2974 AssertFailed();
2975 return 0;
2976
2977 default:
2978 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2979 }
2980}
2981
2982#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2983
2984/**
2985 * Scans one shadow page table for mappings of a physical page.
2986 *
2987 * @param pVM The VM handle.
2988 * @param pPhysPage The guest page in question.
2989 * @param iShw The shadow page table.
2990 * @param cRefs The number of references made in that PT.
2991 */
2992static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2993{
2994 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
2995 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2996
2997 /*
2998 * Assert sanity.
2999 */
3000 Assert(cRefs == 1);
3001 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3002 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3003
3004 /*
3005 * Then, clear the actual mappings to the page in the shadow PT.
3006 */
3007 switch (pPage->enmKind)
3008 {
3009 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3010 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3011 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3012 {
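 /* Build the match pattern once (page frame address plus the present bit); every PTE
  * matching it references pPhysPage and is cleared until cRefs reaches zero. */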
3013 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3014 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3015 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3016 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3017 {
3018 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
3019 pPT->a[i].u = 0;
3020 cRefs--;
3021 if (!cRefs)
3022 return;
3023 }
3024#ifdef LOG_ENABLED
3025 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3026 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3027 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3028 {
3029 Log(("i=%d cRefs=%d\n", i, cRefs--));
3030 }
3031#endif
3032 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3033 break;
3034 }
3035
3036 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3037 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3038 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3039 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3040 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3041 {
3042 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3043 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3044 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3045 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3046 {
3047 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3048 pPT->a[i].u = 0;
3049 cRefs--;
3050 if (!cRefs)
3051 return;
3052 }
3053#ifdef LOG_ENABLED
3054 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3055 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3056 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3057 {
3058 Log(("i=%d cRefs=%d\n", i, cRefs--));
3059 }
3060#endif
3061 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
3062 break;
3063 }
3064
3065 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3066 {
3067 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3068 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3069 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3070 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3071 {
3072 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3073 pPT->a[i].u = 0;
3074 cRefs--;
3075 if (!cRefs)
3076 return;
3077 }
3078#ifdef LOG_ENABLED
3079 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3080 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3081 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3082 {
3083 Log(("i=%d cRefs=%d\n", i, cRefs--));
3084 }
3085#endif
3086 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3087 break;
3088 }
3089
3090 default:
3091 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3092 }
3093}
3094
3095
3096/**
3097 * Scans one shadow page table for mappings of a physical page.
3098 *
3099 * @param pVM The VM handle.
3100 * @param pPhysPage The guest page in question.
3101 * @param iShw The shadow page table.
3102 * @param cRefs The number of references made in that PT.
3103 */
3104void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
3105{
3106 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3107 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
3108 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3109 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
3110 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3111 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3112}
3113
3114
3115/**
3116 * Flushes a list of shadow page tables mapping the same physical page.
3117 *
3118 * @param pVM The VM handle.
3119 * @param pPhysPage The guest page in question.
3120 * @param iPhysExt The physical cross reference extent list to flush.
3121 */
3122void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
3123{
3124 Assert(PGMIsLockOwner(pVM));
3125 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3126 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3127 LogFlow(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%R[pgmpage] iPhysExt=%d\n", pPhysPage, iPhysExt));
3128
3129 const uint16_t iPhysExtStart = iPhysExt;
3130 PPGMPOOLPHYSEXT pPhysExt;
3131 do
3132 {
3133 Assert(iPhysExt < pPool->cMaxPhysExts);
3134 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3135 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3136 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3137 {
3138 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
3139 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3140 }
3141
3142 /* next */
3143 iPhysExt = pPhysExt->iNext;
3144 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3145
3146 /* insert the list into the free list and clear the ram range entry. */
3147 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3148 pPool->iPhysExtFreeHead = iPhysExtStart;
3149 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3150
3151 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3152}
3153
3154#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3155
3156/**
3157 * Flushes all shadow page table mappings of the given guest page.
3158 *
3159 * This is typically called when the host page backing the guest one has been
3160 * replaced or when the page protection was changed due to an access handler.
3161 *
3162 * @returns VBox status code.
3163 * @retval VINF_SUCCESS if all references have been successfully cleared.
3164 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3165 * pool cleaning. FF and sync flags are set.
3166 *
3167 * @param pVM The VM handle.
3168 * @param pPhysPage The guest page in question.
3169 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3170 * flushed; it is NOT touched if this isn't necessary.
3171 * The caller MUST initialize this to @a false.
3172 */
3173int pgmPoolTrackFlushGCPhys(PVM pVM, PPGMPAGE pPhysPage, bool *pfFlushTLBs)
3174{
3175 PVMCPU pVCpu = VMMGetCpu(pVM);
3176 pgmLock(pVM);
3177 int rc = VINF_SUCCESS;
3178#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3179 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3180 if (u16)
3181 {
3182 /*
3183 * The zero page is currently screwing up the tracking and we'll
3184 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3185 * is defined, zero pages won't normally be mapped. Some kind of solution
3186 * will be needed for this problem of course, but it will have to wait...
3187 */
3188 if (PGM_PAGE_IS_ZERO(pPhysPage))
3189 rc = VINF_PGM_GCPHYS_ALIASED;
3190 else
3191 {
3192# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3193 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3194 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3195 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3196# endif
3197
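 /* The tracking word either encodes a single (pool index, cRefs) pair, an index into
  * the physical cross-reference extent lists, or the overflowed marker, in which case
  * only the slow scan of all shadow page tables will do. */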
3198 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3199 pgmPoolTrackFlushGCPhysPT(pVM,
3200 pPhysPage,
3201 PGMPOOL_TD_GET_IDX(u16),
3202 PGMPOOL_TD_GET_CREFS(u16));
3203 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3204 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, PGMPOOL_TD_GET_IDX(u16));
3205 else
3206 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3207 *pfFlushTLBs = true;
3208
3209# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3210 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3211# endif
3212 }
3213 }
3214
3215#elif defined(PGMPOOL_WITH_CACHE)
3216 if (PGM_PAGE_IS_ZERO(pPhysPage))
3217 rc = VINF_PGM_GCPHYS_ALIASED;
3218 else
3219 {
3220# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3221 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow kills the pool otherwise. */
3222 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3223# endif
3224 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3225 if (rc == VINF_SUCCESS)
3226 *pfFlushTLBs = true;
3227 }
3228
3229# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3230 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3231# endif
3232
3233#else
3234 rc = VINF_PGM_GCPHYS_ALIASED;
3235#endif
3236
3237 if (rc == VINF_PGM_GCPHYS_ALIASED)
3238 {
3239 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3240 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3241 rc = VINF_PGM_SYNC_CR3;
3242 }
3243 pgmUnlock(pVM);
3244 return rc;
3245}
3246
3247
3248/**
3249 * Scans all shadow page tables for mappings of a physical page.
3250 *
3251 * This may be slow, but it's most likely more efficient than cleaning
3252 * out the entire page pool / cache.
3253 *
3254 * @returns VBox status code.
3255 * @retval VINF_SUCCESS if all references have been successfully cleared.
3256 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3257 * a page pool cleaning.
3258 *
3259 * @param pVM The VM handle.
3260 * @param pPhysPage The guest page in question.
3261 */
3262int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3263{
3264 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3265 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3266 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3267 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3268
3269#if 1
3270 /*
3271 * There is a limit to what makes sense.
3272 */
3273 if (pPool->cPresent > 1024)
3274 {
3275 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3276 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3277 return VINF_PGM_GCPHYS_ALIASED;
3278 }
3279#endif
3280
3281 /*
3282 * Iterate all the pages until we've encountered all that are in use.
3283 * This is a simple but not quite optimal solution.
3284 */
3285 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3286 const uint32_t u32 = u64;
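 /* u64 is the shadow PTE pattern (HCPhys | P) searched for in the PAE page
    tables below; u32 is its 32-bit truncation used for 32-bit page tables. */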
3287 unsigned cLeft = pPool->cUsedPages;
3288 unsigned iPage = pPool->cCurPages;
3289 while (--iPage >= PGMPOOL_IDX_FIRST)
3290 {
3291 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3292 if (pPage->GCPhys != NIL_RTGCPHYS)
3293 {
3294 switch (pPage->enmKind)
3295 {
3296 /*
3297 * We only care about shadow page tables.
3298 */
3299 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3300 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3301 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3302 {
3303 unsigned cPresent = pPage->cPresent;
3304 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3305 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3306 if (pPT->a[i].n.u1Present)
3307 {
3308 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3309 {
3310 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3311 pPT->a[i].u = 0;
3312 }
3313 if (!--cPresent)
3314 break;
3315 }
3316 break;
3317 }
3318
3319 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3320 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3321 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3322 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3323 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3324 {
3325 unsigned cPresent = pPage->cPresent;
3326 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3327 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3328 if (pPT->a[i].n.u1Present)
3329 {
3330 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3331 {
3332 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3333 pPT->a[i].u = 0;
3334 }
3335 if (!--cPresent)
3336 break;
3337 }
3338 break;
3339 }
3340 }
3341 if (!--cLeft)
3342 break;
3343 }
3344 }
3345
3346 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3347 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3348 return VINF_SUCCESS;
3349}
3350
3351
3352/**
3353 * Clears the user entry in a user table.
3354 *
3355 * This is used to remove all references to a page when flushing it.
3356 */
3357static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3358{
3359 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3360 Assert(pUser->iUser < pPool->cCurPages);
3361 uint32_t iUserTable = pUser->iUserTable;
3362
3363 /*
3364 * Map the user page.
3365 */
3366 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3367 union
3368 {
3369 uint64_t *pau64;
3370 uint32_t *pau32;
3371 } u;
3372 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
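 /* The union lets the entry be cleared below as either a 32-bit or a 64-bit
    value, depending on the kind of the user page. */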
3373
3374 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3375
3376 /* Safety precaution in case we change the paging for other modes too in the future. */
3377 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3378
3379#ifdef VBOX_STRICT
3380 /*
3381 * Some sanity checks.
3382 */
3383 switch (pUserPage->enmKind)
3384 {
3385 case PGMPOOLKIND_32BIT_PD:
3386 case PGMPOOLKIND_32BIT_PD_PHYS:
3387 Assert(iUserTable < X86_PG_ENTRIES);
3388 break;
3389 case PGMPOOLKIND_PAE_PDPT:
3390 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3391 case PGMPOOLKIND_PAE_PDPT_PHYS:
3392 Assert(iUserTable < 4);
3393 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3394 break;
3395 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3396 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3397 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3398 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3399 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3400 case PGMPOOLKIND_PAE_PD_PHYS:
3401 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3402 break;
3403 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3404 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3405 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3406 break;
3407 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3408 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3409 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3410 break;
3411 case PGMPOOLKIND_64BIT_PML4:
3412 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3413 /* GCPhys >> PAGE_SHIFT is the index here */
3414 break;
3415 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3416 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3417 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3418 break;
3419
3420 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3421 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3422 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3423 break;
3424
3425 case PGMPOOLKIND_ROOT_NESTED:
3426 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3427 break;
3428
3429 default:
3430 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3431 break;
3432 }
3433#endif /* VBOX_STRICT */
3434
3435 /*
3436 * Clear the entry in the user page.
3437 */
3438 switch (pUserPage->enmKind)
3439 {
3440 /* 32-bit entries */
3441 case PGMPOOLKIND_32BIT_PD:
3442 case PGMPOOLKIND_32BIT_PD_PHYS:
3443 u.pau32[iUserTable] = 0;
3444 break;
3445
3446 /* 64-bit entries */
3447 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3448 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3449 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3450 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3451 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3452#if defined(IN_RC)
3453 /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during CR3 load, so any
3454 * non-present PDPT entry will continue to cause page faults.
3455 */
3456 ASMReloadCR3();
3457#endif
3458 /* no break */
3459 case PGMPOOLKIND_PAE_PD_PHYS:
3460 case PGMPOOLKIND_PAE_PDPT_PHYS:
3461 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3462 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3463 case PGMPOOLKIND_64BIT_PML4:
3464 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3465 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3466 case PGMPOOLKIND_PAE_PDPT:
3467 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3468 case PGMPOOLKIND_ROOT_NESTED:
3469 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3470 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3471 u.pau64[iUserTable] = 0;
3472 break;
3473
3474 default:
3475 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3476 }
3477}
3478
3479
3480/**
3481 * Clears all users of a page.
3482 */
3483static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3484{
3485 /*
3486 * Free all the user records.
3487 */
3488 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3489
3490 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3491 uint16_t i = pPage->iUserHead;
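 /* Walk the singly linked list of user records, clearing each referencing
    entry and returning the record to the user free list. */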
3492 while (i != NIL_PGMPOOL_USER_INDEX)
3493 {
3494 /* Clear entry in user table. */
3495 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3496
3497 /* Free it. */
3498 const uint16_t iNext = paUsers[i].iNext;
3499 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3500 paUsers[i].iNext = pPool->iUserFreeHead;
3501 pPool->iUserFreeHead = i;
3502
3503 /* Next. */
3504 i = iNext;
3505 }
3506 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3507}
3508
3509#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3510
3511/**
3512 * Allocates a new physical cross reference extent.
3513 *
3514 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3515 * @param pVM The VM handle.
3516 * @param piPhysExt Where to store the phys ext index.
3517 */
3518PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3519{
3520 Assert(PGMIsLockOwner(pVM));
3521 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3522 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3523 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3524 {
3525 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3526 return NULL;
3527 }
3528 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3529 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3530 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3531 *piPhysExt = iPhysExt;
3532 return pPhysExt;
3533}
3534
3535
3536/**
3537 * Frees a physical cross reference extent.
3538 *
3539 * @param pVM The VM handle.
3540 * @param iPhysExt The extent to free.
3541 */
3542void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3543{
3544 Assert(PGMIsLockOwner(pVM));
3545 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3546 Assert(iPhysExt < pPool->cMaxPhysExts);
3547 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3548 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3549 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3550 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3551 pPool->iPhysExtFreeHead = iPhysExt;
3552}
3553
3554
3555/**
3556 * Frees a list of physical cross reference extents.
3557 *
3558 * @param pVM The VM handle.
3559 * @param iPhysExt The head of the extent list to free.
3560 */
3561void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3562{
3563 Assert(PGMIsLockOwner(pVM));
3564 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3565
3566 const uint16_t iPhysExtStart = iPhysExt;
3567 PPGMPOOLPHYSEXT pPhysExt;
3568 do
3569 {
3570 Assert(iPhysExt < pPool->cMaxPhysExts);
3571 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3572 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3573 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3574
3575 /* next */
3576 iPhysExt = pPhysExt->iNext;
3577 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3578
3579 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3580 pPool->iPhysExtFreeHead = iPhysExtStart;
3581}
3582
3583
3584/**
3585 * Insert a reference into a list of physical cross reference extents.
3586 *
3587 * @returns The new tracking data for PGMPAGE.
3588 *
3589 * @param pVM The VM handle.
3590 * @param iPhysExt The physical extent index of the list head.
3591 * @param iShwPT The shadow page table index.
3592 *
3593 */
3594static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3595{
3596 Assert(PGMIsLockOwner(pVM));
3597 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3598 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3599
3600 /* special common case. */
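 /* (pgmPoolTrackPhysExtAddref creates the head extent with aidx[0] and aidx[1]
    filled, so aidx[2] is the first free slot in the common case.) */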
3601 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3602 {
3603 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3604 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3605 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d}\n", iPhysExt, iShwPT));
3606 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3607 }
3608
3609 /* general treatment. */
3610 const uint16_t iPhysExtStart = iPhysExt;
3611 unsigned cMax = 15;
3612 for (;;)
3613 {
3614 Assert(iPhysExt < pPool->cMaxPhysExts);
3615 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3616 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3617 {
3618 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3619 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3620 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3621 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3622 }
3623 if (!--cMax)
3624 {
3625 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3626 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3627 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3628 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3629 }
 /* Advance to the next extent in the chain; when the end of the chain is
    reached, fall through and append a new extent below. */
 iPhysExt = paPhysExts[iPhysExt].iNext;
 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
 break;
3630 }
3631
3632 /* add another extent to the list. */
3633 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3634 if (!pNew)
3635 {
3636 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3637 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3638 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3639 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3640 }
3641 pNew->iNext = iPhysExtStart;
3642 pNew->aidx[0] = iShwPT;
3643 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3644 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3645}
3646
3647
3648/**
3649 * Adds a reference to a guest physical page where extents are in use.
3650 *
3651 * @returns The new tracking data for PGMPAGE.
3652 *
3653 * @param pVM The VM handle.
3654 * @param u16 The ram range flags (top 16-bits).
3655 * @param iShwPT The shadow page table index.
3656 */
3657uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3658{
3659 pgmLock(pVM);
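 /* Three cases: a page with a single reference is converted to an extent list,
    a page already using extents gets the new index inserted, and a page marked
    as overflowed only updates the statistics. */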
3660 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3661 {
3662 /*
3663 * Convert to extent list.
3664 */
3665 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3666 uint16_t iPhysExt;
3667 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3668 if (pPhysExt)
3669 {
3670 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3671 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3672 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3673 pPhysExt->aidx[1] = iShwPT;
3674 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3675 }
3676 else
3677 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3678 }
3679 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3680 {
3681 /*
3682 * Insert into the extent list.
3683 */
3684 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3685 }
3686 else
3687 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3688 pgmUnlock(pVM);
3689 return u16;
3690}
3691
3692
3693/**
3694 * Clear references to guest physical memory.
3695 *
3696 * @param pPool The pool.
3697 * @param pPage The page.
3698 * @param pPhysPage Pointer to the aPages entry in the ram range.
3699 */
3700void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3701{
3702 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3703 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3704
3705 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3706 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3707 {
3708 PVM pVM = pPool->CTX_SUFF(pVM);
3709 pgmLock(pVM);
3710
3711 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3712 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
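 /* Walk the extent chain looking for this shadow page index; an extent that
    becomes empty after clearing it is unlinked and returned to the free list. */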
3713 do
3714 {
3715 Assert(iPhysExt < pPool->cMaxPhysExts);
3716
3717 /*
3718 * Look for the shadow page and check if it's all freed.
3719 */
3720 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3721 {
3722 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3723 {
3724 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3725
3726 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3727 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3728 {
3729 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3730 pgmUnlock(pVM);
3731 return;
3732 }
3733
3734 /* we can free the node. */
3735 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3736 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3737 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3738 {
3739 /* lonely node */
3740 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3741 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3742 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3743 }
3744 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3745 {
3746 /* head */
3747 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3748 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3749 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3750 }
3751 else
3752 {
3753 /* in list */
3754 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3755 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3756 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3757 }
3758 iPhysExt = iPhysExtNext;
3759 pgmUnlock(pVM);
3760 return;
3761 }
3762 }
3763
3764 /* next */
3765 iPhysExtPrev = iPhysExt;
3766 iPhysExt = paPhysExts[iPhysExt].iNext;
3767 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3768
3769 pgmUnlock(pVM);
3770 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3771 }
3772 else /* nothing to do */
3773 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3774}
3775
3776
3777/**
3778 * Clear references to guest physical memory.
3779 *
3780 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3781 * is assumed to be correct, so the linear search can be skipped and we can assert
3782 * at an earlier point.
3783 *
3784 * @param pPool The pool.
3785 * @param pPage The page.
3786 * @param HCPhys The host physical address corresponding to the guest page.
3787 * @param GCPhys The guest physical address corresponding to HCPhys.
3788 */
3789static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3790{
3791 /*
3792 * Walk range list.
3793 */
3794 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3795 while (pRam)
3796 {
3797 RTGCPHYS off = GCPhys - pRam->GCPhys;
3798 if (off < pRam->cb)
3799 {
3800 /* does it match? */
3801 const unsigned iPage = off >> PAGE_SHIFT;
3802 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3803#ifdef LOG_ENABLED
3804 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3805 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3806#endif
3807 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3808 {
3809 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3810 return;
3811 }
3812 break;
3813 }
3814 pRam = pRam->CTX_SUFF(pNext);
3815 }
3816 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3817}
3818
3819
3820/**
3821 * Clear references to guest physical memory.
3822 *
3823 * @param pPool The pool.
3824 * @param pPage The page.
3825 * @param HCPhys The host physical address corresponding to the guest page.
3826 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3827 */
3828void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3829{
3830 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
3831
3832 /*
3833 * Walk range list.
3834 */
3835 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3836 while (pRam)
3837 {
3838 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3839 if (off < pRam->cb)
3840 {
3841 /* does it match? */
3842 const unsigned iPage = off >> PAGE_SHIFT;
3843 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3844 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3845 {
3846 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3847 return;
3848 }
3849 break;
3850 }
3851 pRam = pRam->CTX_SUFF(pNext);
3852 }
3853
3854 /*
3855 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3856 */
3857 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3858 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3859 while (pRam)
3860 {
3861 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3862 while (iPage-- > 0)
3863 {
3864 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3865 {
3866 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3867 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3868 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3869 return;
3870 }
3871 }
3872 pRam = pRam->CTX_SUFF(pNext);
3873 }
3874
3875 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3876}
3877
3878
3879/**
3880 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3881 *
3882 * @param pPool The pool.
3883 * @param pPage The page.
3884 * @param pShwPT The shadow page table (mapping of the page).
3885 * @param pGstPT The guest page table.
3886 */
3887DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3888{
3889 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3890 if (pShwPT->a[i].n.u1Present)
3891 {
3892 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3893 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3894 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3895 if (!--pPage->cPresent)
3896 break;
3897 }
3898}
3899
3900
3901/**
3902 * Clear references to guest physical memory in a PAE / 32-bit page table.
3903 *
3904 * @param pPool The pool.
3905 * @param pPage The page.
3906 * @param pShwPT The shadow page table (mapping of the page).
3907 * @param pGstPT The guest page table (just a half one).
3908 */
3909DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3910{
3911 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3912 if (pShwPT->a[i].n.u1Present)
3913 {
3914 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3915 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3916 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3917 }
3918}
3919
3920
3921/**
3922 * Clear references to guest physical memory in a PAE / PAE page table.
3923 *
3924 * @param pPool The pool.
3925 * @param pPage The page.
3926 * @param pShwPT The shadow page table (mapping of the page).
3927 * @param pGstPT The guest page table.
3928 */
3929DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3930{
3931 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3932 if (pShwPT->a[i].n.u1Present)
3933 {
3934 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3935 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3936 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3937 }
3938}
3939
3940
3941/**
3942 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3943 *
3944 * @param pPool The pool.
3945 * @param pPage The page.
3946 * @param pShwPT The shadow page table (mapping of the page).
3947 */
3948DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3949{
3950 RTGCPHYS GCPhys = pPage->GCPhys;
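 /* Big-page shadows have no guest page table to consult; the guest address of
    each entry is derived from pPage->GCPhys plus the entry offset. */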
3951 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3952 if (pShwPT->a[i].n.u1Present)
3953 {
3954 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3955 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3956 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3957 }
3958}
3959
3960
3961/**
3962 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3963 *
3964 * @param pPool The pool.
3965 * @param pPage The page.
3966 * @param pShwPT The shadow page table (mapping of the page).
3967 */
3968DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3969{
3970 RTGCPHYS GCPhys = pPage->GCPhys;
3971 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3972 if (pShwPT->a[i].n.u1Present)
3973 {
3974 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3975 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3976 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3977 }
3978}
3979
3980#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3981
3982
3983/**
3984 * Clear references to shadowed pages in a 32-bit page directory.
3985 *
3986 * @param pPool The pool.
3987 * @param pPage The page.
3988 * @param pShwPD The shadow page directory (mapping of the page).
3989 */
3990DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
3991{
3992 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3993 {
3994 if ( pShwPD->a[i].n.u1Present
3995 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3996 )
3997 {
3998 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
3999 if (pSubPage)
4000 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4001 else
4002 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4003 }
4004 }
4005}
4006
4007/**
4008 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4009 *
4010 * @param pPool The pool.
4011 * @param pPage The page.
4012 * @param pShwPD The shadow page directory (mapping of the page).
4013 */
4014DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4015{
4016 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4017 {
4018 if ( pShwPD->a[i].n.u1Present
4019 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4020 )
4021 {
4022 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4023 if (pSubPage)
4024 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4025 else
4026 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4027 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4028 }
4029 }
4030}
4031
4032/**
4033 * Clear references to shadowed pages in a PAE page directory pointer table.
4034 *
4035 * @param pPool The pool.
4036 * @param pPage The page.
4037 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4038 */
4039DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4040{
4041 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4042 {
4043 if ( pShwPDPT->a[i].n.u1Present
4044 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4045 )
4046 {
4047 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4048 if (pSubPage)
4049 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4050 else
4051 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4052 }
4053 }
4054}
4055
4056
4057/**
4058 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4059 *
4060 * @param pPool The pool.
4061 * @param pPage The page.
4062 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4063 */
4064DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4065{
4066 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4067 {
4068 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
4069 if (pShwPDPT->a[i].n.u1Present)
4070 {
4071 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4072 if (pSubPage)
4073 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4074 else
4075 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4076 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4077 }
4078 }
4079}
4080
4081
4082/**
4083 * Clear references to shadowed pages in a 64-bit level 4 page table.
4084 *
4085 * @param pPool The pool.
4086 * @param pPage The page.
4087 * @param pShwPML4 The shadow PML4 (mapping of the page).
4088 */
4089DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4090{
4091 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4092 {
4093 if (pShwPML4->a[i].n.u1Present)
4094 {
4095 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4096 if (pSubPage)
4097 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4098 else
4099 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4100 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4101 }
4102 }
4103}
4104
4105
4106/**
4107 * Clear references to shadowed pages in an EPT page table.
4108 *
4109 * @param pPool The pool.
4110 * @param pPage The page.
4111 * @param pShwPT The shadow page table (mapping of the page).
4112 */
4113DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4114{
4115 RTGCPHYS GCPhys = pPage->GCPhys;
4116 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4117 if (pShwPT->a[i].n.u1Present)
4118 {
4119 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4120 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4121 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
4122 }
4123}
4124
4125
4126/**
4127 * Clear references to shadowed pages in an EPT page directory.
4128 *
4129 * @param pPool The pool.
4130 * @param pPage The page.
4131 * @param pShwPD The shadow page directory (mapping of the page).
4132 */
4133DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4134{
4135 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4136 {
4137 if (pShwPD->a[i].n.u1Present)
4138 {
4139 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4140 if (pSubPage)
4141 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4142 else
4143 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4144 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4145 }
4146 }
4147}
4148
4149
4150/**
4151 * Clear references to shadowed pages in an EPT page directory pointer table.
4152 *
4153 * @param pPool The pool.
4154 * @param pPage The page.
4155 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4156 */
4157DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4158{
4159 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4160 {
4161 if (pShwPDPT->a[i].n.u1Present)
4162 {
4163 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4164 if (pSubPage)
4165 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4166 else
4167 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4168 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4169 }
4170 }
4171}
4172
4173
4174/**
4175 * Clears all references made by this page.
4176 *
4177 * This includes other shadow pages and GC physical addresses.
4178 *
4179 * @param pPool The pool.
4180 * @param pPage The page.
4181 */
4182static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4183{
4184 /*
4185 * Map the shadow page and take action according to the page kind.
4186 */
4187 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
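 /* Page-table kinds drop references to guest physical pages; directory and
    higher-level kinds drop references to the pool pages (sub-pages) they point to. */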
4188 switch (pPage->enmKind)
4189 {
4190#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4191 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4192 {
4193 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4194 void *pvGst;
4195 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4196 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4197 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4198 break;
4199 }
4200
4201 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4202 {
4203 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4204 void *pvGst;
4205 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4206 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
4207 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4208 break;
4209 }
4210
4211 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4212 {
4213 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4214 void *pvGst;
4215 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4216 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
4217 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4218 break;
4219 }
4220
4221 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4222 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4223 {
4224 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4225 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4226 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4227 break;
4228 }
4229
4230 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4231 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4232 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4233 {
4234 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4235 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4236 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4237 break;
4238 }
4239
4240#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4241 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4242 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4243 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4244 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4245 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4246 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4247 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4248 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4249 break;
4250#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4251
4252 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4253 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4254 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4255 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4256 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4257 case PGMPOOLKIND_PAE_PD_PHYS:
4258 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4259 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4260 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4261 break;
4262
4263 case PGMPOOLKIND_32BIT_PD_PHYS:
4264 case PGMPOOLKIND_32BIT_PD:
4265 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4266 break;
4267
4268 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4269 case PGMPOOLKIND_PAE_PDPT:
4270 case PGMPOOLKIND_PAE_PDPT_PHYS:
4271 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4272 break;
4273
4274 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4275 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4276 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4277 break;
4278
4279 case PGMPOOLKIND_64BIT_PML4:
4280 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4281 break;
4282
4283 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4284 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4285 break;
4286
4287 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4288 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4289 break;
4290
4291 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4292 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4293 break;
4294
4295 default:
4296 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4297 }
4298
4299 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4300 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4301 ASMMemZeroPage(pvShw);
4302 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4303 pPage->fZeroed = true;
4304 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4305}
4306#endif /* PGMPOOL_WITH_USER_TRACKING */
4307
4308/**
4309 * Flushes a pool page.
4310 *
4311 * This moves the page to the free list after removing all user references to it.
4312 *
4313 * @returns VBox status code.
4314 * @retval VINF_SUCCESS on success.
4315 * @param pPool The pool.
4316 * @param pPage The shadow page to flush.
4317 */
4318int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4319{
4320 PVM pVM = pPool->CTX_SUFF(pVM);
4321
4322 int rc = VINF_SUCCESS;
4323 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4324 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4325 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4326
4327 /*
4328 * Quietly reject any attempts at flushing any of the special root pages.
4329 */
4330 if (pPage->idx < PGMPOOL_IDX_FIRST)
4331 {
4332 AssertFailed(); /* can no longer happen */
4333 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4334 return VINF_SUCCESS;
4335 }
4336
4337 pgmLock(pVM);
4338
4339 /*
4340 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4341 */
4342 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4343 {
4344 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4345 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4346 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4347 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4348 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4349 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4350 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4351 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4352 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4353 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4354 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4355 pgmUnlock(pVM);
4356 return VINF_SUCCESS;
4357 }
4358
4359#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4360 /* Start a subset so we won't run out of mapping space. */
4361 PVMCPU pVCpu = VMMGetCpu(pVM);
4362 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4363#endif
4364
4365 /*
4366 * Mark the page as being in need of an ASMMemZeroPage().
4367 */
4368 pPage->fZeroed = false;
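 /* Flush sequence: remove this page from all user tables referencing it, drop
    the references it holds itself, evict it from the cache, deregister any
    monitoring, and finally put it on the free list. */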
4369
4370#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4371 if (pPage->fDirty)
4372 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, true /* force removal */);
4373#endif
4374
4375#ifdef PGMPOOL_WITH_USER_TRACKING
4376 /*
4377 * Clear the page.
4378 */
4379 pgmPoolTrackClearPageUsers(pPool, pPage);
4380 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4381 pgmPoolTrackDeref(pPool, pPage);
4382 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4383#endif
4384
4385#ifdef PGMPOOL_WITH_CACHE
4386 /*
4387 * Flush it from the cache.
4388 */
4389 pgmPoolCacheFlushPage(pPool, pPage);
4390#endif /* PGMPOOL_WITH_CACHE */
4391
4392#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4393 /* Heavy stuff done. */
4394 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4395#endif
4396
4397#ifdef PGMPOOL_WITH_MONITORING
4398 /*
4399 * Deregistering the monitoring.
4400 */
4401 if (pPage->fMonitored)
4402 rc = pgmPoolMonitorFlush(pPool, pPage);
4403#endif
4404
4405 /*
4406 * Free the page.
4407 */
4408 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4409 pPage->iNext = pPool->iFreeHead;
4410 pPool->iFreeHead = pPage->idx;
4411 pPage->enmKind = PGMPOOLKIND_FREE;
4412 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4413 pPage->GCPhys = NIL_RTGCPHYS;
4414 pPage->fReusedFlushPending = false;
4415
4416 pPool->cUsedPages--;
4417 pgmUnlock(pVM);
4418 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4419 return rc;
4420}
4421
4422
4423/**
4424 * Frees a usage of a pool page.
4425 *
4426 * The caller is responsible for updating the user table so that it no longer
4427 * references the shadow page.
4428 *
4429 * @param pPool The pool.
4430 * @param pPage The shadow page to free.
4431 * @param iUser The shadow page pool index of the user table.
4432 * @param iUserTable The index into the user table (shadowed).
4433 */
4434void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4435{
4436 PVM pVM = pPool->CTX_SUFF(pVM);
4437
4438 STAM_PROFILE_START(&pPool->StatFree, a);
4439 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4440 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4441 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4442 pgmLock(pVM);
4443#ifdef PGMPOOL_WITH_USER_TRACKING
4444 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4445#endif
4446#ifdef PGMPOOL_WITH_CACHE
4447 if (!pPage->fCached)
4448#endif
4449 pgmPoolFlushPage(pPool, pPage);
4450 pgmUnlock(pVM);
4451 STAM_PROFILE_STOP(&pPool->StatFree, a);
4452}
4453
4454
4455/**
4456 * Makes one or more free pages available.
4457 *
4458 * @returns VBox status code.
4459 * @retval VINF_SUCCESS on success.
4460 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4461 *
4462 * @param pPool The pool.
4463 * @param enmKind Page table kind
4464 * @param iUser The user of the page.
4465 */
4466static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4467{
4468 PVM pVM = pPool->CTX_SUFF(pVM);
4469
4470 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4471
4472 /*
4473 * If the pool isn't fully grown yet, expand it.
4474 */
4475 if ( pPool->cCurPages < pPool->cMaxPages
4476#if defined(IN_RC)
4477 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4478 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4479 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4480#endif
4481 )
4482 {
4483 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4484#ifdef IN_RING3
4485 int rc = PGMR3PoolGrow(pVM);
4486#else
4487 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4488#endif
4489 if (RT_FAILURE(rc))
4490 return rc;
4491 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4492 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4493 return VINF_SUCCESS;
4494 }
4495
4496#ifdef PGMPOOL_WITH_CACHE
4497 /*
4498 * Free one cached page.
4499 */
4500 return pgmPoolCacheFreeOne(pPool, iUser);
4501#else
4502 /*
4503 * Flush the pool.
4504 *
4505 * If we have tracking enabled, it should be possible to come up with
4506 * a cheap replacement strategy...
4507 */
4508 /* @todo This path no longer works (CR3 root pages will be flushed)!! */
4509 AssertCompileFailed();
4510 Assert(!CPUMIsGuestInLongMode(pVM));
4511 pgmPoolFlushAllInt(pPool);
4512 return VERR_PGM_POOL_FLUSHED;
4513#endif
4514}
4515
4516/**
4517 * Allocates a page from the pool.
4518 *
4519 * This page may actually be a cached page and not in need of any processing
4520 * on the caller's part.
4521 *
4522 * @returns VBox status code.
4523 * @retval VINF_SUCCESS if a NEW page was allocated.
4524 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4525 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4526 * @param pVM The VM handle.
4527 * @param GCPhys The GC physical address of the page we're gonna shadow.
4528 * For 4MB and 2MB PD entries, it's the first address the
4529 * shadow PT is covering.
4530 * @param enmKind The kind of mapping.
4531 * @param enmAccess Access type for the mapping (only relevant for big pages)
4532 * @param iUser The shadow page pool index of the user table.
4533 * @param iUserTable The index into the user table (shadowed).
4534 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4535 * @param fLockPage Lock the page
4536 */
4537int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4538{
4539 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4540 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4541 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4542 *ppPage = NULL;
4543 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4544 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4545 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4546
4547 pgmLock(pVM);
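 /* Try the cache first; on a miss take the head of the free list, growing the
    pool via pgmPoolMakeMoreFreePages when the free list is empty. */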
4548
4549#ifdef PGMPOOL_WITH_CACHE
4550 if (pPool->fCacheEnabled)
4551 {
4552 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4553 if (RT_SUCCESS(rc2))
4554 {
4555 if (fLockPage)
4556 pgmPoolLockPage(pPool, *ppPage);
4557 pgmUnlock(pVM);
4558 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4559 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4560 return rc2;
4561 }
4562 }
4563#endif
4564
4565 /*
4566 * Allocate a new one.
4567 */
4568 int rc = VINF_SUCCESS;
4569 uint16_t iNew = pPool->iFreeHead;
4570 if (iNew == NIL_PGMPOOL_IDX)
4571 {
4572 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4573 if (RT_FAILURE(rc))
4574 {
4575 pgmUnlock(pVM);
4576 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4577 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4578 return rc;
4579 }
4580 iNew = pPool->iFreeHead;
4581 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4582 }
4583
4584 /* unlink the free head */
4585 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4586 pPool->iFreeHead = pPage->iNext;
4587 pPage->iNext = NIL_PGMPOOL_IDX;
4588
4589 /*
4590 * Initialize it.
4591 */
4592 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4593 pPage->enmKind = enmKind;
4594 pPage->enmAccess = enmAccess;
4595 pPage->GCPhys = GCPhys;
4596 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4597 pPage->fMonitored = false;
4598 pPage->fCached = false;
4599#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4600 pPage->fDirty = false;
4601#endif
4602 pPage->fReusedFlushPending = false;
4603#ifdef PGMPOOL_WITH_MONITORING
4604 pPage->cModifications = 0;
4605 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4606 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4607#else
4608 pPage->fCR3Mix = false;
4609#endif
4610#ifdef PGMPOOL_WITH_USER_TRACKING
4611 pPage->cPresent = 0;
4612 pPage->iFirstPresent = ~0;
4613 pPage->pvLastAccessHandlerFault = 0;
4614 pPage->cLastAccessHandlerCount = 0;
4615 pPage->pvLastAccessHandlerRip = 0;
4616
4617 /*
4618 * Insert into the tracking and cache. If this fails, free the page.
4619 */
4620 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4621 if (RT_FAILURE(rc3))
4622 {
4623 pPool->cUsedPages--;
4624 pPage->enmKind = PGMPOOLKIND_FREE;
4625 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4626 pPage->GCPhys = NIL_RTGCPHYS;
4627 pPage->iNext = pPool->iFreeHead;
4628 pPool->iFreeHead = pPage->idx;
4629 pgmUnlock(pVM);
4630 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4631 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4632 return rc3;
4633 }
4634#endif /* PGMPOOL_WITH_USER_TRACKING */
4635
4636 /*
4637 * Commit the allocation, clear the page and return.
4638 */
4639#ifdef VBOX_WITH_STATISTICS
4640 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4641 pPool->cUsedPagesHigh = pPool->cUsedPages;
4642#endif
4643
4644 if (!pPage->fZeroed)
4645 {
4646 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4647 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4648 ASMMemZeroPage(pv);
4649 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4650 }
4651
4652 *ppPage = pPage;
4653 if (fLockPage)
4654 pgmPoolLockPage(pPool, pPage);
4655 pgmUnlock(pVM);
4656 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4657 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4658 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4659 return rc;
4660}
4661
4662
4663/**
4664 * Frees a usage of a pool page.
4665 *
4666 * @param pVM The VM handle.
4667 * @param HCPhys The HC physical address of the shadow page.
4668 * @param iUser The shadow page pool index of the user table.
4669 * @param iUserTable The index into the user table (shadowed).
4670 */
4671void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4672{
4673 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4674 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4675 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4676}
4677
4678/**
4679 * Internal worker for finding an 'in-use' shadow page given its physical address.
4680 *
4681 * @returns Pointer to the shadow page structure.
4682 * @param pPool The pool.
4683 * @param HCPhys The HC physical address of the shadow page.
4684 */
4685PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4686{
4687 PVM pVM = pPool->CTX_SUFF(pVM);
4688
4689 Assert(PGMIsLockOwner(pVM));
4690
4691 /*
4692 * Look up the page.
4693 */
4694 pgmLock(pVM);
4695 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4696 pgmUnlock(pVM);
4697
4698 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4699 return pPage;
4700}
4701
4702/**
4703 * Flush the specified page if present
4704 *
4705 * @param pVM The VM handle.
4706 * @param GCPhys Guest physical address of the page to flush
4707 */
4708VMMDECL(void) PGMPoolFlushPage(PVM pVM, RTGCPHYS GCPhys)
4709{
4710#ifdef PGMPOOL_WITH_CACHE
4711 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4712
4713 /*
4714 * Look up the GCPhys in the hash.
4715 */
4716 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
4717 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
4718 if (i == NIL_PGMPOOL_IDX)
4719 return;
4720
4721 do
4722 {
4723 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4724 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
4725 {
4726 switch (pPage->enmKind)
4727 {
4728 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4729 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4730 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4731 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4732 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4733 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4734 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4735 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4736 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4737 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4738 case PGMPOOLKIND_64BIT_PML4:
4739 case PGMPOOLKIND_32BIT_PD:
4740 case PGMPOOLKIND_PAE_PDPT:
4741 {
4742 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
4743 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
4744 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
4745 pgmPoolMonitorChainFlush(pPool, pPage);
4746 return;
4747 }
4748
4749 /* ignore, no monitoring. */
4750 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4751 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4752 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4753 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4754 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4755 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4756 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4757 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4758 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4759 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4760 case PGMPOOLKIND_ROOT_NESTED:
4761 case PGMPOOLKIND_PAE_PD_PHYS:
4762 case PGMPOOLKIND_PAE_PDPT_PHYS:
4763 case PGMPOOLKIND_32BIT_PD_PHYS:
4764 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4765 break;
4766
4767 default:
4768 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4769 }
4770 }
4771
4772 /* next */
4773 i = pPage->iNext;
4774 } while (i != NIL_PGMPOOL_IDX);
4775#endif
4776 return;
4777}
4778
4779#ifdef IN_RING3
4780/**
4781 * Resets the shadow page pool, flushing the entire cache.
4782 *
4783 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4784 * and will execute this CR3 flush.
4785 *
4786 * @param pVM The VM handle.
4787 */
4788void pgmR3PoolReset(PVM pVM)
4789{
4790 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4791
4792 Assert(PGMIsLockOwner(pVM));
4793 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
4794 LogFlow(("pgmPoolFlushAllInt:\n"));
4795
4796 /*
4797 * If there are no pages in the pool, there is nothing to do.
4798 */
4799 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4800 {
4801 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4802 return;
4803 }
4804
4805 /*
4806 * Exit the shadow mode since we're going to clear everything,
4807 * including the root page.
4808 */
4809 for (unsigned i = 0; i < pVM->cCPUs; i++)
4810 {
4811 PVMCPU pVCpu = &pVM->aCpus[i];
4812 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
4813 }
4814
4815 /*
4816 * Nuke the free list and reinsert all pages into it.
4817 */
4818 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4819 {
4820 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4821
4822 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
4823#ifdef PGMPOOL_WITH_MONITORING
4824 if (pPage->fMonitored)
4825 pgmPoolMonitorFlush(pPool, pPage);
4826 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4827 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4828 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4829 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4830 pPage->cModifications = 0;
4831#endif
4832 pPage->GCPhys = NIL_RTGCPHYS;
4833 pPage->enmKind = PGMPOOLKIND_FREE;
4834 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4835 Assert(pPage->idx == i);
4836 pPage->iNext = i + 1;
4837 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4838 pPage->fSeenNonGlobal = false;
4839 pPage->fMonitored = false;
4840#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4841 pPage->fDirty = false;
4842#endif
4843 pPage->fCached = false;
4844 pPage->fReusedFlushPending = false;
4845#ifdef PGMPOOL_WITH_USER_TRACKING
4846 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4847#else
4848 pPage->fCR3Mix = false;
4849#endif
4850#ifdef PGMPOOL_WITH_CACHE
4851 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4852 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4853#endif
4854 pPage->cLocked = 0;
4855 }
4856 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4857 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4858 pPool->cUsedPages = 0;
4859
4860#ifdef PGMPOOL_WITH_USER_TRACKING
4861 /*
4862 * Zap and reinitialize the user records.
4863 */
4864 pPool->cPresent = 0;
4865 pPool->iUserFreeHead = 0;
4866 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4867 const unsigned cMaxUsers = pPool->cMaxUsers;
4868 for (unsigned i = 0; i < cMaxUsers; i++)
4869 {
4870 paUsers[i].iNext = i + 1;
4871 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4872 paUsers[i].iUserTable = 0xfffffffe;
4873 }
4874 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4875#endif
4876
4877#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4878 /*
4879 * Clear all the GCPhys links and rebuild the phys ext free list.
4880 */
4881 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
4882 pRam;
4883 pRam = pRam->CTX_SUFF(pNext))
4884 {
4885 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4886 while (iPage-- > 0)
4887 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
4888 }
4889
4890 pPool->iPhysExtFreeHead = 0;
4891 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4892 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4893 for (unsigned i = 0; i < cMaxPhysExts; i++)
4894 {
4895 paPhysExts[i].iNext = i + 1;
4896 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4897 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4898 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4899 }
4900 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4901#endif
4902
4903#ifdef PGMPOOL_WITH_MONITORING
4904 /*
4905 * Just zap the modified list.
4906 */
4907 pPool->cModifiedPages = 0;
4908 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4909#endif
4910
4911#ifdef PGMPOOL_WITH_CACHE
4912 /*
4913 * Clear the GCPhys hash and the age list.
4914 */
4915 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4916 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4917 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4918 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4919#endif
4920
4921#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4922 /* Clear all dirty pages. */
4923 pPool->idxFreeDirtyPage = 0;
4924 pPool->cDirtyPages = 0;
4925 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
4926 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
4927#endif
4928
4929 /*
4930 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4931 */
4932 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4933 {
4934 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4935 pPage->iNext = NIL_PGMPOOL_IDX;
4936#ifdef PGMPOOL_WITH_MONITORING
4937 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4938 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4939 pPage->cModifications = 0;
4940 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4941 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4942 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4943 if (pPage->fMonitored)
4944 {
4945 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4946 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4947 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4948 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4949 pPool->pszAccessHandler);
4950 AssertFatalRCSuccess(rc);
4951# ifdef PGMPOOL_WITH_CACHE
4952 pgmPoolHashInsert(pPool, pPage);
4953# endif
4954 }
4955#endif
4956#ifdef PGMPOOL_WITH_USER_TRACKING
4957 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4958#endif
4959#ifdef PGMPOOL_WITH_CACHE
4960 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4961 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4962#endif
4963 }
4964
4965 for (unsigned i = 0; i < pVM->cCPUs; i++)
4966 {
4967 PVMCPU pVCpu = &pVM->aCpus[i];
4968 /*
4969 * Re-enter the shadowing mode and assert Sync CR3 FF.
4970 */
4971 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
4972 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
4973 }
4974
4975 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4976}
4977#endif /* IN_RING3 */
4978
4979#ifdef LOG_ENABLED
4980static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
4981{
4982 switch(enmKind)
4983 {
4984 case PGMPOOLKIND_INVALID:
4985 return "PGMPOOLKIND_INVALID";
4986 case PGMPOOLKIND_FREE:
4987 return "PGMPOOLKIND_FREE";
4988 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4989 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4990 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4991 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4992 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4993 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4994 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4995 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4996 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4997 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4998 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4999 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5000 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5001 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5002 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5003 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5004 case PGMPOOLKIND_32BIT_PD:
5005 return "PGMPOOLKIND_32BIT_PD";
5006 case PGMPOOLKIND_32BIT_PD_PHYS:
5007 return "PGMPOOLKIND_32BIT_PD_PHYS";
5008 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5009 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5010 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5011 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5012 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5013 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5014 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5015 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5016 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5017 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5018 case PGMPOOLKIND_PAE_PD_PHYS:
5019 return "PGMPOOLKIND_PAE_PD_PHYS";
5020 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5021 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5022 case PGMPOOLKIND_PAE_PDPT:
5023 return "PGMPOOLKIND_PAE_PDPT";
5024 case PGMPOOLKIND_PAE_PDPT_PHYS:
5025 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5026 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5027 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5028 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5029 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5030 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5031 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5032 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5033 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5034 case PGMPOOLKIND_64BIT_PML4:
5035 return "PGMPOOLKIND_64BIT_PML4";
5036 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5037 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5038 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5039 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5040 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5041 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5042 case PGMPOOLKIND_ROOT_NESTED:
5043 return "PGMPOOLKIND_ROOT_NESTED";
5044 }
5045 return "Unknown kind!";
5046}
5047#endif /* LOG_ENABLED */