VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 22741

Last change on this file since 22741 was 22741, checked in by vboxsync, 16 years ago

Must flush dirty pages in pgmPoolClearAll

1/* $Id: PGMAllPool.cpp 22741 2009-09-03 12:46:00Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42#include <iprt/string.h>
43
44
45/*******************************************************************************
46* Internal Functions *
47*******************************************************************************/
48RT_C_DECLS_BEGIN
49static void pgmPoolFlushAllInt(PPGMPOOL pPool);
50#ifdef PGMPOOL_WITH_USER_TRACKING
51DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
52DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
53static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
54#endif
55#ifdef PGMPOOL_WITH_CACHE
56static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
57#endif
58#ifdef PGMPOOL_WITH_MONITORING
59static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
60#endif
61#ifndef IN_RING3
62DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
63#endif
64#ifdef LOG_ENABLED
65static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
66#endif
67
68void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs);
69void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt);
70int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
71PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
72void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
73void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
74
75RT_C_DECLS_END
76
77
78/**
79 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
80 *
81 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
82 * @param enmKind The page kind.
83 */
84DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
85{
86 switch (enmKind)
87 {
88 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
89 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
90 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
91 return true;
92 default:
93 return false;
94 }
95}
96
97/** @def PGMPOOL_PAGE_2_LOCKED_PTR
98 * Maps a pool page into the current context and locks it (RC only).
99 *
100 * @returns Pointer to the mapped page (current context).
101 * @param pVM The VM handle.
102 * @param pPage The pool page.
103 *
104 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
105 * small page window employed by that function. Be careful.
106 * @remark There is no need to assert on the result.
107 */
108#if defined(IN_RC)
109DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
110{
111 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
112
113 /* Make sure the dynamic mapping will not be reused. */
114 if (pv)
115 PGMDynLockHCPage(pVM, (uint8_t *)pv);
116
117 return pv;
118}
119#else
120# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
121#endif
122
123/** @def PGMPOOL_UNLOCK_PTR
124 * Unlocks a previously locked dynamic mapping (RC only).
125 *
126 * @returns VBox status code.
127 * @param pVM The VM handle.
128 * @param pPage The pool page.
129 *
130 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
131 * small page window employed by that function. Be careful.
132 * @remark There is no need to assert on the result.
133 */
134#if defined(IN_RC)
135DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
136{
137 if (pvPage)
138 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
139}
140#else
141# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
142#endif
143
144
145#ifdef PGMPOOL_WITH_MONITORING
146/**
147 * Determine the size of a write instruction.
148 * @returns number of bytes written.
149 * @param pDis The disassembler state.
150 */
151static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
152{
153 /*
154 * This is very crude and possibly wrong for some opcodes,
155 * but since it's not really supposed to be called we can
156 * probably live with that.
157 */
158 return DISGetParamSize(pDis, &pDis->param1);
159}
160
161
162/**
163 * Flushes a chain of pages sharing the same access monitor.
164 *
165 * @returns VBox status code suitable for scheduling.
166 * @param pPool The pool.
167 * @param pPage A page in the chain.
168 */
169int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
170{
171 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
172
173 /*
174 * Find the list head.
175 */
176 uint16_t idx = pPage->idx;
177 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
178 {
179 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
180 {
181 idx = pPage->iMonitoredPrev;
182 Assert(idx != pPage->idx);
183 pPage = &pPool->aPages[idx];
184 }
185 }
186
187 /*
188 * Iterate the list flushing each shadow page.
189 */
190 int rc = VINF_SUCCESS;
191 for (;;)
192 {
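 /* Note: grab the next index before flushing; pgmPoolFlushPage is expected to unlink pPage from the monitored chain. */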
193 idx = pPage->iMonitoredNext;
194 Assert(idx != pPage->idx);
195 if (pPage->idx >= PGMPOOL_IDX_FIRST)
196 {
197 int rc2 = pgmPoolFlushPage(pPool, pPage);
198 AssertRC(rc2);
199 }
200 /* next */
201 if (idx == NIL_PGMPOOL_IDX)
202 break;
203 pPage = &pPool->aPages[idx];
204 }
205 return rc;
206}
207
208
209/**
210 * Wrapper for reading the guest page table / directory entry that is being modified.
211 *
212 * @returns VBox status code suitable for scheduling.
213 * @param pVM VM Handle.
214 * @param pvDst Destination address
215 * @param pvSrc Source guest virtual address.
216 * @param GCPhysSrc The source guest physical address.
217 * @param cb Size of data to read
218 */
219DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
220{
221#if defined(IN_RING3)
222 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
223 return VINF_SUCCESS;
224#else
225 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
226 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
227#endif
228}
229
230/**
231 * Process shadow entries before they are changed by the guest.
232 *
233 * For PT entries we will clear them. For PD entries, we'll simply check
234 * for mapping conflicts and set the SyncCR3 FF if found.
235 *
236 * @param pVCpu VMCPU handle
237 * @param pPool The pool.
238 * @param pPage The head page.
239 * @param GCPhysFault The guest physical fault address.
240 * @param uAddress In R0 and GC this is the guest context fault address (flat).
241 * In R3 this is the host context 'fault' address.
242 * @param pDis The disassembler state for figuring out the write size.
243 * This need not be specified if the caller knows we won't do cross entry accesses.
244 */
245void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, PDISCPUSTATE pDis)
246{
247 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
248 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
249 const unsigned cbWrite = pDis ? pgmPoolDisasWriteSize(pDis) : 0;
250 PVM pVM = pPool->CTX_SUFF(pVM);
251
252 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)pvAddress, GCPhysFault, cbWrite));
253
254 for (;;)
255 {
256 union
257 {
258 void *pv;
259 PX86PT pPT;
260 PX86PTPAE pPTPae;
261 PX86PD pPD;
262 PX86PDPAE pPDPae;
263 PX86PDPT pPDPT;
264 PX86PML4 pPML4;
265 } uShw;
266
267 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s cbWrite=%d\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
268
269 uShw.pv = NULL;
270 switch (pPage->enmKind)
271 {
272 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
273 {
274 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
275 const unsigned iShw = off / sizeof(X86PTE);
276 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
277 if (uShw.pPT->a[iShw].n.u1Present)
278 {
279# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
280 X86PTE GstPte;
281
282 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
283 AssertRC(rc);
284 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
285 pgmPoolTracDerefGCPhysHint(pPool, pPage,
286 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
287 GstPte.u & X86_PTE_PG_MASK);
288# endif
289 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
290 }
291 break;
292 }
293
294 /* page/2 sized */
295 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
296 {
297 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
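 /* A 32-bit guest PT (1024 entries) is shadowed by two PAE PTs of 512 entries each; pPage->GCPhys
    records which 2 KB half of the guest PT this shadow mirrors, so only writes to that half matter.
    E.g. off = 0x850 -> guest PTE 532, i.e. the second half. */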
298 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
299 {
300 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
301 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
302 if (uShw.pPTPae->a[iShw].n.u1Present)
303 {
304# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
305 X86PTE GstPte;
306 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
307 AssertRC(rc);
308
309 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
310 pgmPoolTracDerefGCPhysHint(pPool, pPage,
311 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
312 GstPte.u & X86_PTE_PG_MASK);
313# endif
314 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
315 }
316 }
317 break;
318 }
319
320 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
321 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
322 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
323 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
324 {
325 unsigned iGst = off / sizeof(X86PDE);
326 unsigned iShwPdpt = iGst / 256;
327 unsigned iShw = (iGst % 256) * 2;
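 /* The 1024-entry 32-bit guest PD is shadowed by four 512-entry PAE PDs (PD0..PD3): iShwPdpt picks
    the quarter the guest PDE lives in, and each 32-bit PDE expands to two PAE PDEs, hence the '* 2'.
    E.g. iGst = 700 -> iShwPdpt = 2, iShw = 376. The enmKind check below makes sure only the shadow
    PD that actually covers this quarter is touched. */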
328 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
329
330 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
331 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
332 {
333 for (unsigned i = 0; i < 2; i++)
334 {
335# ifndef IN_RING0
336 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
337 {
338 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
339 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
340 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
341 break;
342 }
343 else
344# endif /* !IN_RING0 */
345 if (uShw.pPDPae->a[iShw+i].n.u1Present)
346 {
347 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
348 pgmPoolFree(pVM,
349 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
350 pPage->idx,
351 iShw + i);
352 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
353 }
354
355 /* paranoia / a bit assumptive. */
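 /* An entry-unaligned write that is long enough can spill into the next guest entry as well;
    iShw2 below is the index of the corresponding extra shadow entry to clear. */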
356 if ( pDis
357 && (off & 3)
358 && (off & 3) + cbWrite > 4)
359 {
360 const unsigned iShw2 = iShw + 2 + i;
361 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
362 {
363# ifndef IN_RING0
364 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
365 {
366 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
367 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
368 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
369 break;
370 }
371 else
372# endif /* !IN_RING0 */
373 if (uShw.pPDPae->a[iShw2].n.u1Present)
374 {
375 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
376 pgmPoolFree(pVM,
377 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
378 pPage->idx,
379 iShw2);
380 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
381 }
382 }
383 }
384 }
385 }
386 break;
387 }
388
389 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
390 {
391 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
392 const unsigned iShw = off / sizeof(X86PTEPAE);
393 if (uShw.pPTPae->a[iShw].n.u1Present)
394 {
395# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
396 X86PTEPAE GstPte;
397 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
398 AssertRC(rc);
399
400 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
401 pgmPoolTracDerefGCPhysHint(pPool, pPage,
402 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
403 GstPte.u & X86_PTE_PAE_PG_MASK);
404# endif
405 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
406 }
407
408 /* paranoia / a bit assumptive. */
409 if ( pDis
410 && (off & 7)
411 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
412 {
413 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
414 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
415
416 if (uShw.pPTPae->a[iShw2].n.u1Present)
417 {
418# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
419 X86PTEPAE GstPte;
420# ifdef IN_RING3
421 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
422# else
423 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
424# endif
425 AssertRC(rc);
426 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
427 pgmPoolTracDerefGCPhysHint(pPool, pPage,
428 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
429 GstPte.u & X86_PTE_PAE_PG_MASK);
430# endif
431 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u, 0);
432 }
433 }
434 break;
435 }
436
437 case PGMPOOLKIND_32BIT_PD:
438 {
439 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
440 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
441
442 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
443# ifndef IN_RING0
444 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
445 {
446 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
447 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
448 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
449 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
450 break;
451 }
452# endif /* !IN_RING0 */
453# ifndef IN_RING0
454 else
455# endif /* !IN_RING0 */
456 {
457 if (uShw.pPD->a[iShw].n.u1Present)
458 {
459 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
460 pgmPoolFree(pVM,
461 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
462 pPage->idx,
463 iShw);
464 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
465 }
466 }
467 /* paranoia / a bit assumptive. */
468 if ( pDis
469 && (off & 3)
470 && (off & 3) + cbWrite > sizeof(X86PTE))
471 {
472 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
473 if ( iShw2 != iShw
474 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
475 {
476# ifndef IN_RING0
477 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
478 {
479 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
480 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
481 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
482 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
483 break;
484 }
485# endif /* !IN_RING0 */
486# ifndef IN_RING0
487 else
488# endif /* !IN_RING0 */
489 {
490 if (uShw.pPD->a[iShw2].n.u1Present)
491 {
492 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
493 pgmPoolFree(pVM,
494 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
495 pPage->idx,
496 iShw2);
497 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
498 }
499 }
500 }
501 }
502#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
503 if ( uShw.pPD->a[iShw].n.u1Present
504 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
505 {
506 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
507# ifdef IN_RC /* TLB load - we're pushing things a bit... */
508 ASMProbeReadByte(pvAddress);
509# endif
510 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
511 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
512 }
513#endif
514 break;
515 }
516
517 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
518 {
519 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
520 const unsigned iShw = off / sizeof(X86PDEPAE);
521#ifndef IN_RING0
522 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
523 {
524 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
525 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
526 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
527 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
528 break;
529 }
530#endif /* !IN_RING0 */
531 /*
532 * Causes trouble when the guest uses a PDE to refer to the whole page table level
533 * structure. (Invalidate here; faults later on when it tries to change the page
534 * table entries -> recheck; probably only applies to the RC case.)
535 */
536# ifndef IN_RING0
537 else
538# endif /* !IN_RING0 */
539 {
540 if (uShw.pPDPae->a[iShw].n.u1Present)
541 {
542 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
543 pgmPoolFree(pVM,
544 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
545 pPage->idx,
546 iShw);
547 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
548 }
549 }
550 /* paranoia / a bit assumptive. */
551 if ( pDis
552 && (off & 7)
553 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
554 {
555 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
556 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
557
558#ifndef IN_RING0
559 if ( iShw2 != iShw
560 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
561 {
562 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
563 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
564 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
565 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
566 break;
567 }
568#endif /* !IN_RING0 */
569# ifndef IN_RING0
570 else
571# endif /* !IN_RING0 */
572 if (uShw.pPDPae->a[iShw2].n.u1Present)
573 {
574 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
575 pgmPoolFree(pVM,
576 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
577 pPage->idx,
578 iShw2);
579 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
580 }
581 }
582 break;
583 }
584
585 case PGMPOOLKIND_PAE_PDPT:
586 {
587 /*
588 * Hopefully this doesn't happen very often:
589 * - touching unused parts of the page
590 * - messing with the bits of pd pointers without changing the physical address
591 */
592 /* PDPT roots are not page aligned; 32 byte only! */
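 /* The PAE PDPT is just four 8-byte entries (32 bytes) and only needs 32-byte alignment, so the
    offset is computed relative to the monitored GCPhys rather than from the page offset. */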
593 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
594
595 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
596 const unsigned iShw = offPdpt / sizeof(X86PDPE);
597 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
598 {
599# ifndef IN_RING0
600 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
601 {
602 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
603 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
604 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
605 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
606 break;
607 }
608# endif /* !IN_RING0 */
609# ifndef IN_RING0
610 else
611# endif /* !IN_RING0 */
612 if (uShw.pPDPT->a[iShw].n.u1Present)
613 {
614 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
615 pgmPoolFree(pVM,
616 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
617 pPage->idx,
618 iShw);
619 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
620 }
621
622 /* paranoia / a bit assumptive. */
623 if ( pDis
624 && (offPdpt & 7)
625 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
626 {
627 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
628 if ( iShw2 != iShw
629 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
630 {
631# ifndef IN_RING0
632 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
633 {
634 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
635 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
636 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
637 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
638 break;
639 }
640# endif /* !IN_RING0 */
641# ifndef IN_RING0
642 else
643# endif /* !IN_RING0 */
644 if (uShw.pPDPT->a[iShw2].n.u1Present)
645 {
646 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
647 pgmPoolFree(pVM,
648 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
649 pPage->idx,
650 iShw2);
651 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
652 }
653 }
654 }
655 }
656 break;
657 }
658
659#ifndef IN_RC
660 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
661 {
662 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
663 const unsigned iShw = off / sizeof(X86PDEPAE);
664 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
665 if (uShw.pPDPae->a[iShw].n.u1Present)
666 {
667 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
668 pgmPoolFree(pVM,
669 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
670 pPage->idx,
671 iShw);
672 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
673 }
674 /* paranoia / a bit assumptive. */
675 if ( pDis
676 && (off & 7)
677 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
678 {
679 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
680 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
681
682 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
683 if (uShw.pPDPae->a[iShw2].n.u1Present)
684 {
685 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
686 pgmPoolFree(pVM,
687 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
688 pPage->idx,
689 iShw2);
690 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
691 }
692 }
693 break;
694 }
695
696 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
697 {
698 /*
699 * Hopefully this doesn't happen very often:
700 * - messing with the bits of pd pointers without changing the physical address
701 */
702 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
703 {
704 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
705 const unsigned iShw = off / sizeof(X86PDPE);
706 if (uShw.pPDPT->a[iShw].n.u1Present)
707 {
708 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
709 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
710 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
711 }
712 /* paranoia / a bit assumptive. */
713 if ( pDis
714 && (off & 7)
715 && (off & 7) + cbWrite > sizeof(X86PDPE))
716 {
717 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
718 if (uShw.pPDPT->a[iShw2].n.u1Present)
719 {
720 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
721 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
722 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
723 }
724 }
725 }
726 break;
727 }
728
729 case PGMPOOLKIND_64BIT_PML4:
730 {
731 /*
732 * Hopefully this doesn't happen very often:
733 * - messing with the bits of pd pointers without changing the physical address
734 */
735 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
736 {
737 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
738 const unsigned iShw = off / sizeof(X86PDPE);
739 if (uShw.pPML4->a[iShw].n.u1Present)
740 {
741 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
742 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
743 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
744 }
745 /* paranoia / a bit assumptive. */
746 if ( pDis
747 && (off & 7)
748 && (off & 7) + cbWrite > sizeof(X86PDPE))
749 {
750 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
751 if (uShw.pPML4->a[iShw2].n.u1Present)
752 {
753 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
754 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
755 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
756 }
757 }
758 }
759 break;
760 }
761#endif /* !IN_RC */
762
763 default:
764 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
765 }
766 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
767
768 /* next */
769 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
770 return;
771 pPage = &pPool->aPages[pPage->iMonitoredNext];
772 }
773}
774
775# ifndef IN_RING3
776/**
777 * Checks if an access could be a fork operation in progress.
778 *
779 * Meaning that the guest is setting up the parent process for copy-on-write.
780 *
781 * @returns true if it's likely that we're forking, otherwise false.
782 * @param pPool The pool.
783 * @param pDis The disassembled instruction.
784 * @param offFault The access offset.
785 */
786DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
787{
788 /*
789 * i386 Linux uses btr to clear X86_PTE_RW.
790 * The functions involved are (2.6.16 source inspection):
791 * clear_bit
792 * ptep_set_wrprotect
793 * copy_one_pte
794 * copy_pte_range
795 * copy_pmd_range
796 * copy_pud_range
797 * copy_page_range
798 * dup_mmap
799 * dup_mm
800 * copy_mm
801 * copy_process
802 * do_fork
803 */
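 /* The (offFault & 4) == 0 test presumably checks that the write hits the low dword of the
    (8-byte PAE) entry, which is where X86_PTE_RW (bit 1) lives. */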
804 if ( pDis->pCurInstr->opcode == OP_BTR
805 && !(offFault & 4)
806 /** @todo Validate that the bit index is X86_PTE_RW. */
807 )
808 {
809 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
810 return true;
811 }
812 return false;
813}
814
815
816/**
817 * Determine whether the page is likely to have been reused.
818 *
819 * @returns true if we consider the page as being reused for a different purpose.
820 * @returns false if we consider it to still be a paging page.
821 * @param pVM VM Handle.
822 * @param pVCpu VMCPU Handle.
823 * @param pRegFrame Trap register frame.
824 * @param pDis The disassembly info for the faulting instruction.
825 * @param pvFault The fault address.
826 *
827 * @remark The REP prefix check is left to the caller because of STOSD/W.
828 */
829DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
830{
831#ifndef IN_RC
832 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
833 if ( HWACCMHasPendingIrq(pVM)
834 && (pRegFrame->rsp - pvFault) < 32)
835 {
836 /* Fault caused by stack writes while trying to inject an interrupt event. */
837 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
838 return true;
839 }
840#else
841 NOREF(pVM); NOREF(pvFault);
842#endif
843
844 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
845
846 /* Non-supervisor mode write means it's used for something else. */
847 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
848 return true;
849
850 switch (pDis->pCurInstr->opcode)
851 {
852 /* call implies the actual push of the return address faulted */
853 case OP_CALL:
854 Log4(("pgmPoolMonitorIsReused: CALL\n"));
855 return true;
856 case OP_PUSH:
857 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
858 return true;
859 case OP_PUSHF:
860 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
861 return true;
862 case OP_PUSHA:
863 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
864 return true;
865 case OP_FXSAVE:
866 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
867 return true;
868 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
869 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
870 return true;
871 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
872 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
873 return true;
874 case OP_MOVSWD:
875 case OP_STOSWD:
876 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
877 && pRegFrame->rcx >= 0x40
878 )
879 {
880 Assert(pDis->mode == CPUMODE_64BIT);
881
882 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
883 return true;
884 }
885 return false;
886 }
887 if ( ( (pDis->param1.flags & USE_REG_GEN32)
888 || (pDis->param1.flags & USE_REG_GEN64))
889 && (pDis->param1.base.reg_gen == USE_REG_ESP))
890 {
891 Log4(("pgmPoolMonitorIsReused: ESP\n"));
892 return true;
893 }
894
895 return false;
896}
897
898
899/**
900 * Flushes the page being accessed.
901 *
902 * @returns VBox status code suitable for scheduling.
903 * @param pVM The VM handle.
904 * @param pVCpu The VMCPU handle.
905 * @param pPool The pool.
906 * @param pPage The pool page (head).
907 * @param pDis The disassembly of the write instruction.
908 * @param pRegFrame The trap register frame.
909 * @param GCPhysFault The fault address as guest physical address.
910 * @param pvFault The fault address.
911 */
912static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
913 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
914{
915 /*
916 * First, do the flushing.
917 */
918 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
919
920 /*
921 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
922 * @todo: why is this necessary? an instruction restart would be sufficient, wouldn't it?
923 */
924 uint32_t cbWritten;
925 int rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten);
926 if (RT_SUCCESS(rc2))
927 pRegFrame->rip += pDis->opsize;
928 else if (rc2 == VERR_EM_INTERPRETER)
929 {
930#ifdef IN_RC
931 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
932 {
933 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
934 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
935 rc = VINF_SUCCESS;
936 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
937 }
938 else
939#endif
940 {
941 rc = VINF_EM_RAW_EMULATE_INSTR;
942 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
943 }
944 }
945 else
946 rc = rc2;
947
948 /* See use in pgmPoolAccessHandlerSimple(). */
949 PGM_INVL_VCPU_TLBS(pVCpu);
950
951 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
952 return rc;
953
954}
955
956
957/**
958 * Handles the STOSD write accesses.
959 *
960 * @returns VBox status code suitable for scheduling.
961 * @param pVM The VM handle.
962 * @param pPool The pool.
963 * @param pPage The pool page (head).
964 * @param pDis The disassembly of the write instruction.
965 * @param pRegFrame The trap register frame.
966 * @param GCPhysFault The fault address as guest physical address.
967 * @param pvFault The fault address.
968 */
969DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
970 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
971{
972 unsigned uIncrement = pDis->param1.size;
973
974 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
975 Assert(pRegFrame->rcx <= 0x20);
976
977#ifdef VBOX_STRICT
978 if (pDis->opmode == CPUMODE_32BIT)
979 Assert(uIncrement == 4);
980 else
981 Assert(uIncrement == 8);
982#endif
983
984 Log3(("pgmPoolAccessHandlerSTOSD\n"));
985
986 /*
987 * Increment the modification counter and insert it into the list
988 * of modified pages the first time.
989 */
990 if (!pPage->cModifications++)
991 pgmPoolMonitorModifiedInsert(pPool, pPage);
992
993 /*
994 * Execute REP STOSD.
995 *
996 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
997 * write situation, meaning that it's safe to write here.
998 */
999 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1000 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
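 /* For each element: let the pool clear/track the shadow entry that is about to change, then
    perform the guest write ourselves and advance the registers as REP STOS would. */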
1001 while (pRegFrame->rcx)
1002 {
1003#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1004 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1005 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1006 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1007#else
1008 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1009#endif
1010#ifdef IN_RC
1011 *(uint32_t *)pu32 = pRegFrame->eax;
1012#else
1013 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
1014#endif
1015 pu32 += uIncrement;
1016 GCPhysFault += uIncrement;
1017 pRegFrame->rdi += uIncrement;
1018 pRegFrame->rcx--;
1019 }
1020 pRegFrame->rip += pDis->opsize;
1021
1022#ifdef IN_RC
1023 /* See use in pgmPoolAccessHandlerSimple(). */
1024 PGM_INVL_VCPU_TLBS(pVCpu);
1025#endif
1026
1027 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
1028 return VINF_SUCCESS;
1029}
1030
1031
1032/**
1033 * Handles the simple write accesses.
1034 *
1035 * @returns VBox status code suitable for scheduling.
1036 * @param pVM The VM handle.
1037 * @param pVCpu The VMCPU handle.
1038 * @param pPool The pool.
1039 * @param pPage The pool page (head).
1040 * @param pDis The disassembly of the write instruction.
1041 * @param pRegFrame The trap register frame.
1042 * @param GCPhysFault The fault address as guest physical address.
1043 * @param pvFault The fault address.
1044 */
1045DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1046 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1047{
1048 Log3(("pgmPoolAccessHandlerSimple\n"));
1049 /*
1050 * Increment the modification counter and insert it into the list
1051 * of modified pages the first time.
1052 */
1053 if (!pPage->cModifications++)
1054 pgmPoolMonitorModifiedInsert(pPool, pPage);
1055
1056 /*
1057 * Clear all the pages. ASSUMES that pvFault is readable.
1058 */
1059#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1060 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1061 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, pDis);
1062 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1063#else
1064 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, pDis);
1065#endif
1066
1067 /*
1068 * Interpret the instruction.
1069 */
1070 uint32_t cb;
1071 int rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb);
1072 if (RT_SUCCESS(rc))
1073 pRegFrame->rip += pDis->opsize;
1074 else if (rc == VERR_EM_INTERPRETER)
1075 {
1076 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1077 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1078 rc = VINF_EM_RAW_EMULATE_INSTR;
1079 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1080 }
1081
1082#ifdef IN_RC
1083 /*
1084 * Quick hack, with logging enabled we're getting stale
1085 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
1086 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
1087 * have to be fixed to support this. But that'll have to wait till next week.
1088 *
1089 * An alternative is to keep track of the changed PTEs together with the
1090 * GCPhys from the guest PT. This may prove expensive though.
1091 *
1092 * At the moment, it's VITAL that this is done AFTER interpreting the instruction,
1093 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1094 */
1095 PGM_INVL_VCPU_TLBS(pVCpu);
1096#endif
1097
1098 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1099 return rc;
1100}
1101
1102/**
1103 * \#PF Handler callback for PT write accesses.
1104 *
1105 * @returns VBox status code (appropriate for GC return).
1106 * @param pVM VM Handle.
1107 * @param uErrorCode CPU Error code.
1108 * @param pRegFrame Trap register frame.
1109 * NULL on DMA and other non CPU access.
1110 * @param pvFault The fault address (cr2).
1111 * @param GCPhysFault The GC physical address corresponding to pvFault.
1112 * @param pvUser User argument.
1113 */
1114DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1115{
1116 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1117 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1118 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1119 PVMCPU pVCpu = VMMGetCpu(pVM);
1120 unsigned cMaxModifications;
1121 bool fForcedFlush = false;
1122
1123 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1124
1125 pgmLock(pVM);
1126 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1127 {
1128 /* Pool page changed while we were waiting for the lock; ignore. */
1129 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1130 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1131 pgmUnlock(pVM);
1132 return VINF_SUCCESS;
1133 }
1134
1135 /*
1136 * Disassemble the faulting instruction.
1137 */
1138 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1139 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1140 AssertReturnStmt(rc == VINF_SUCCESS, pgmUnlock(pVM), rc);
1141
1142 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1143
1144 /*
1145 * We should ALWAYS have the list head as user parameter. This
1146 * is because we use that page to record the changes.
1147 */
1148 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1149#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1150 Assert(!pPage->fDirty);
1151#endif
1152
1153 /* Maximum nr of modifications depends on the guest mode. */
1154 if (pDis->mode == CPUMODE_32BIT)
1155 cMaxModifications = 48;
1156 else
1157 cMaxModifications = 24;
1158
1159 /*
1160 * Incremental page table updates should weigh more than random ones.
1161 * (Only applies when started from offset 0)
1162 */
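 /* I.e. when consecutive faults come from (nearly) the same RIP and hit consecutive entries,
    double cModifications so a page table that is being rewritten wholesale reaches the flush /
    dirty-page threshold sooner. */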
1163 pVCpu->pgm.s.cPoolAccessHandler++;
1164 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1165 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1166 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1167 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1168 {
1169 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1170 pPage->cModifications = pPage->cModifications * 2;
1171 pPage->pvLastAccessHandlerFault = pvFault;
1172 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1173 if (pPage->cModifications >= cMaxModifications)
1174 {
1175 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1176 fForcedFlush = true;
1177 }
1178 }
1179
1180 if (pPage->cModifications >= cMaxModifications)
1181 Log(("Mod overflow %VGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1182
1183 /*
1184 * Check if it's worth dealing with.
1185 */
1186 bool fReused = false;
1187 bool fNotReusedNotForking = false;
1188 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1189 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1190 )
1191 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1192 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1193 {
1194 /*
1195 * Simple instructions, no REP prefix.
1196 */
1197 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1198 {
1199 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1200
1201 /* A mov instruction to change the first page table entry will be remembered so we can detect
1202 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1203 */
1204 if ( rc == VINF_SUCCESS
1205 && pDis->pCurInstr->opcode == OP_MOV
1206 && (pvFault & PAGE_OFFSET_MASK) == 0)
1207 {
1208 pPage->pvLastAccessHandlerFault = pvFault;
1209 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1210 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1211 /* Make sure we don't kick out a page too quickly. */
1212 if (pPage->cModifications > 8)
1213 pPage->cModifications = 2;
1214 }
1215 else
1216 if (pPage->pvLastAccessHandlerFault == pvFault)
1217 {
1218 /* ignore the 2nd write to this page table entry. */
1219 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1220 }
1221 else
1222 {
1223 pPage->pvLastAccessHandlerFault = 0;
1224 pPage->pvLastAccessHandlerRip = 0;
1225 }
1226
1227 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1228 pgmUnlock(pVM);
1229 return rc;
1230 }
1231
1232 /*
1233 * Windows is frequently doing small memset() operations (netio test 4k+).
1234 * We have to deal with these or we'll kill the cache and performance.
1235 */
1236 if ( pDis->pCurInstr->opcode == OP_STOSWD
1237 && !pRegFrame->eflags.Bits.u1DF
1238 && pDis->opmode == pDis->mode
1239 && pDis->addrmode == pDis->mode)
1240 {
1241 bool fValidStosd = false;
1242
1243 if ( pDis->mode == CPUMODE_32BIT
1244 && pDis->prefix == PREFIX_REP
1245 && pRegFrame->ecx <= 0x20
1246 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1247 && !((uintptr_t)pvFault & 3)
1248 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1249 )
1250 {
1251 fValidStosd = true;
1252 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1253 }
1254 else
1255 if ( pDis->mode == CPUMODE_64BIT
1256 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1257 && pRegFrame->rcx <= 0x20
1258 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1259 && !((uintptr_t)pvFault & 7)
1260 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1261 )
1262 {
1263 fValidStosd = true;
1264 }
1265
1266 if (fValidStosd)
1267 {
1268 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1269 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1270 pgmUnlock(pVM);
1271 return rc;
1272 }
1273 }
1274
1275 /* REP prefix, don't bother. */
1276 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1277 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1278 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1279 fNotReusedNotForking = true;
1280 }
1281
1282#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1283 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1284 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1285 */
1286 if ( pPage->cModifications >= cMaxModifications
1287 && !fForcedFlush
1288 && pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1289 && ( fNotReusedNotForking
1290 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1291 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1292 )
1293 )
1294 {
1295 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1296 Assert(pPage->fDirty == false);
1297
1298 /* Flush any monitored duplicates as we will disable write protection. */
1299 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1300 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1301 {
1302 PPGMPOOLPAGE pPageHead = pPage;
1303
1304 /* Find the monitor head. */
1305 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1306 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1307
1308 while (pPageHead)
1309 {
1310 unsigned idxNext = pPageHead->iMonitoredNext;
1311
1312 if (pPageHead != pPage)
1313 {
1314 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1315 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1316 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1317 AssertRC(rc2);
1318 }
1319
1320 if (idxNext == NIL_PGMPOOL_IDX)
1321 break;
1322
1323 pPageHead = &pPool->aPages[idxNext];
1324 }
1325 }
1326
1327 /* The flushing above might fail for locked pages, so double check. */
1328 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1329 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1330 {
1331 /* Temporarily allow write access to the page table again. */
1332 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
1333 if (rc == VINF_SUCCESS)
1334 {
1335 rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
1336 AssertMsg(rc == VINF_SUCCESS
1337 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1338 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1339 || rc == VERR_PAGE_NOT_PRESENT,
1340 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1341
1342 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1343 pPage->pvDirtyFault = pvFault;
1344
1345 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1346 pgmUnlock(pVM);
1347 return rc;
1348 }
1349 }
1350 }
1351#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1352
1353 /*
1354 * Not worth it, so flush it.
1355 *
1356 * If we considered it to be reused, don't go back to ring-3
1357 * to emulate failed instructions since we usually cannot
1358 * interpret them. This may be a bit risky, in which case
1359 * the reuse detection must be fixed.
1360 */
1361 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1362 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1363 rc = VINF_SUCCESS;
1364 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1365 pgmUnlock(pVM);
1366 return rc;
1367}
1368
1369# endif /* !IN_RING3 */
1370
1371# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1372
1373# ifdef VBOX_STRICT
1374/**
1375 * Check references to guest physical memory in a PAE / PAE page table.
1376 *
1377 * @param pPool The pool.
1378 * @param pPage The page.
1379 * @param pShwPT The shadow page table (mapping of the page).
1380 * @param pGstPT The guest page table.
1381 */
1382DECLINLINE(void) pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
1383{
1384 unsigned cErrors = 0;
1385 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
1386 {
1387 if (pShwPT->a[i].n.u1Present)
1388 {
1389 RTHCPHYS HCPhys = -1;
1390 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1391 if ( rc != VINF_SUCCESS
1392 || (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) != HCPhys)
1393 {
1394 RTHCPHYS HCPhysPT = -1;
1395 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1396 cErrors++;
1397
1398 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pPage->GCPhys, &HCPhysPT);
1399 AssertRC(rc);
1400
1401 for (unsigned i = 0; i < pPool->cCurPages; i++)
1402 {
1403 PPGMPOOLPAGE pTempPage = &pPool->aPages[i];
1404
1405 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1406 {
1407 PX86PTPAE pShwPT2 = (PX86PTPAE)PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pTempPage);
1408
1409 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1410 {
1411 if ( pShwPT2->a[j].n.u1Present
1412 && pShwPT2->a[j].n.u1Write
1413 && ((pShwPT2->a[j].u & X86_PTE_PAE_PG_MASK) == HCPhysPT))
1414 {
1415 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, pShwPT->a[j].u, pShwPT2->a[j].u));
1416 }
1417 }
1418 }
1419 }
1420 }
1421 }
1422 }
1423 Assert(!cErrors);
1424}
1425# endif /* VBOX_STRICT */
1426
1427/**
1428 * Clear references to guest physical memory in a PAE / PAE page table.
1429 *
1430 * @returns Number of changed PTEs.
1431 * @param pPool The pool.
1432 * @param pPage The page.
1433 * @param pShwPT The shadow page table (mapping of the page).
1434 * @param pGstPT The guest page table.
1435 * @param pOldGstPT The old cached guest page table.
1436 */
1437DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT, PCX86PTPAE pOldGstPT)
1438{
1439 unsigned cChanged = 0;
1440
1441 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
1442 {
1443 if (pShwPT->a[i].n.u1Present)
1444 {
1445 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1446 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1447 {
1448#ifdef VBOX_STRICT
1449 RTHCPHYS HCPhys = -1;
1450 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1451 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1452#endif
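 /* The shadow entry may legitimately be write-protected and differ in accessed/dirty handling;
    only skip the flush when the relevant attribute bits match and the shadow is not more
    writable than the guest ('fHostRW <= fGuestRW'). */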
1453 uint64_t uHostAttr = pShwPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1454 bool fHostRW = !!(pShwPT->a[i].u & X86_PTE_RW);
1455 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1456 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1457
1458 if ( uHostAttr == uGuestAttr
1459 && fHostRW <= fGuestRW)
1460 continue;
1461 }
1462 cChanged++;
1463 /* Something was changed, so flush it. */
1464 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1465 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1466 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
1467 ASMAtomicWriteSize(&pShwPT->a[i].u, 0);
1468 }
1469 }
1470 return cChanged;
1471}
1472
1473
1474/**
1475 * Flush a dirty page
1476 *
1477 * @param pVM VM Handle.
1478 * @param pPool The pool.
1479 * @param idxSlot Dirty array slot index
1480 * @param fForceRemoval Force removal from the dirty page list
1481 */
1482static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fForceRemoval = false)
1483{
1484 PPGMPOOLPAGE pPage;
1485 unsigned idxPage;
1486
1487 Assert(idxSlot < RT_ELEMENTS(pPool->aIdxDirtyPages));
1488 if (pPool->aIdxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
1489 return;
1490
1491 idxPage = pPool->aIdxDirtyPages[idxSlot];
1492 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1493 pPage = &pPool->aPages[idxPage];
1494 Assert(pPage->idx == idxPage);
1495 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1496
1497 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1498 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1499
1500 /* Flush those PTEs that have changed. */
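 /* aDirtyPages[idxSlot] holds the copy of the guest PT taken when the page was marked dirty
    (see pgmPoolAddDirtyPage); only entries whose guest PTE changed since then need their shadow
    references dropped. */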
1501 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1502 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1503 void *pvGst;
1504 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1505 unsigned cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst, (PCX86PTPAE)&pPool->aDirtyPages[idxSlot][0]);
1506 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1507
1508 /** Note: we might want to consider keeping the dirty page active in case there were many changes. */
1509
1510 /* Write protect the page again to catch all write accesses. */
1511 rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys);
1512 Assert(rc == VINF_SUCCESS);
1513 pPage->fDirty = false;
1514
1515#ifdef VBOX_STRICT
1516 uint64_t fFlags = 0;
1517 RTHCPHYS HCPhys;
1518 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1519 AssertMsg( ( rc == VINF_SUCCESS
1520 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1521 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1522 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1523 || rc == VERR_PAGE_NOT_PRESENT,
1524 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1525#endif
1526
1527 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1528 Assert(pPage->cModifications);
1529 if (cChanges < 4)
1530 pPage->cModifications = 1; /* must use > 0 here */
1531 else
1532 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1533
1534 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1535 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages))
1536 pPool->idxFreeDirtyPage = idxSlot;
1537
1538 pPool->cDirtyPages--;
1539 pPool->aIdxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1540 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1541 Log(("Removed dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1542}
1543
1544# ifndef IN_RING3
1545/**
1546 * Add a new dirty page
1547 *
1548 * @param pVM VM Handle.
1549 * @param pPool The pool.
1550 * @param pPage The page.
1551 */
1552void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1553{
1554 unsigned idxFree;
1555
1556 Assert(PGMIsLocked(pVM));
1557 AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
1558 Assert(!pPage->fDirty);
1559
1560 idxFree = pPool->idxFreeDirtyPage;
1561 Assert(idxFree < RT_ELEMENTS(pPool->aIdxDirtyPages));
1562 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1563
1564 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aIdxDirtyPages))
1565 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* force removal */);
1566 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages));
1567 AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1568
1569 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1570
1571 /* Make a copy of the guest page table as we require valid GCPhys addresses when removing
1572 * references to physical pages. (the HCPhys linear lookup is *extremely* expensive!)
1573 */
1574 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1575 void *pvGst;
1576 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1577 memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
1578#ifdef VBOX_STRICT
1579 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1580#endif
1581
1582 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1583 pPage->fDirty = true;
1584 pPage->idxDirty = idxFree;
1585 pPool->aIdxDirtyPages[idxFree] = pPage->idx;
1586 pPool->cDirtyPages++;
1587
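 /* Advance the free index round-robin (the array size is a power of two, see the AssertCompile
    above); if that slot is still occupied but the array isn't full, scan for any free slot. */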
1588 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1589 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages)
1590 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1591 {
1592 unsigned i;
1593 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1594 {
1595 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1596 if (pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1597 {
1598 pPool->idxFreeDirtyPage = idxFree;
1599 break;
1600 }
1601 }
1602 Assert(i != RT_ELEMENTS(pPool->aIdxDirtyPages));
1603 }
1604
1605 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages) || pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1606 return;
1607}
1608# endif /* !IN_RING3 */
1609
1610/**
1611 * Checks if the specified page is dirty (not write monitored).
1612 *
1613 * @return dirty or not
1614 * @param pVM VM Handle.
1615 * @param GCPhys Guest physical address
1616 */
1617bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1618{
1619 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1620 Assert(PGMIsLocked(pVM));
1621 if (!pPool->cDirtyPages)
1622 return false;
1623
1624 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1625
1626 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1627 {
1628 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1629 {
1630 PPGMPOOLPAGE pPage;
1631 unsigned idxPage = pPool->aIdxDirtyPages[i];
1632
1633 pPage = &pPool->aPages[idxPage];
1634 if (pPage->GCPhys == GCPhys)
1635 return true;
1636 }
1637 }
1638 return false;
1639}
1640
1641/**
1642 * Resets all dirty pages by reinstating page monitoring.
1643 *
1644 * @param pVM VM Handle.
1645 * @param fForceRemoval Force removal of all dirty pages
1646 */
1647void pgmPoolResetDirtyPages(PVM pVM, bool fForceRemoval)
1648{
1649 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1650 Assert(PGMIsLocked(pVM));
1651 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1652
1653 if (!pPool->cDirtyPages)
1654 return;
1655
1656 Log(("pgmPoolResetDirtyPages\n"));
1657 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1658 pgmPoolFlushDirtyPage(pVM, pPool, i, fForceRemoval);
1659
1660 pPool->idxFreeDirtyPage = 0;
1661 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1662 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1663 {
1664 unsigned i;
1665 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1666 {
1667 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1668 {
1669 pPool->idxFreeDirtyPage = i;
1670 break;
1671 }
1672 }
1673 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1674 }
1675
1676 Assert(pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages));
1677 return;
1678}
1679# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1680#endif /* PGMPOOL_WITH_MONITORING */
1681
1682#ifdef PGMPOOL_WITH_CACHE
1683
1684/**
1685 * Inserts a page into the GCPhys hash table.
1686 *
1687 * @param pPool The pool.
1688 * @param pPage The page.
1689 */
1690DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1691{
1692 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1693 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
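    /* Insert at the head of the hash bucket; pages in a bucket are chained through iNext. */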
1694 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1695 pPage->iNext = pPool->aiHash[iHash];
1696 pPool->aiHash[iHash] = pPage->idx;
1697}
1698
1699
1700/**
1701 * Removes a page from the GCPhys hash table.
1702 *
1703 * @param pPool The pool.
1704 * @param pPage The page.
1705 */
1706DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1707{
1708 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1709 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1710 if (pPool->aiHash[iHash] == pPage->idx)
1711 pPool->aiHash[iHash] = pPage->iNext;
1712 else
1713 {
1714 uint16_t iPrev = pPool->aiHash[iHash];
1715 for (;;)
1716 {
1717 const int16_t i = pPool->aPages[iPrev].iNext;
1718 if (i == pPage->idx)
1719 {
1720 pPool->aPages[iPrev].iNext = pPage->iNext;
1721 break;
1722 }
1723 if (i == NIL_PGMPOOL_IDX)
1724 {
1725 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1726 break;
1727 }
1728 iPrev = i;
1729 }
1730 }
1731 pPage->iNext = NIL_PGMPOOL_IDX;
1732}
1733
1734
1735/**
1736 * Frees up one cache page.
1737 *
1738 * @returns VBox status code.
1739 * @retval VINF_SUCCESS on success.
1740 * @param pPool The pool.
1741 * @param iUser The user index.
1742 */
1743static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1744{
1745#ifndef IN_RC
1746 const PVM pVM = pPool->CTX_SUFF(pVM);
1747#endif
1748 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
1749 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1750
1751 /*
1752 * Select one page from the tail of the age list.
1753 */
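    /* Repeatedly take the page at the tail of the age list (LRU), skipping the caller's own
       page and any page locked as the active shadow CR3; locked pages are moved back to the
       head of the list so they stop turning up here. Bail out after 8192 attempts. */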
1754 PPGMPOOLPAGE pPage;
1755 for (unsigned iLoop = 0; ; iLoop++)
1756 {
1757 uint16_t iToFree = pPool->iAgeTail;
1758 if (iToFree == iUser)
1759 iToFree = pPool->aPages[iToFree].iAgePrev;
1760/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1761 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1762 {
1763 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1764 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1765 {
1766 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1767 continue;
1768 iToFree = i;
1769 break;
1770 }
1771 }
1772*/
1773 Assert(iToFree != iUser);
1774 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1775 pPage = &pPool->aPages[iToFree];
1776
1777 /*
1778 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1779 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1780 */
1781 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1782 break;
1783 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1784 pgmPoolCacheUsed(pPool, pPage);
1785 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1786 }
1787
1788 /*
1789 * Found a usable page, flush it and return.
1790 */
1791 int rc = pgmPoolFlushPage(pPool, pPage);
1792 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1793 if (rc == VINF_SUCCESS)
1794 PGM_INVL_ALL_VCPU_TLBS(pVM);
1795 return rc;
1796}
1797
1798
1799/**
1800 * Checks if a kind mismatch is really a page being reused
1801 * or if it's just normal remappings.
1802 *
1803 * @returns true if reused and the cached page (enmKind1) should be flushed
1804 * @returns false if not reused.
1805 * @param enmKind1 The kind of the cached page.
1806 * @param enmKind2 The kind of the requested page.
1807 */
1808static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1809{
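    /* Rough classification: physical/identity kinds (and the PAE PDPT shadowing a 32-bit
       guest) are never treated as reused; a cached page shadowing a 32-bit guest structure
       counts as reused when the same GCPhys is now requested as a PAE/AMD64 (or phys) kind,
       and vice versa; nested roots are never flushed. */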
1810 switch (enmKind1)
1811 {
1812 /*
1813 * Never reuse them. There is no remapping in non-paging mode.
1814 */
1815 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1816 case PGMPOOLKIND_32BIT_PD_PHYS:
1817 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1818 case PGMPOOLKIND_PAE_PD_PHYS:
1819 case PGMPOOLKIND_PAE_PDPT_PHYS:
1820 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1821 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1822 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1823 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1824 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1825 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1826 return false;
1827
1828 /*
1829 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1830 */
1831 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1832 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1833 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1834 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1835 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1836 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1837 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1838 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1839 case PGMPOOLKIND_32BIT_PD:
1840 case PGMPOOLKIND_PAE_PDPT:
1841 switch (enmKind2)
1842 {
1843 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1844 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1845 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1846 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1847 case PGMPOOLKIND_64BIT_PML4:
1848 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1849 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1850 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1851 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1852 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1853 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1854 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1855 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1856 return true;
1857 default:
1858 return false;
1859 }
1860
1861 /*
1862 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1863 */
1864 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1865 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1866 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1867 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1868 case PGMPOOLKIND_64BIT_PML4:
1869 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1870 switch (enmKind2)
1871 {
1872 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1873 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1874 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1875 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1876 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1877 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1878 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1879 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1880 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1881 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1882 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1883 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1884 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1885 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1886 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1887 return true;
1888 default:
1889 return false;
1890 }
1891
1892 /*
1893 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1894 */
1895 case PGMPOOLKIND_ROOT_NESTED:
1896 return false;
1897
1898 default:
1899 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1900 }
1901}
1902
1903
1904/**
1905 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1906 *
1907 * @returns VBox status code.
1908 * @retval VINF_PGM_CACHED_PAGE on success.
1909 * @retval VERR_FILE_NOT_FOUND if not found.
1910 * @param pPool The pool.
1911 * @param GCPhys The GC physical address of the page we're gonna shadow.
1912 * @param enmKind The kind of mapping.
1913 * @param enmAccess Access type for the mapping (only relevant for big pages)
1914 * @param iUser The shadow page pool index of the user table.
1915 * @param iUserTable The index into the user table (shadowed).
1916 * @param ppPage Where to store the pointer to the page.
1917 */
1918static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1919{
1920#ifndef IN_RC
1921 const PVM pVM = pPool->CTX_SUFF(pVM);
1922#endif
1923 /*
1924 * Look up the GCPhys in the hash.
1925 */
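    /* Walk the hash bucket: a page with matching GCPhys, kind and access is a cache hit;
       a matching GCPhys with a conflicting kind means the guest page was reused, in which
       case the stale cached page is flushed and the search abandoned. */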
1926 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1927 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
1928 if (i != NIL_PGMPOOL_IDX)
1929 {
1930 do
1931 {
1932 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1933 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1934 if (pPage->GCPhys == GCPhys)
1935 {
1936 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
1937 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
1938 {
1939 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
1940 * doesn't flush it in case there are no more free use records.
1941 */
1942 pgmPoolCacheUsed(pPool, pPage);
1943
1944 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1945 if (RT_SUCCESS(rc))
1946 {
1947 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
1948 *ppPage = pPage;
1949 if (pPage->cModifications)
1950 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
1951 STAM_COUNTER_INC(&pPool->StatCacheHits);
1952 return VINF_PGM_CACHED_PAGE;
1953 }
1954 return rc;
1955 }
1956
1957 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
1958 {
1959 /*
1960 * The kind is different. In some cases we should now flush the page
1961 * as it has been reused, but in most cases this is normal remapping
1962 * of PDs as PT or big pages using the GCPhys field in a slightly
1963 * different way than the other kinds.
1964 */
1965 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1966 {
1967 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1968 pgmPoolFlushPage(pPool, pPage);
1969 PGM_INVL_VCPU_TLBS(VMMGetCpu(pVM)); /* see PT handler. */
1970 break;
1971 }
1972 }
1973 }
1974
1975 /* next */
1976 i = pPage->iNext;
1977 } while (i != NIL_PGMPOOL_IDX);
1978 }
1979
1980 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
1981 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1982 return VERR_FILE_NOT_FOUND;
1983}
1984
1985
1986/**
1987 * Inserts a page into the cache.
1988 *
1989 * @param pPool The pool.
1990 * @param pPage The cached page.
1991 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1992 */
1993static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1994{
1995 /*
1996 * Insert into the GCPhys hash if the page is fit for that.
1997 */
1998 Assert(!pPage->fCached);
1999 if (fCanBeCached)
2000 {
2001 pPage->fCached = true;
2002 pgmPoolHashInsert(pPool, pPage);
2003 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2004 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2005 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2006 }
2007 else
2008 {
2009 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2010 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2011 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2012 }
2013
2014 /*
2015 * Insert at the head of the age list.
2016 */
2017 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2018 pPage->iAgeNext = pPool->iAgeHead;
2019 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2020 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2021 else
2022 pPool->iAgeTail = pPage->idx;
2023 pPool->iAgeHead = pPage->idx;
2024}
2025
2026
2027/**
2028 * Flushes a cached page.
2029 *
2030 * @param pPool The pool.
2031 * @param pPage The cached page.
2032 */
2033static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2034{
2035 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2036
2037 /*
2038 * Remove the page from the hash.
2039 */
2040 if (pPage->fCached)
2041 {
2042 pPage->fCached = false;
2043 pgmPoolHashRemove(pPool, pPage);
2044 }
2045 else
2046 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2047
2048 /*
2049 * Remove it from the age list.
2050 */
2051 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2052 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2053 else
2054 pPool->iAgeTail = pPage->iAgePrev;
2055 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2056 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2057 else
2058 pPool->iAgeHead = pPage->iAgeNext;
2059 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2060 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2061}
2062
2063#endif /* PGMPOOL_WITH_CACHE */
2064#ifdef PGMPOOL_WITH_MONITORING
2065
2066/**
2067 * Looks for pages sharing the monitor.
2068 *
2069 * @returns Pointer to the head page.
2070 * @returns NULL if not found.
2071 * @param pPool The Pool
2072 * @param pNewPage The page which is going to be monitored.
2073 */
2074static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2075{
2076#ifdef PGMPOOL_WITH_CACHE
2077 /*
2078 * Look up the GCPhys in the hash.
2079 */
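    /* Any page in the bucket that shadows the same guest page and is of a monitored kind
       shares the monitor; follow the iMonitoredPrev links to return the head of that chain. */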
2080 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2081 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2082 if (i == NIL_PGMPOOL_IDX)
2083 return NULL;
2084 do
2085 {
2086 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2087 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2088 && pPage != pNewPage)
2089 {
2090 switch (pPage->enmKind)
2091 {
2092 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2093 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2094 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2095 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2096 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2097 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2098 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2099 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2100 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2101 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2102 case PGMPOOLKIND_64BIT_PML4:
2103 case PGMPOOLKIND_32BIT_PD:
2104 case PGMPOOLKIND_PAE_PDPT:
2105 {
2106 /* find the head */
2107 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2108 {
2109 Assert(pPage->iMonitoredPrev != pPage->idx);
2110 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2111 }
2112 return pPage;
2113 }
2114
2115 /* ignore, no monitoring. */
2116 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2117 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2118 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2119 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2120 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2121 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2122 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2123 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2124 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2125 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2126 case PGMPOOLKIND_ROOT_NESTED:
2127 case PGMPOOLKIND_PAE_PD_PHYS:
2128 case PGMPOOLKIND_PAE_PDPT_PHYS:
2129 case PGMPOOLKIND_32BIT_PD_PHYS:
2130 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2131 break;
2132 default:
2133 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2134 }
2135 }
2136
2137 /* next */
2138 i = pPage->iNext;
2139 } while (i != NIL_PGMPOOL_IDX);
2140#endif
2141 return NULL;
2142}
2143
2144
2145/**
2146 * Enables write monitoring of a guest page.
2147 *
2148 * @returns VBox status code.
2149 * @retval VINF_SUCCESS on success.
2150 * @param pPool The pool.
2151 * @param pPage The cached page.
2152 */
2153static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2154{
2155 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
2156
2157 /*
2158 * Filter out the relevant kinds.
2159 */
2160 switch (pPage->enmKind)
2161 {
2162 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2163 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2164 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2165 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2166 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2167 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2168 case PGMPOOLKIND_64BIT_PML4:
2169 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2170 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2171 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2172 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2173 case PGMPOOLKIND_32BIT_PD:
2174 case PGMPOOLKIND_PAE_PDPT:
2175 break;
2176
2177 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2178 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2179 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2180 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2181 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2182 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2183 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2184 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2185 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2186 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2187 case PGMPOOLKIND_ROOT_NESTED:
2188 /* Nothing to monitor here. */
2189 return VINF_SUCCESS;
2190
2191 case PGMPOOLKIND_32BIT_PD_PHYS:
2192 case PGMPOOLKIND_PAE_PDPT_PHYS:
2193 case PGMPOOLKIND_PAE_PD_PHYS:
2194 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2195 /* Nothing to monitor here. */
2196 return VINF_SUCCESS;
2197#ifdef PGMPOOL_WITH_MIXED_PT_CR3
2198 break;
2199#else
2200 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2201#endif
2202 default:
2203 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2204 }
2205
2206 /*
2207 * Install handler.
2208 */
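    /* If another pool page already monitors this guest page, join its monitoring chain
       (one physical access handler serves the whole chain); otherwise register a new
       write handler covering the guest page. */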
2209 int rc;
2210 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2211 if (pPageHead)
2212 {
2213 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2214 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2215
2216#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2217 if (pPageHead->fDirty)
2218 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, true /* force removal */);
2219#endif
2220
2221 pPage->iMonitoredPrev = pPageHead->idx;
2222 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2223 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2224 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2225 pPageHead->iMonitoredNext = pPage->idx;
2226 rc = VINF_SUCCESS;
2227 }
2228 else
2229 {
2230 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2231 PVM pVM = pPool->CTX_SUFF(pVM);
2232 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2233 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2234 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
2235 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2236 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2237 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2238 pPool->pszAccessHandler);
2239 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2240 * the heap size should suffice. */
2241 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2242 Assert(!(VMMGetCpu(pVM)->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(VMMGetCpu(pVM), VMCPU_FF_PGM_SYNC_CR3));
2243 }
2244 pPage->fMonitored = true;
2245 return rc;
2246}
2247
2248
2249/**
2250 * Disables write monitoring of a guest page.
2251 *
2252 * @returns VBox status code.
2253 * @retval VINF_SUCCESS on success.
2254 * @param pPool The pool.
2255 * @param pPage The cached page.
2256 */
2257static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2258{
2259 /*
2260 * Filter out the relevant kinds.
2261 */
2262 switch (pPage->enmKind)
2263 {
2264 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2265 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2266 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2267 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2268 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2269 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2270 case PGMPOOLKIND_64BIT_PML4:
2271 case PGMPOOLKIND_32BIT_PD:
2272 case PGMPOOLKIND_PAE_PDPT:
2273 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2274 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2275 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2276 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2277 break;
2278
2279 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2280 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2281 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2282 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2283 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2284 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2285 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2286 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2287 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2288 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2289 case PGMPOOLKIND_ROOT_NESTED:
2290 case PGMPOOLKIND_PAE_PD_PHYS:
2291 case PGMPOOLKIND_PAE_PDPT_PHYS:
2292 case PGMPOOLKIND_32BIT_PD_PHYS:
2293 /* Nothing to monitor here. */
2294 return VINF_SUCCESS;
2295
2296#ifdef PGMPOOL_WITH_MIXED_PT_CR3
2297 break;
2298#endif
2299 default:
2300 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2301 }
2302
2303 /*
2304 * Remove the page from the monitored list or uninstall it if last.
2305 */
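    /* If the page is part of a monitoring chain, unlink it; when it is the chain head, the
       next page becomes the new head and the handler callbacks are re-pointed at it. The
       physical handler is only deregistered when this was the last page for that GCPhys. */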
2306 const PVM pVM = pPool->CTX_SUFF(pVM);
2307 int rc;
2308 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2309 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2310 {
2311 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2312 {
2313 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2314 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2315 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2316 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2317 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2318 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2319 pPool->pszAccessHandler);
2320 AssertFatalRCSuccess(rc);
2321 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2322 }
2323 else
2324 {
2325 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2326 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2327 {
2328 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2329 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2330 }
2331 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2332 rc = VINF_SUCCESS;
2333 }
2334 }
2335 else
2336 {
2337 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2338 AssertFatalRC(rc);
2339#ifdef VBOX_STRICT
2340 PVMCPU pVCpu = VMMGetCpu(pVM);
2341#endif
2342 AssertMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2343 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2344 }
2345 pPage->fMonitored = false;
2346
2347 /*
2348 * Remove it from the list of modified pages (if in it).
2349 */
2350 pgmPoolMonitorModifiedRemove(pPool, pPage);
2351
2352 return rc;
2353}
2354
2355
2356/**
2357 * Inserts the page into the list of modified pages.
2358 *
2359 * @param pPool The pool.
2360 * @param pPage The page.
2361 */
2362void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2363{
2364 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2365 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2366 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2367 && pPool->iModifiedHead != pPage->idx,
2368 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2369 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2370 pPool->iModifiedHead, pPool->cModifiedPages));
2371
2372 pPage->iModifiedNext = pPool->iModifiedHead;
2373 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2374 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2375 pPool->iModifiedHead = pPage->idx;
2376 pPool->cModifiedPages++;
2377#ifdef VBOX_WITH_STATISTICS
2378 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2379 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2380#endif
2381}
2382
2383
2384/**
2385 * Removes the page from the list of modified pages and resets the
2386 * modification counter.
2387 *
2388 * @param pPool The pool.
2389 * @param pPage The page which is believed to be in the list of modified pages.
2390 */
2391static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2392{
2393 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2394 if (pPool->iModifiedHead == pPage->idx)
2395 {
2396 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2397 pPool->iModifiedHead = pPage->iModifiedNext;
2398 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2399 {
2400 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2401 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2402 }
2403 pPool->cModifiedPages--;
2404 }
2405 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2406 {
2407 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2408 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2409 {
2410 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2411 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2412 }
2413 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2414 pPool->cModifiedPages--;
2415 }
2416 else
2417 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2418 pPage->cModifications = 0;
2419}
2420
2421
2422/**
2423 * Zaps the list of modified pages, resetting their modification counters in the process.
2424 *
2425 * @param pVM The VM handle.
2426 */
2427static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2428{
2429 pgmLock(pVM);
2430 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2431 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2432
2433 unsigned cPages = 0; NOREF(cPages);
2434
2435#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2436 pgmPoolResetDirtyPages(pVM, true /* force removal. */);
2437#endif
2438
2439 uint16_t idx = pPool->iModifiedHead;
2440 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2441 while (idx != NIL_PGMPOOL_IDX)
2442 {
2443 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2444 idx = pPage->iModifiedNext;
2445 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2446 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2447 pPage->cModifications = 0;
2448 Assert(++cPages);
2449 }
2450 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2451 pPool->cModifiedPages = 0;
2452 pgmUnlock(pVM);
2453}
2454
2455
2456#ifdef IN_RING3
2457/**
2458 * Callback to clear all shadow pages and clear all modification counters.
2459 *
2460 * @returns VBox status code.
2461 * @param pVM The VM handle.
2462 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
2463 * @param pvUser Unused parameter.
2464 *
2465 * @remark Should only be used when monitoring is available, thus placed in
2466 * the PGMPOOL_WITH_MONITORING \#ifdef.
2467 */
2468DECLCALLBACK(int) pgmPoolClearAll(PVM pVM, PVMCPU pVCpu, void *pvUser)
2469{
2470 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2471 STAM_PROFILE_START(&pPool->StatClearAll, c);
2472 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2473 NOREF(pvUser); NOREF(pVCpu);
2474
2475 pgmLock(pVM);
2476
2477 /*
2478 * Iterate all the pages until we've encountered all that are in use.
2479 * This is a simple but not quite optimal solution.
2480 */
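    /* For shadow page tables: flush any pending dirty-page state and zero the table
       contents; for every page: reset the modification tracking. Stop as soon as all
       in-use pages have been visited (cLeft). */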
2481 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2482 unsigned cLeft = pPool->cUsedPages;
2483 unsigned iPage = pPool->cCurPages;
2484 while (--iPage >= PGMPOOL_IDX_FIRST)
2485 {
2486 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2487 if (pPage->GCPhys != NIL_RTGCPHYS)
2488 {
2489 switch (pPage->enmKind)
2490 {
2491 /*
2492 * We only care about shadow page tables.
2493 */
2494 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2495 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2496 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2497 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2498 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2499 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2500 {
2501#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2502 if (pPage->fDirty)
2503 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, true /* force removal */);
2504#endif
2505#ifdef PGMPOOL_WITH_USER_TRACKING
2506 if (pPage->cPresent)
2507#endif
2508 {
2509 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2510 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2511 ASMMemZeroPage(pvShw);
2512 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2513#ifdef PGMPOOL_WITH_USER_TRACKING
2514 pPage->cPresent = 0;
2515 pPage->iFirstPresent = ~0;
2516#endif
2517 }
2518#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2519 else
2520 Assert(!pPage->fDirty);
2521#endif
2522 }
2523 /* fall thru */
2524
2525 default:
2526 Assert(!pPage->cModifications || ++cModifiedPages);
2527 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2528 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2529 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2530 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2531 pPage->cModifications = 0;
2532 break;
2533
2534 }
2535 if (!--cLeft)
2536 break;
2537 }
2538 }
2539
2540 /* Wipe the special pages too. */
2541 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2542 {
2543 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2544 if (pPage->GCPhys != NIL_RTGCPHYS)
2545 {
2546 Assert(!pPage->cModifications || ++cModifiedPages);
2547 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2548 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2549 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2550 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2551 pPage->cModifications = 0;
2552 }
2553 }
2554
2555#ifndef DEBUG_michael
2556 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2557#endif
2558 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2559 pPool->cModifiedPages = 0;
2560
2561#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2562 /*
2563 * Clear all the GCPhys links and rebuild the phys ext free list.
2564 */
2565 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2566 pRam;
2567 pRam = pRam->CTX_SUFF(pNext))
2568 {
2569 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2570 while (iPage-- > 0)
2571 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
2572 }
2573
2574 pPool->iPhysExtFreeHead = 0;
2575 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2576 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2577 for (unsigned i = 0; i < cMaxPhysExts; i++)
2578 {
2579 paPhysExts[i].iNext = i + 1;
2580 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2581 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2582 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2583 }
2584 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2585#endif
2586
2587#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2588 /* Clear all dirty pages. */
2589 pPool->idxFreeDirtyPage = 0;
2590 pPool->cDirtyPages = 0;
2591 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
2592 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
2593#endif
2594
2595 /* Clear the PGM_SYNC_CLEAR_PGM_POOL flag on all VCPUs to prevent redundant flushes. */
2596 for (unsigned idCpu = 0; idCpu < pVM->cCPUs; idCpu++)
2597 {
2598 PVMCPU pVCpu = &pVM->aCpus[idCpu];
2599
2600 pVCpu->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2601 }
2602
2603 pPool->cPresent = 0;
2604 pgmUnlock(pVM);
2605 PGM_INVL_ALL_VCPU_TLBS(pVM);
2606 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2607 return VINF_SUCCESS;
2608}
2609#endif /* IN_RING3 */
2610
2611
2612/**
2613 * Handles SyncCR3 pool tasks.
2614 *
2615 * @returns VBox status code.
2616 * @retval VINF_SUCCESS on success.
2617 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2618 * @param pVCpu The VMCPU handle.
2619 * @remark Should only be used when monitoring is available, thus placed in
2620 * the PGMPOOL_WITH_MONITORING #ifdef.
2621 */
2622int pgmPoolSyncCR3(PVMCPU pVCpu)
2623{
2624 PVM pVM = pVCpu->CTX_SUFF(pVM);
2625 LogFlow(("pgmPoolSyncCR3\n"));
2626
2627 /*
2628 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2629 * Occasionally we will have to clear all the shadow page tables because we wanted
2630 * to monitor a page which was mapped by too many shadowed page tables. This operation
2631 * sometimes referred to as a 'lightweight flush'.
2632 */
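    /* In ring-3 the full clear is done right here via an EMT rendezvous; in ring-0 and
       raw mode it is deferred by forcing a CR3 sync and returning VINF_PGM_SYNC_CR3.
       Otherwise only the modification counters are reset. */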
2633# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2634 if (ASMBitTestAndClear(&pVCpu->pgm.s.fSyncFlags, PGM_SYNC_CLEAR_PGM_POOL_BIT))
2635 {
2636 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmPoolClearAll, NULL);
2637 AssertRC(rc);
2638 }
2639# else /* !IN_RING3 */
2640 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2641 {
2642 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2643 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2644 return VINF_PGM_SYNC_CR3;
2645 }
2646# endif /* !IN_RING3 */
2647 else
2648 pgmPoolMonitorModifiedClearAll(pVM);
2649
2650 return VINF_SUCCESS;
2651}
2652
2653#endif /* PGMPOOL_WITH_MONITORING */
2654#ifdef PGMPOOL_WITH_USER_TRACKING
2655
2656/**
2657 * Frees up at least one user entry.
2658 *
2659 * @returns VBox status code.
2660 * @retval VINF_SUCCESS if successfully added.
2661 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2662 * @param pPool The pool.
2663 * @param iUser The user index.
2664 */
2665static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2666{
2667 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2668#ifdef PGMPOOL_WITH_CACHE
2669 /*
2670 * Just free cached pages in a braindead fashion.
2671 */
2672 /** @todo walk the age list backwards and free the first with usage. */
2673 int rc = VINF_SUCCESS;
2674 do
2675 {
2676 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2677 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2678 rc = rc2;
2679 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2680 return rc;
2681#else
2682 /*
2683 * Lazy approach.
2684 */
2685 /* @todo This path no longer works (CR3 root pages will be flushed)!! */
2686 AssertCompileFailed();
2687 Assert(!CPUMIsGuestInLongMode(pVM));
2688 pgmPoolFlushAllInt(pPool);
2689 return VERR_PGM_POOL_FLUSHED;
2690#endif
2691}
2692
2693
2694/**
2695 * Inserts a page into the cache.
2696 *
2697 * This will create a user node for the page, insert it into the GCPhys
2698 * hash, and insert it into the age list.
2699 *
2700 * @returns VBox status code.
2701 * @retval VINF_SUCCESS if successfully added.
2702 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2703 * @param pPool The pool.
2704 * @param pPage The cached page.
2705 * @param GCPhys The GC physical address of the page we're gonna shadow.
2706 * @param iUser The user index.
2707 * @param iUserTable The user table index.
2708 */
2709DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2710{
2711 int rc = VINF_SUCCESS;
2712 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2713
2714 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2715
2716#ifdef VBOX_STRICT
2717 /*
2718 * Check that the entry doesn't already exist.
2719 */
2720 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2721 {
2722 uint16_t i = pPage->iUserHead;
2723 do
2724 {
2725 Assert(i < pPool->cMaxUsers);
2726 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2727 i = paUsers[i].iNext;
2728 } while (i != NIL_PGMPOOL_USER_INDEX);
2729 }
2730#endif
2731
2732 /*
2733 * Find a free user node.
2734 */
2735 uint16_t i = pPool->iUserFreeHead;
2736 if (i == NIL_PGMPOOL_USER_INDEX)
2737 {
2738 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2739 if (RT_FAILURE(rc))
2740 return rc;
2741 i = pPool->iUserFreeHead;
2742 }
2743
2744 /*
2745 * Unlink the user node from the free list,
2746 * initialize and insert it into the user list.
2747 */
2748 pPool->iUserFreeHead = paUsers[i].iNext;
2749 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2750 paUsers[i].iUser = iUser;
2751 paUsers[i].iUserTable = iUserTable;
2752 pPage->iUserHead = i;
2753
2754 /*
2755 * Insert into cache and enable monitoring of the guest page if enabled.
2756 *
2757 * Until we implement caching of all levels, including the CR3 one, we'll
2758 * have to make sure we don't try monitor & cache any recursive reuse of
2759 * a monitored CR3 page. Because all windows versions are doing this we'll
2760 * have to be able to do combined access monitoring, CR3 + PT and
2761 * PD + PT (guest PAE).
2762 *
2763 * Update:
2764 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2765 */
2766#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2767# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2768 const bool fCanBeMonitored = true;
2769# else
2770 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2771 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2772 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2773# endif
2774# ifdef PGMPOOL_WITH_CACHE
2775 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2776# endif
2777 if (fCanBeMonitored)
2778 {
2779# ifdef PGMPOOL_WITH_MONITORING
2780 rc = pgmPoolMonitorInsert(pPool, pPage);
2781 AssertRC(rc);
2782# endif
2783    }
2784#endif /* PGMPOOL_WITH_MONITORING */
2785 return rc;
2786}
2787
2788
2789# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2790/**
2791 * Adds a user reference to a page.
2792 *
2793 * This will move the page to the head of the
2794 *
2795 * @returns VBox status code.
2796 * @retval VINF_SUCCESS if successfully added.
2797 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2798 * @param pPool The pool.
2799 * @param pPage The cached page.
2800 * @param iUser The user index.
2801 * @param iUserTable The user table.
2802 */
2803static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2804{
2805 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2806
2807 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2808
2809# ifdef VBOX_STRICT
2810 /*
2811 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2812 */
2813 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2814 {
2815 uint16_t i = pPage->iUserHead;
2816 do
2817 {
2818 Assert(i < pPool->cMaxUsers);
2819 AssertMsg(iUser != PGMPOOL_IDX_PD || iUser != PGMPOOL_IDX_PDPT || iUser != PGMPOOL_IDX_NESTED_ROOT || iUser != PGMPOOL_IDX_AMD64_CR3 ||
2820 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2821 i = paUsers[i].iNext;
2822 } while (i != NIL_PGMPOOL_USER_INDEX);
2823 }
2824# endif
2825
2826 /*
2827 * Allocate a user node.
2828 */
2829 uint16_t i = pPool->iUserFreeHead;
2830 if (i == NIL_PGMPOOL_USER_INDEX)
2831 {
2832 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2833 if (RT_FAILURE(rc))
2834 return rc;
2835 i = pPool->iUserFreeHead;
2836 }
2837 pPool->iUserFreeHead = paUsers[i].iNext;
2838
2839 /*
2840 * Initialize the user node and insert it.
2841 */
2842 paUsers[i].iNext = pPage->iUserHead;
2843 paUsers[i].iUser = iUser;
2844 paUsers[i].iUserTable = iUserTable;
2845 pPage->iUserHead = i;
2846
2847# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2848 if (pPage->fDirty)
2849 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, true /* force removal */);
2850# endif
2851
2852# ifdef PGMPOOL_WITH_CACHE
2853 /*
2854 * Tell the cache to update its replacement stats for this page.
2855 */
2856 pgmPoolCacheUsed(pPool, pPage);
2857# endif
2858 return VINF_SUCCESS;
2859}
2860# endif /* PGMPOOL_WITH_CACHE */
2861
2862
2863/**
2864 * Frees a user record associated with a page.
2865 *
2866 * This does not clear the entry in the user table, it simply returns the
2867 * user record to the chain of free records.
2868 *
2869 * @param pPool The pool.
2870 * @param pPage The shadow page.
2871 * @param iUser The shadow page pool index of the user table.
2872 * @param iUserTable The index into the user table (shadowed).
2873 */
2874static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2875{
2876 /*
2877 * Unlink and free the specified user entry.
2878 */
2879 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2880
2881 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2882 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2883 uint16_t i = pPage->iUserHead;
2884 if ( i != NIL_PGMPOOL_USER_INDEX
2885 && paUsers[i].iUser == iUser
2886 && paUsers[i].iUserTable == iUserTable)
2887 {
2888 pPage->iUserHead = paUsers[i].iNext;
2889
2890 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2891 paUsers[i].iNext = pPool->iUserFreeHead;
2892 pPool->iUserFreeHead = i;
2893 return;
2894 }
2895
2896 /* General: Linear search. */
2897 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2898 while (i != NIL_PGMPOOL_USER_INDEX)
2899 {
2900 if ( paUsers[i].iUser == iUser
2901 && paUsers[i].iUserTable == iUserTable)
2902 {
2903 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2904 paUsers[iPrev].iNext = paUsers[i].iNext;
2905 else
2906 pPage->iUserHead = paUsers[i].iNext;
2907
2908 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2909 paUsers[i].iNext = pPool->iUserFreeHead;
2910 pPool->iUserFreeHead = i;
2911 return;
2912 }
2913 iPrev = i;
2914 i = paUsers[i].iNext;
2915 }
2916
2917 /* Fatal: didn't find it */
2918 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2919 iUser, iUserTable, pPage->GCPhys));
2920}
2921
2922
2923/**
2924 * Gets the entry size of a shadow table.
2925 *
2926 * @param enmKind The kind of page.
2927 *
2928 * @returns The size of the entry in bytes. That is, 4 or 8.
2929 * @returns If the kind is not for a table, an assertion is raised and 0 is
2930 * returned.
2931 */
2932DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2933{
2934 switch (enmKind)
2935 {
2936 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2937 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2938 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2939 case PGMPOOLKIND_32BIT_PD:
2940 case PGMPOOLKIND_32BIT_PD_PHYS:
2941 return 4;
2942
2943 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2944 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2945 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2946 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2947 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2948 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2949 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2950 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2951 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2952 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2953 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2954 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2955 case PGMPOOLKIND_64BIT_PML4:
2956 case PGMPOOLKIND_PAE_PDPT:
2957 case PGMPOOLKIND_ROOT_NESTED:
2958 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2959 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2960 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2961 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2962 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2963 case PGMPOOLKIND_PAE_PD_PHYS:
2964 case PGMPOOLKIND_PAE_PDPT_PHYS:
2965 return 8;
2966
2967 default:
2968 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2969 }
2970}
2971
2972
2973/**
2974 * Gets the entry size of a guest table.
2975 *
2976 * @param enmKind The kind of page.
2977 *
2978 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2979 * @returns If the kind is not for a table, an assertion is raised and 0 is
2980 * returned.
2981 */
2982DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2983{
2984 switch (enmKind)
2985 {
2986 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2987 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2988 case PGMPOOLKIND_32BIT_PD:
2989 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2990 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2991 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2992 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2993 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2994 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2995 return 4;
2996
2997 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2998 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2999 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3000 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3001 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3002 case PGMPOOLKIND_64BIT_PML4:
3003 case PGMPOOLKIND_PAE_PDPT:
3004 return 8;
3005
3006 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3007 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3008 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3009 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3010 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3011 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3012 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3013 case PGMPOOLKIND_ROOT_NESTED:
3014 case PGMPOOLKIND_PAE_PD_PHYS:
3015 case PGMPOOLKIND_PAE_PDPT_PHYS:
3016 case PGMPOOLKIND_32BIT_PD_PHYS:
3017 /** @todo can we return 0? (nobody is calling this...) */
3018 AssertFailed();
3019 return 0;
3020
3021 default:
3022 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3023 }
3024}
3025
3026#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3027
3028/**
3029 * Scans one shadow page table for mappings of a physical page.
3030 *
3031 * @param pVM The VM handle.
3032 * @param pPhysPage The guest page in question.
3033 * @param iShw The shadow page table.
3034 * @param cRefs The number of references made in that PT.
3035 */
3036static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
3037{
3038 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
3039 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3040
3041 /*
3042 * Assert sanity.
3043 */
3044 Assert(cRefs == 1);
3045 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3046 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3047
3048 /*
3049 * Then, clear the actual mappings to the page in the shadow PT.
3050 */
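    /* For each supported PT kind: map the shadow page table, scan from iFirstPresent and
       zero every PTE that maps the page's host physical address. cRefs says how many such
       entries to expect; not finding them all is a fatal condition. */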
3051 switch (pPage->enmKind)
3052 {
3053 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3054 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3055 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3056 {
3057 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3058 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3059 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3060 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3061 {
3062 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
3063 pPT->a[i].u = 0;
3064 cRefs--;
3065 if (!cRefs)
3066 return;
3067 }
3068#ifdef LOG_ENABLED
3069 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3070 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3071 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3072 {
3073 Log(("i=%d cRefs=%d\n", i, cRefs--));
3074 }
3075#endif
3076 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3077 break;
3078 }
3079
3080 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3081 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3082 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3083 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3084 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3085 {
3086 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3087 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3088 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3089 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3090 {
3091 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3092 pPT->a[i].u = 0;
3093 cRefs--;
3094 if (!cRefs)
3095 return;
3096 }
3097#ifdef LOG_ENABLED
3098 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3099 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3100 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3101 {
3102 Log(("i=%d cRefs=%d\n", i, cRefs--));
3103 }
3104#endif
3105 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
3106 break;
3107 }
3108
3109 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3110 {
3111 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3112 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3113 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3114 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3115 {
3116 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3117 pPT->a[i].u = 0;
3118 cRefs--;
3119 if (!cRefs)
3120 return;
3121 }
3122#ifdef LOG_ENABLED
3123 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3124 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3125 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3126 {
3127 Log(("i=%d cRefs=%d\n", i, cRefs--));
3128 }
3129#endif
3130 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3131 break;
3132 }
3133
3134 default:
3135 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3136 }
3137}
3138
3139
3140/**
3141 * Scans one shadow page table for mappings of a physical page.
3142 *
3143 * @param pVM The VM handle.
3144 * @param pPhysPage The guest page in question.
3145 * @param iShw The shadow page table.
3146 * @param cRefs The number of references made in that PT.
3147 */
3148void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
3149{
3150 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3151 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
3152 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3153 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
3154 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3155 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3156}
3157
3158
3159/**
3160 * Flushes a list of shadow page tables mapping the same physical page.
3161 *
3162 * @param pVM The VM handle.
3163 * @param pPhysPage The guest page in question.
3164 * @param iPhysExt The physical cross reference extent list to flush.
3165 */
3166void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
3167{
3168 Assert(PGMIsLockOwner(pVM));
3169 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3170 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3171 LogFlow(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%R[pgmpage] iPhysExt=%d\n", pPhysPage, iPhysExt));
3172
3173 const uint16_t iPhysExtStart = iPhysExt;
3174 PPGMPOOLPHYSEXT pPhysExt;
3175 do
3176 {
3177 Assert(iPhysExt < pPool->cMaxPhysExts);
3178 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3179 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3180 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3181 {
3182 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
3183 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3184 }
3185
3186 /* next */
3187 iPhysExt = pPhysExt->iNext;
3188 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3189
3190 /* insert the list into the free list and clear the ram range entry. */
3191 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3192 pPool->iPhysExtFreeHead = iPhysExtStart;
3193 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3194
3195 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3196}
3197
3198#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3199
3200/**
3201 * Flushes all shadow page table mappings of the given guest page.
3202 *
3203 * This is typically called when the host page backing the guest one has been
3204 * replaced or when the page protection was changed due to an access handler.
3205 *
3206 * @returns VBox status code.
3207 * @retval VINF_SUCCESS if all references have been successfully cleared.
3208 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3209 * pool cleaning. FF and sync flags are set.
3210 *
3211 * @param pVM The VM handle.
3212 * @param pPhysPage The guest page in question.
3213 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3214 * flushed, it is NOT touched if this isn't necessary.
3215 * The caller MUST initialize this to @a false.
3216 */
3217int pgmPoolTrackFlushGCPhys(PVM pVM, PPGMPAGE pPhysPage, bool *pfFlushTLBs)
3218{
3219 PVMCPU pVCpu = VMMGetCpu(pVM);
3220 pgmLock(pVM);
3221 int rc = VINF_SUCCESS;
3222#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3223 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3224 if (u16)
3225 {
3226 /*
3227 * The zero page is currently screwing up the tracking and we'll
3228 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3229 * is defined, zero pages won't normally be mapped. Some kind of solution
3230 * will be needed for this problem of course, but it will have to wait...
3231 */
3232 if (PGM_PAGE_IS_ZERO(pPhysPage))
3233 rc = VINF_PGM_GCPHYS_ALIASED;
3234 else
3235 {
3236# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3237 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3238 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3239 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3240# endif
3241
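            /* The tracking word either encodes a single shadow PT index plus a reference
               count, or flags a physical extent list; an overflowed list forces the slow
               full scan. Dispatch accordingly. */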
3242 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3243 pgmPoolTrackFlushGCPhysPT(pVM,
3244 pPhysPage,
3245 PGMPOOL_TD_GET_IDX(u16),
3246 PGMPOOL_TD_GET_CREFS(u16));
3247 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3248 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, PGMPOOL_TD_GET_IDX(u16));
3249 else
3250 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3251 *pfFlushTLBs = true;
3252
3253# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3254 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3255# endif
3256 }
3257 }
3258
3259#elif defined(PGMPOOL_WITH_CACHE)
3260 if (PGM_PAGE_IS_ZERO(pPhysPage))
3261 rc = VINF_PGM_GCPHYS_ALIASED;
3262 else
3263 {
3264# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3265 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow kills the pool otherwise. */
3266 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3267# endif
3268 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3269 if (rc == VINF_SUCCESS)
3270 *pfFlushTLBs = true;
3271# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3272        PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3273# endif
3274    }
3275
3276
3277#else
3278 rc = VINF_PGM_GCPHYS_ALIASED;
3279#endif
3280
3281 if (rc == VINF_PGM_GCPHYS_ALIASED)
3282 {
3283 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3284 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3285 rc = VINF_PGM_SYNC_CR3;
3286 }
3287 pgmUnlock(pVM);
3288 return rc;
3289}
3290
3291
3292/**
3293 * Scans all shadow page tables for mappings of a physical page.
3294 *
3295 * This may be slow, but it's most likely more efficient than cleaning
3296 * out the entire page pool / cache.
3297 *
3298 * @returns VBox status code.
3299 * @retval VINF_SUCCESS if all references have been successfully cleared.
3300 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3301 * a page pool cleaning.
3302 *
3303 * @param pVM The VM handle.
3304 * @param pPhysPage The guest page in question.
3305 */
3306int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3307{
3308 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3309 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3310 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3311 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3312
3313#if 1
3314 /*
3315 * There is a limit to what makes sense.
3316 */
3317 if (pPool->cPresent > 1024)
3318 {
3319 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3320 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3321 return VINF_PGM_GCPHYS_ALIASED;
3322 }
3323#endif
3324
3325 /*
3326 * Iterate all the pages until we've encountered all that are in use.
3327 * This is a simple but not quite optimal solution.
3328 */
3329 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3330 const uint32_t u32 = u64;
3331 unsigned cLeft = pPool->cUsedPages;
3332 unsigned iPage = pPool->cCurPages;
3333 while (--iPage >= PGMPOOL_IDX_FIRST)
3334 {
3335 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3336 if (pPage->GCPhys != NIL_RTGCPHYS)
3337 {
3338 switch (pPage->enmKind)
3339 {
3340 /*
3341 * We only care about shadow page tables.
3342 */
3343 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3344 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3345 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3346 {
3347 unsigned cPresent = pPage->cPresent;
3348 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3349 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3350 if (pPT->a[i].n.u1Present)
3351 {
3352 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3353 {
3354 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3355 pPT->a[i].u = 0;
3356 }
3357 if (!--cPresent)
3358 break;
3359 }
3360 break;
3361 }
3362
3363 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3364 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3365 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3366 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3367 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3368 {
3369 unsigned cPresent = pPage->cPresent;
3370 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3371 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3372 if (pPT->a[i].n.u1Present)
3373 {
3374 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3375 {
3376 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3377 pPT->a[i].u = 0;
3378 }
3379 if (!--cPresent)
3380 break;
3381 }
3382 break;
3383 }
3384 }
3385 if (!--cLeft)
3386 break;
3387 }
3388 }
3389
3390 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3391 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3392 return VINF_SUCCESS;
3393}
3394
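/* For reference, the comparison above keeps only the physical frame and the present
 * bit, so a matching shadow PTE is zapped regardless of its RW/US/A/D attribute
 * bits. A small illustration, assuming a page aligned HCPhysPage value:
 *
 *      X86PTEPAE Pte;
 *      Pte.u = HCPhysPage | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D;
 *      bool fMatch = (Pte.u & (X86_PTE_PAE_PG_MASK | X86_PTE_P))
 *                 == (HCPhysPage | X86_PTE_P);     // true, attributes are ignored
 */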
3395
3396/**
3397 * Clears the user entry in a user table.
3398 *
3399 * This is used to remove all references to a page when flushing it.
3400 */
3401static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3402{
3403 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3404 Assert(pUser->iUser < pPool->cCurPages);
3405 uint32_t iUserTable = pUser->iUserTable;
3406
3407 /*
3408 * Map the user page.
3409 */
3410 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3411 union
3412 {
3413 uint64_t *pau64;
3414 uint32_t *pau32;
3415 } u;
3416 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3417
3418 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3419
3420 /* Safety precaution in case we change the paging for other modes too in the future. */
3421 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3422
3423#ifdef VBOX_STRICT
3424 /*
3425 * Some sanity checks.
3426 */
3427 switch (pUserPage->enmKind)
3428 {
3429 case PGMPOOLKIND_32BIT_PD:
3430 case PGMPOOLKIND_32BIT_PD_PHYS:
3431 Assert(iUserTable < X86_PG_ENTRIES);
3432 break;
3433 case PGMPOOLKIND_PAE_PDPT:
3434 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3435 case PGMPOOLKIND_PAE_PDPT_PHYS:
3436 Assert(iUserTable < 4);
3437 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3438 break;
3439 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3440 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3441 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3442 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3443 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3444 case PGMPOOLKIND_PAE_PD_PHYS:
3445 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3446 break;
3447 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3448 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3449 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3450 break;
3451 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3452 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3453 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3454 break;
3455 case PGMPOOLKIND_64BIT_PML4:
3456 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3457 /* GCPhys >> PAGE_SHIFT is the index here */
3458 break;
3459 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3460 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3461 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3462 break;
3463
3464 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3465 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3466 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3467 break;
3468
3469 case PGMPOOLKIND_ROOT_NESTED:
3470 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3471 break;
3472
3473 default:
3474 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3475 break;
3476 }
3477#endif /* VBOX_STRICT */
3478
3479 /*
3480 * Clear the entry in the user page.
3481 */
3482 switch (pUserPage->enmKind)
3483 {
3484 /* 32-bit entries */
3485 case PGMPOOLKIND_32BIT_PD:
3486 case PGMPOOLKIND_32BIT_PD_PHYS:
3487 u.pau32[iUserTable] = 0;
3488 break;
3489
3490 /* 64-bit entries */
3491 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3492 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3493 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3494 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3495 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3496#if defined(IN_RC)
3497        /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during cr3 load, so any
3498 * non-present PDPT will continue to cause page faults.
3499 */
3500 ASMReloadCR3();
3501#endif
3502 /* no break */
3503 case PGMPOOLKIND_PAE_PD_PHYS:
3504 case PGMPOOLKIND_PAE_PDPT_PHYS:
3505 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3506 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3507 case PGMPOOLKIND_64BIT_PML4:
3508 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3509 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3510 case PGMPOOLKIND_PAE_PDPT:
3511 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3512 case PGMPOOLKIND_ROOT_NESTED:
3513 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3514 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3515 u.pau64[iUserTable] = 0;
3516 break;
3517
3518 default:
3519 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3520 }
3521}
3522
3523
3524/**
3525 * Clears all users of a page.
3526 */
3527static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3528{
3529 /*
3530 * Free all the user records.
3531 */
3532 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3533
3534 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3535 uint16_t i = pPage->iUserHead;
3536 while (i != NIL_PGMPOOL_USER_INDEX)
3537 {
3538        /* Clear entry in user table. */
3539 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3540
3541 /* Free it. */
3542 const uint16_t iNext = paUsers[i].iNext;
3543 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3544 paUsers[i].iNext = pPool->iUserFreeHead;
3545 pPool->iUserFreeHead = i;
3546
3547 /* Next. */
3548 i = iNext;
3549 }
3550 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3551}
3552
3553#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3554
3555/**
3556 * Allocates a new physical cross reference extent.
3557 *
3558 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3559 * @param pVM The VM handle.
3560 * @param piPhysExt Where to store the phys ext index.
3561 */
3562PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3563{
3564 Assert(PGMIsLockOwner(pVM));
3565 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3566 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3567 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3568 {
3569 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3570 return NULL;
3571 }
3572 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3573 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3574 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3575 *piPhysExt = iPhysExt;
3576 return pPhysExt;
3577}
3578
3579
3580/**
3581 * Frees a physical cross reference extent.
3582 *
3583 * @param pVM The VM handle.
3584 * @param iPhysExt The extent to free.
3585 */
3586void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3587{
3588 Assert(PGMIsLockOwner(pVM));
3589 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3590 Assert(iPhysExt < pPool->cMaxPhysExts);
3591 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3592 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3593 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3594 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3595 pPool->iPhysExtFreeHead = iPhysExt;
3596}
3597
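/* The extent records form a simple LIFO free list threaded through iNext. A short
 * sketch, assuming the PGM lock is held and iShwPT is a shadow PT pool index, of
 * the pop/push pair implemented by pgmPoolTrackPhysExtAlloc and
 * pgmPoolTrackPhysExtFree:
 *
 *      uint16_t        iPhysExt;
 *      PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);  // pop
 *      if (pPhysExt)
 *      {
 *          pPhysExt->aidx[0] = iShwPT;               // record one shadow PT index
 *          // ... and when the last reference goes away again:
 *          pgmPoolTrackPhysExtFree(pVM, iPhysExt);   // push it back on the free list
 *      }
 */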
3598
3599/**
3600 * Frees a list of physical cross reference extents.
3601 *
3602 * @param pVM The VM handle.
3603 * @param iPhysExt The index of the first extent in the list to free.
3604 */
3605void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3606{
3607 Assert(PGMIsLockOwner(pVM));
3608 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3609
3610 const uint16_t iPhysExtStart = iPhysExt;
3611 PPGMPOOLPHYSEXT pPhysExt;
3612 do
3613 {
3614 Assert(iPhysExt < pPool->cMaxPhysExts);
3615 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3616 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3617 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3618
3619 /* next */
3620 iPhysExt = pPhysExt->iNext;
3621 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3622
3623 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3624 pPool->iPhysExtFreeHead = iPhysExtStart;
3625}
3626
3627
3628/**
3629 * Insert a reference into a list of physical cross reference extents.
3630 *
3631 * @returns The new tracking data for PGMPAGE.
3632 *
3633 * @param pVM The VM handle.
3634 * @param iPhysExt The physical extent index of the list head.
3635 * @param iShwPT The shadow page table index.
3636 *
3637 */
3638static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3639{
3640 Assert(PGMIsLockOwner(pVM));
3641 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3642 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3643
3644 /* special common case. */
3645 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3646 {
3647 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3648 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3649 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d}\n", iPhysExt, iShwPT));
3650 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3651 }
3652
3653 /* general treatment. */
3654 const uint16_t iPhysExtStart = iPhysExt;
3655 unsigned cMax = 15;
3656 for (;;)
3657 {
3658 Assert(iPhysExt < pPool->cMaxPhysExts);
3659 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3660 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3661 {
3662 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3663 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3664 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3665 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3666 }
3667 if (!--cMax)
3668 {
3669 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3670 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3671 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3672 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3673 }
3674 }
3675
3676 /* add another extent to the list. */
3677 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3678 if (!pNew)
3679 {
3680 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3681 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3682 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3683 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3684 }
3685 pNew->iNext = iPhysExtStart;
3686 pNew->aidx[0] = iShwPT;
3687 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3688 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3689}
3690
3691
3692/**
3693 * Add a reference to a guest physical page where extents are in use.
3694 *
3695 * @returns The new tracking data for PGMPAGE.
3696 *
3697 * @param pVM The VM handle.
3698 * @param u16 The ram range flags (top 16-bits).
3699 * @param iShwPT The shadow page table index.
3700 */
3701uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3702{
3703 pgmLock(pVM);
3704 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3705 {
3706 /*
3707 * Convert to extent list.
3708 */
3709 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3710 uint16_t iPhysExt;
3711 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3712 if (pPhysExt)
3713 {
3714 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3715 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3716 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3717 pPhysExt->aidx[1] = iShwPT;
3718 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3719 }
3720 else
3721 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3722 }
3723 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3724 {
3725 /*
3726 * Insert into the extent list.
3727 */
3728 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3729 }
3730 else
3731 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3732 pgmUnlock(pVM);
3733 return u16;
3734}
3735
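/* A rough sketch of the tracking word progression driven by the function above,
 * assuming iShwPT1..iShwPT3 are pool page indices of shadow page tables; how the
 * initial single-reference word is produced elsewhere is only an assumption here:
 *
 *      uint16_t u16 = PGMPOOL_TD_MAKE(1, iShwPT1);          // one direct reference
 *      u16 = pgmPoolTrackPhysExtAddref(pVM, u16, iShwPT2);  // converts to an extent list
 *      u16 = pgmPoolTrackPhysExtAddref(pVM, u16, iShwPT3);  // inserts into that list
 *      // on extent exhaustion or a very long list the word degenerates to
 *      //   PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED)
 */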
3736
3737/**
3738 * Clear references to guest physical memory.
3739 *
3740 * @param pPool The pool.
3741 * @param pPage The page.
3742 * @param pPhysPage Pointer to the aPages entry in the ram range.
3743 */
3744void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3745{
3746 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3747 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3748
3749 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3750 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3751 {
3752 PVM pVM = pPool->CTX_SUFF(pVM);
3753 pgmLock(pVM);
3754
3755 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3756 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3757 do
3758 {
3759 Assert(iPhysExt < pPool->cMaxPhysExts);
3760
3761 /*
3762 * Look for the shadow page and check if it's all freed.
3763 */
3764 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3765 {
3766 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3767 {
3768 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3769
3770 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3771 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3772 {
3773 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3774 pgmUnlock(pVM);
3775 return;
3776 }
3777
3778 /* we can free the node. */
3779 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3780 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3781 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3782 {
3783 /* lonely node */
3784 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3785 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3786 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3787 }
3788 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3789 {
3790 /* head */
3791 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3792 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3793 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3794 }
3795 else
3796 {
3797 /* in list */
3798 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3799 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3800 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3801 }
3802 iPhysExt = iPhysExtNext;
3803 pgmUnlock(pVM);
3804 return;
3805 }
3806 }
3807
3808 /* next */
3809 iPhysExtPrev = iPhysExt;
3810 iPhysExt = paPhysExts[iPhysExt].iNext;
3811 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3812
3813 pgmUnlock(pVM);
3814 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3815 }
3816 else /* nothing to do */
3817 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3818}
3819
3820
3821/**
3822 * Clear references to guest physical memory.
3823 *
3824 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3825 * is assumed to be correct, so the linear search can be skipped and we can assert
3826 * at an earlier point.
3827 *
3828 * @param pPool The pool.
3829 * @param pPage The page.
3830 * @param HCPhys The host physical address corresponding to the guest page.
3831 * @param GCPhys The guest physical address corresponding to HCPhys.
3832 */
3833static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3834{
3835 /*
3836 * Walk range list.
3837 */
3838 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3839 while (pRam)
3840 {
3841 RTGCPHYS off = GCPhys - pRam->GCPhys;
3842 if (off < pRam->cb)
3843 {
3844 /* does it match? */
3845 const unsigned iPage = off >> PAGE_SHIFT;
3846 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3847#ifdef LOG_ENABLED
3848            RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3849            Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3850#endif
3851 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3852 {
3853 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3854 return;
3855 }
3856 break;
3857 }
3858 pRam = pRam->CTX_SUFF(pNext);
3859 }
3860 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3861}
3862
3863
3864/**
3865 * Clear references to guest physical memory.
3866 *
3867 * @param pPool The pool.
3868 * @param pPage The page.
3869 * @param HCPhys The host physical address corresponding to the guest page.
3870 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3871 */
3872void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3873{
3874 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
3875
3876 /*
3877 * Walk range list.
3878 */
3879 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3880 while (pRam)
3881 {
3882 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3883 if (off < pRam->cb)
3884 {
3885 /* does it match? */
3886 const unsigned iPage = off >> PAGE_SHIFT;
3887 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3888 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3889 {
3890 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3891 return;
3892 }
3893 break;
3894 }
3895 pRam = pRam->CTX_SUFF(pNext);
3896 }
3897
3898 /*
3899 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3900 */
3901 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3902 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3903 while (pRam)
3904 {
3905 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3906 while (iPage-- > 0)
3907 {
3908 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3909 {
3910 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3911 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3912 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3913 return;
3914 }
3915 }
3916 pRam = pRam->CTX_SUFF(pNext);
3917 }
3918
3919 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3920}
3921
3922
3923/**
3924 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3925 *
3926 * @param pPool The pool.
3927 * @param pPage The page.
3928 * @param pShwPT The shadow page table (mapping of the page).
3929 * @param pGstPT The guest page table.
3930 */
3931DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3932{
3933 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3934 if (pShwPT->a[i].n.u1Present)
3935 {
3936 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3937 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3938 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3939 if (!--pPage->cPresent)
3940 break;
3941 }
3942}
3943
3944
3945/**
3946 * Clear references to guest physical memory in a PAE / 32-bit page table.
3947 *
3948 * @param pPool The pool.
3949 * @param pPage The page.
3950 * @param pShwPT The shadow page table (mapping of the page).
3951 * @param pGstPT The guest page table (just a half one).
3952 */
3953DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3954{
3955 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3956 if (pShwPT->a[i].n.u1Present)
3957 {
3958 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3959 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3960 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3961 }
3962}
3963
3964
3965/**
3966 * Clear references to guest physical memory in a PAE / PAE page table.
3967 *
3968 * @param pPool The pool.
3969 * @param pPage The page.
3970 * @param pShwPT The shadow page table (mapping of the page).
3971 * @param pGstPT The guest page table.
3972 */
3973DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3974{
3975 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3976 if (pShwPT->a[i].n.u1Present)
3977 {
3978            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3979 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3980 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3981 }
3982}
3983
3984
3985/**
3986 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3987 *
3988 * @param pPool The pool.
3989 * @param pPage The page.
3990 * @param pShwPT The shadow page table (mapping of the page).
3991 */
3992DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3993{
3994 RTGCPHYS GCPhys = pPage->GCPhys;
3995 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3996 if (pShwPT->a[i].n.u1Present)
3997 {
3998 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3999 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4000 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
4001 }
4002}
4003
4004
4005/**
4006 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4007 *
4008 * @param pPool The pool.
4009 * @param pPage The page.
4010 * @param pShwPT The shadow page table (mapping of the page).
4011 */
4012DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
4013{
4014 RTGCPHYS GCPhys = pPage->GCPhys;
4015 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4016 if (pShwPT->a[i].n.u1Present)
4017 {
4018 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4019 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
4020 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
4021 }
4022}
4023
4024#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
4025
4026
4027/**
4028 * Clear references to shadowed pages in a 32-bit page directory.
4029 *
4030 * @param pPool The pool.
4031 * @param pPage The page.
4032 * @param pShwPD The shadow page directory (mapping of the page).
4033 */
4034DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4035{
4036 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4037 {
4038 if ( pShwPD->a[i].n.u1Present
4039 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4040 )
4041 {
4042 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4043 if (pSubPage)
4044 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4045 else
4046 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4047 }
4048 }
4049}
4050
4051/**
4052 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4053 *
4054 * @param pPool The pool.
4055 * @param pPage The page.
4056 * @param pShwPD The shadow page directory (mapping of the page).
4057 */
4058DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4059{
4060 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4061 {
4062 if ( pShwPD->a[i].n.u1Present
4063 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4064 )
4065 {
4066 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4067 if (pSubPage)
4068 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4069 else
4070 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4071 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4072 }
4073 }
4074}
4075
4076/**
4077 * Clear references to shadowed pages in a PAE page directory pointer table.
4078 *
4079 * @param pPool The pool.
4080 * @param pPage The page.
4081 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4082 */
4083DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4084{
4085 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4086 {
4087 if ( pShwPDPT->a[i].n.u1Present
4088 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4089 )
4090 {
4091 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4092 if (pSubPage)
4093 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4094 else
4095 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4096 }
4097 }
4098}
4099
4100
4101/**
4102 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4103 *
4104 * @param pPool The pool.
4105 * @param pPage The page.
4106 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4107 */
4108DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4109{
4110 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4111 {
4112 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
4113 if (pShwPDPT->a[i].n.u1Present)
4114 {
4115 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4116 if (pSubPage)
4117 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4118 else
4119 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4120 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4121 }
4122 }
4123}
4124
4125
4126/**
4127 * Clear references to shadowed pages in a 64-bit level 4 page table.
4128 *
4129 * @param pPool The pool.
4130 * @param pPage The page.
4131 * @param pShwPML4 The shadow level 4 page table (mapping of the page).
4132 */
4133DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4134{
4135 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4136 {
4137 if (pShwPML4->a[i].n.u1Present)
4138 {
4139 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4140 if (pSubPage)
4141 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4142 else
4143 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4144 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4145 }
4146 }
4147}
4148
4149
4150/**
4151 * Clear references to shadowed pages in an EPT page table.
4152 *
4153 * @param pPool The pool.
4154 * @param pPage The page.
4155 * @param pShwPT The shadow EPT page table (mapping of the page).
4156 */
4157DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4158{
4159 RTGCPHYS GCPhys = pPage->GCPhys;
4160 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4161 if (pShwPT->a[i].n.u1Present)
4162 {
4163 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4164 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4165 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
4166 }
4167}
4168
4169
4170/**
4171 * Clear references to shadowed pages in an EPT page directory.
4172 *
4173 * @param pPool The pool.
4174 * @param pPage The page.
4175 * @param pShwPD The shadow page directory (mapping of the page).
4176 */
4177DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4178{
4179 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4180 {
4181 if (pShwPD->a[i].n.u1Present)
4182 {
4183 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4184 if (pSubPage)
4185 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4186 else
4187 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4188 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4189 }
4190 }
4191}
4192
4193
4194/**
4195 * Clear references to shadowed pages in an EPT page directory pointer table.
4196 *
4197 * @param pPool The pool.
4198 * @param pPage The page.
4199 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4200 */
4201DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4202{
4203 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4204 {
4205 if (pShwPDPT->a[i].n.u1Present)
4206 {
4207 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4208 if (pSubPage)
4209 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4210 else
4211 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4212 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4213 }
4214 }
4215}
4216
4217
4218/**
4219 * Clears all references made by this page.
4220 *
4221 * This includes other shadow pages and GC physical addresses.
4222 *
4223 * @param pPool The pool.
4224 * @param pPage The page.
4225 */
4226static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4227{
4228 /*
4229 * Map the shadow page and take action according to the page kind.
4230 */
4231 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
4232 switch (pPage->enmKind)
4233 {
4234#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4235 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4236 {
4237 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4238 void *pvGst;
4239 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4240 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4241 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4242 break;
4243 }
4244
4245 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4246 {
4247 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4248 void *pvGst;
4249 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4250 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
4251 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4252 break;
4253 }
4254
4255 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4256 {
4257 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4258 void *pvGst;
4259 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4260 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
4261 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4262 break;
4263 }
4264
4265 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4266 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4267 {
4268 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4269 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4270 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4271 break;
4272 }
4273
4274 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4275 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4276 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4277 {
4278 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4279 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4280 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4281 break;
4282 }
4283
4284#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4285 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4286 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4287 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4288 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4289 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4290 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4291 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4292 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4293 break;
4294#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4295
4296 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4297 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4298 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4299 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4300 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4301 case PGMPOOLKIND_PAE_PD_PHYS:
4302 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4303 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4304 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4305 break;
4306
4307 case PGMPOOLKIND_32BIT_PD_PHYS:
4308 case PGMPOOLKIND_32BIT_PD:
4309 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4310 break;
4311
4312 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4313 case PGMPOOLKIND_PAE_PDPT:
4314 case PGMPOOLKIND_PAE_PDPT_PHYS:
4315 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4316 break;
4317
4318 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4319 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4320 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4321 break;
4322
4323 case PGMPOOLKIND_64BIT_PML4:
4324 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4325 break;
4326
4327 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4328 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4329 break;
4330
4331 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4332 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4333 break;
4334
4335 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4336 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4337 break;
4338
4339 default:
4340 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4341 }
4342
4343    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4344 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4345 ASMMemZeroPage(pvShw);
4346 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4347 pPage->fZeroed = true;
4348 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4349}
4350#endif /* PGMPOOL_WITH_USER_TRACKING */
4351
4352/**
4353 * Flushes a pool page.
4354 *
4355 * This moves the page to the free list after removing all user references to it.
4356 *
4357 * @returns VBox status code.
4358 * @retval VINF_SUCCESS on success.
4359 * @param pPool The pool.
4360 * @param pPage The shadow page to flush.
4361 */
4362int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4363{
4364 PVM pVM = pPool->CTX_SUFF(pVM);
4365
4366 int rc = VINF_SUCCESS;
4367 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4368 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4369 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4370
4371 /*
4372 * Quietly reject any attempts at flushing any of the special root pages.
4373 */
4374 if (pPage->idx < PGMPOOL_IDX_FIRST)
4375 {
4376 AssertFailed(); /* can no longer happen */
4377 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4378 return VINF_SUCCESS;
4379 }
4380
4381 pgmLock(pVM);
4382
4383 /*
4384 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4385 */
4386 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4387 {
4388 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4389 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4390 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4391 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4392 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4393 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4394 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4395 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4396 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4397 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4398 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4399 pgmUnlock(pVM);
4400 return VINF_SUCCESS;
4401 }
4402
4403#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4404 /* Start a subset so we won't run out of mapping space. */
4405 PVMCPU pVCpu = VMMGetCpu(pVM);
4406 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4407#endif
4408
4409 /*
4410 * Mark the page as being in need of an ASMMemZeroPage().
4411 */
4412 pPage->fZeroed = false;
4413
4414#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4415 if (pPage->fDirty)
4416 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, true /* force removal */);
4417#endif
4418
4419#ifdef PGMPOOL_WITH_USER_TRACKING
4420 /*
4421 * Clear the page.
4422 */
4423 pgmPoolTrackClearPageUsers(pPool, pPage);
4424 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4425 pgmPoolTrackDeref(pPool, pPage);
4426 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4427#endif
4428
4429#ifdef PGMPOOL_WITH_CACHE
4430 /*
4431 * Flush it from the cache.
4432 */
4433 pgmPoolCacheFlushPage(pPool, pPage);
4434#endif /* PGMPOOL_WITH_CACHE */
4435
4436#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4437 /* Heavy stuff done. */
4438 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4439#endif
4440
4441#ifdef PGMPOOL_WITH_MONITORING
4442 /*
4443 * Deregister the monitoring.
4444 */
4445 if (pPage->fMonitored)
4446 rc = pgmPoolMonitorFlush(pPool, pPage);
4447#endif
4448
4449 /*
4450 * Free the page.
4451 */
4452 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4453 pPage->iNext = pPool->iFreeHead;
4454 pPool->iFreeHead = pPage->idx;
4455 pPage->enmKind = PGMPOOLKIND_FREE;
4456 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4457 pPage->GCPhys = NIL_RTGCPHYS;
4458 pPage->fReusedFlushPending = false;
4459
4460 pPool->cUsedPages--;
4461 pgmUnlock(pVM);
4462 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4463 return rc;
4464}
4465
4466
4467/**
4468 * Frees a usage of a pool page.
4469 *
4470 * The caller is responsible for updating the user table so that it no longer
4471 * references the shadow page.
4472 *
4473 * @param pPool The pool.
4474 * @param pPage The shadow page whose usage is being freed.
4475 * @param iUser The shadow page pool index of the user table.
4476 * @param iUserTable The index into the user table (shadowed).
4477 */
4478void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4479{
4480 PVM pVM = pPool->CTX_SUFF(pVM);
4481
4482 STAM_PROFILE_START(&pPool->StatFree, a);
4483 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4484 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4485 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4486 pgmLock(pVM);
4487#ifdef PGMPOOL_WITH_USER_TRACKING
4488 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4489#endif
4490#ifdef PGMPOOL_WITH_CACHE
4491 if (!pPage->fCached)
4492#endif
4493 pgmPoolFlushPage(pPool, pPage);
4494 pgmUnlock(pVM);
4495 STAM_PROFILE_STOP(&pPool->StatFree, a);
4496}
4497
4498
4499/**
4500 * Makes one or more free pages available.
4501 *
4502 * @returns VBox status code.
4503 * @retval VINF_SUCCESS on success.
4504 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4505 *
4506 * @param pPool The pool.
4507 * @param enmKind Page table kind
4508 * @param iUser The user of the page.
4509 */
4510static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4511{
4512 PVM pVM = pPool->CTX_SUFF(pVM);
4513
4514 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4515
4516 /*
4517 * If the pool isn't fully grown yet, expand it.
4518 */
4519 if ( pPool->cCurPages < pPool->cMaxPages
4520#if defined(IN_RC)
4521 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4522 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4523 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4524#endif
4525 )
4526 {
4527 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4528#ifdef IN_RING3
4529 int rc = PGMR3PoolGrow(pVM);
4530#else
4531 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4532#endif
4533 if (RT_FAILURE(rc))
4534 return rc;
4535 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4536 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4537 return VINF_SUCCESS;
4538 }
4539
4540#ifdef PGMPOOL_WITH_CACHE
4541 /*
4542 * Free one cached page.
4543 */
4544 return pgmPoolCacheFreeOne(pPool, iUser);
4545#else
4546 /*
4547 * Flush the pool.
4548 *
4549 * If we have tracking enabled, it should be possible to come up with
4550 * a cheap replacement strategy...
4551 */
4552 /* @todo This path no longer works (CR3 root pages will be flushed)!! */
4553 AssertCompileFailed();
4554 Assert(!CPUMIsGuestInLongMode(pVM));
4555 pgmPoolFlushAllInt(pPool);
4556 return VERR_PGM_POOL_FLUSHED;
4557#endif
4558}
4559
4560/**
4561 * Allocates a page from the pool.
4562 *
4563 * This page may actually be a cached page and not in need of any processing
4564 * on the callers part.
4565 *
4566 * @returns VBox status code.
4567 * @retval VINF_SUCCESS if a NEW page was allocated.
4568 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4569 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4570 * @param pVM The VM handle.
4571 * @param GCPhys The GC physical address of the page we're going to shadow.
4572 * For 4MB and 2MB PD entries, it's the first address the
4573 * shadow PT is covering.
4574 * @param enmKind The kind of mapping.
4575 * @param enmAccess Access type for the mapping (only relevant for big pages)
4576 * @param iUser The shadow page pool index of the user table.
4577 * @param iUserTable The index into the user table (shadowed).
4578 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4579 * @param fLockPage Lock the page
4580 */
4581int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4582{
4583 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4584 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4585 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4586 *ppPage = NULL;
4587 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4588 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4589 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4590
4591 pgmLock(pVM);
4592
4593#ifdef PGMPOOL_WITH_CACHE
4594 if (pPool->fCacheEnabled)
4595 {
4596 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4597 if (RT_SUCCESS(rc2))
4598 {
4599 if (fLockPage)
4600 pgmPoolLockPage(pPool, *ppPage);
4601 pgmUnlock(pVM);
4602 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4603 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4604 return rc2;
4605 }
4606 }
4607#endif
4608
4609 /*
4610 * Allocate a new one.
4611 */
4612 int rc = VINF_SUCCESS;
4613 uint16_t iNew = pPool->iFreeHead;
4614 if (iNew == NIL_PGMPOOL_IDX)
4615 {
4616 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4617 if (RT_FAILURE(rc))
4618 {
4619 pgmUnlock(pVM);
4620 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4621 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4622 return rc;
4623 }
4624 iNew = pPool->iFreeHead;
4625 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4626 }
4627
4628 /* unlink the free head */
4629 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4630 pPool->iFreeHead = pPage->iNext;
4631 pPage->iNext = NIL_PGMPOOL_IDX;
4632
4633 /*
4634 * Initialize it.
4635 */
4636 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4637 pPage->enmKind = enmKind;
4638 pPage->enmAccess = enmAccess;
4639 pPage->GCPhys = GCPhys;
4640 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4641 pPage->fMonitored = false;
4642 pPage->fCached = false;
4643#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4644 pPage->fDirty = false;
4645#endif
4646 pPage->fReusedFlushPending = false;
4647#ifdef PGMPOOL_WITH_MONITORING
4648 pPage->cModifications = 0;
4649 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4650 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4651#else
4652 pPage->fCR3Mix = false;
4653#endif
4654#ifdef PGMPOOL_WITH_USER_TRACKING
4655 pPage->cPresent = 0;
4656 pPage->iFirstPresent = ~0;
4657 pPage->pvLastAccessHandlerFault = 0;
4658 pPage->cLastAccessHandlerCount = 0;
4659 pPage->pvLastAccessHandlerRip = 0;
4660
4661 /*
4662 * Insert into the tracking and cache. If this fails, free the page.
4663 */
4664 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4665 if (RT_FAILURE(rc3))
4666 {
4667 pPool->cUsedPages--;
4668 pPage->enmKind = PGMPOOLKIND_FREE;
4669 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4670 pPage->GCPhys = NIL_RTGCPHYS;
4671 pPage->iNext = pPool->iFreeHead;
4672 pPool->iFreeHead = pPage->idx;
4673 pgmUnlock(pVM);
4674 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4675 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4676 return rc3;
4677 }
4678#endif /* PGMPOOL_WITH_USER_TRACKING */
4679
4680 /*
4681 * Commit the allocation, clear the page and return.
4682 */
4683#ifdef VBOX_WITH_STATISTICS
4684 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4685 pPool->cUsedPagesHigh = pPool->cUsedPages;
4686#endif
4687
4688 if (!pPage->fZeroed)
4689 {
4690 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4691 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4692 ASMMemZeroPage(pv);
4693 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4694 }
4695
4696 *ppPage = pPage;
4697 if (fLockPage)
4698 pgmPoolLockPage(pPool, pPage);
4699 pgmUnlock(pVM);
4700 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4701 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4702 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4703 return rc;
4704}
4705
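/* A hedged usage sketch of the allocation API above: shadowing a guest PAE page
 * table referenced from a shadow page directory. The iUserPD/iPde names and the
 * follow-up handling are placeholders, not taken from the original code:
 *
 *      PPGMPOOLPAGE pShwPage = NULL;
 *      int rc = pgmPoolAllocEx(pVM, GCPhysGuestPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                              PGMPOOLACCESS_DONTCARE, iUserPD, iPde,
 *                              &pShwPage, false);   // fLockPage = false
 *      if (RT_SUCCESS(rc))   // VINF_SUCCESS or VINF_PGM_CACHED_PAGE
 *      {
 *          // enter pShwPage->Core.Key into PDE iPde of the user page directory
 *      }
 *      // and when that PDE is torn down again:
 *      pgmPoolFreeByPage(pPool, pShwPage, iUserPD, iPde);
 */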
4706
4707/**
4708 * Frees a usage of a pool page.
4709 *
4710 * @param pVM The VM handle.
4711 * @param HCPhys The HC physical address of the shadow page.
4712 * @param iUser The shadow page pool index of the user table.
4713 * @param iUserTable The index into the user table (shadowed).
4714 */
4715void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4716{
4717 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4718 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4719 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4720}
4721
4722/**
4723 * Internal worker for finding an 'in-use' shadow page given by its physical address.
4724 *
4725 * @returns Pointer to the shadow page structure.
4726 * @param pPool The pool.
4727 * @param HCPhys The HC physical address of the shadow page.
4728 */
4729PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4730{
4731 PVM pVM = pPool->CTX_SUFF(pVM);
4732
4733 Assert(PGMIsLockOwner(pVM));
4734
4735 /*
4736 * Look up the page.
4737 */
4738 pgmLock(pVM);
4739 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4740 pgmUnlock(pVM);
4741
4742 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4743 return pPage;
4744}
4745
4746/**
4747 * Flush the specified page if present
4748 * Flushes the specified page if present.
4749 * @param pVM The VM handle.
4750 * @param GCPhys Guest physical address of the page to flush
4751 */
4752VMMDECL(void) PGMPoolFlushPage(PVM pVM, RTGCPHYS GCPhys)
4753{
4754#ifdef PGMPOOL_WITH_CACHE
4755 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4756
4757 VM_ASSERT_EMT(pVM);
4758
4759 /*
4760 * Look up the GCPhys in the hash.
4761 */
4762 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
4763 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
4764 if (i == NIL_PGMPOOL_IDX)
4765 return;
4766
4767 do
4768 {
4769 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4770 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
4771 {
4772 switch (pPage->enmKind)
4773 {
4774 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4775 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4776 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4777 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4778 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4779 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4780 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4781 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4782 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4783 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4784 case PGMPOOLKIND_64BIT_PML4:
4785 case PGMPOOLKIND_32BIT_PD:
4786 case PGMPOOLKIND_PAE_PDPT:
4787 {
4788 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
4789#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4790 if (pPage->fDirty)
4791 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
4792 else
4793#endif
4794 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
4795 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
4796 pgmPoolMonitorChainFlush(pPool, pPage);
4797 return;
4798 }
4799
4800 /* ignore, no monitoring. */
4801 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4802 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4803 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4804 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4805 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4806 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4807 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4808 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4809 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4810 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4811 case PGMPOOLKIND_ROOT_NESTED:
4812 case PGMPOOLKIND_PAE_PD_PHYS:
4813 case PGMPOOLKIND_PAE_PDPT_PHYS:
4814 case PGMPOOLKIND_32BIT_PD_PHYS:
4815 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4816 break;
4817
4818 default:
4819 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4820 }
4821 }
4822
4823 /* next */
4824 i = pPage->iNext;
4825 } while (i != NIL_PGMPOOL_IDX);
4826#endif
4827 return;
4828}
4829
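/* A short usage note inferred from the code above rather than from separate
 * documentation: the function is keyed on the guest physical address, so after
 * remapping or reprotecting a guest page a caller can simply do
 *
 *      PGMPoolFlushPage(pVM, GCPhysPage);
 *
 * and any monitored shadow paging structure backed by that page is flushed via
 * pgmPoolMonitorChainFlush(); unmonitored page table kinds are left alone.
 */
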
4830#ifdef IN_RING3
4831/**
4832 * Resets the pool, flushing the entire cache.
4833 *
4834 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4835 * and will execute this CR3 flush.
4836 *
4837 * @param pVM The VM handle.
4838 */
4839void pgmR3PoolReset(PVM pVM)
4840{
4841 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4842
4843 Assert(PGMIsLockOwner(pVM));
4844 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
4845 LogFlow(("pgmPoolFlushAllInt:\n"));
4846
4847 /*
4848 * If there are no pages in the pool, there is nothing to do.
4849 */
4850 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4851 {
4852 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4853 return;
4854 }
4855
4856 /*
4857 * Exit the shadow mode since we're going to clear everything,
4858 * including the root page.
4859 */
4860    for (unsigned i = 0; i < pVM->cCPUs; i++)
4861 {
4862 PVMCPU pVCpu = &pVM->aCpus[i];
4863 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
4864 }
4865
4866 /*
4867 * Nuke the free list and reinsert all pages into it.
4868 */
4869 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4870 {
4871 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4872
4873 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
4874#ifdef PGMPOOL_WITH_MONITORING
4875 if (pPage->fMonitored)
4876 pgmPoolMonitorFlush(pPool, pPage);
4877 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4878 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4879 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4880 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4881 pPage->cModifications = 0;
4882#endif
4883 pPage->GCPhys = NIL_RTGCPHYS;
4884 pPage->enmKind = PGMPOOLKIND_FREE;
4885 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4886 Assert(pPage->idx == i);
4887 pPage->iNext = i + 1;
4888 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4889 pPage->fSeenNonGlobal = false;
4890 pPage->fMonitored = false;
4891#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4892 pPage->fDirty = false;
4893#endif
4894 pPage->fCached = false;
4895 pPage->fReusedFlushPending = false;
4896#ifdef PGMPOOL_WITH_USER_TRACKING
4897 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4898#else
4899 pPage->fCR3Mix = false;
4900#endif
4901#ifdef PGMPOOL_WITH_CACHE
4902 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4903 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4904#endif
4905 pPage->cLocked = 0;
4906 }
4907 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4908 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4909 pPool->cUsedPages = 0;
4910
4911#ifdef PGMPOOL_WITH_USER_TRACKING
4912 /*
4913 * Zap and reinitialize the user records.
4914 */
4915 pPool->cPresent = 0;
4916 pPool->iUserFreeHead = 0;
4917 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4918 const unsigned cMaxUsers = pPool->cMaxUsers;
4919 for (unsigned i = 0; i < cMaxUsers; i++)
4920 {
4921 paUsers[i].iNext = i + 1;
4922 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4923 paUsers[i].iUserTable = 0xfffffffe;
4924 }
4925 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4926#endif
4927
4928#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4929 /*
4930 * Clear all the GCPhys links and rebuild the phys ext free list.
4931 */
4932 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
4933 pRam;
4934 pRam = pRam->CTX_SUFF(pNext))
4935 {
4936 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4937 while (iPage-- > 0)
4938 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
4939 }
4940
4941 pPool->iPhysExtFreeHead = 0;
4942 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4943 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4944 for (unsigned i = 0; i < cMaxPhysExts; i++)
4945 {
4946 paPhysExts[i].iNext = i + 1;
4947 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4948 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4949 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4950 }
4951 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4952#endif
4953
4954#ifdef PGMPOOL_WITH_MONITORING
4955 /*
4956 * Just zap the modified list.
4957 */
4958 pPool->cModifiedPages = 0;
4959 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4960#endif
4961
4962#ifdef PGMPOOL_WITH_CACHE
4963 /*
4964 * Clear the GCPhys hash and the age list.
4965 */
4966 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4967 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4968 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4969 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4970#endif
4971
4972#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4973 /* Clear all dirty pages. */
4974 pPool->idxFreeDirtyPage = 0;
4975 pPool->cDirtyPages = 0;
4976 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
4977 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
4978#endif
4979
4980 /*
4981 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4982 */
4983 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4984 {
4985 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4986 pPage->iNext = NIL_PGMPOOL_IDX;
4987#ifdef PGMPOOL_WITH_MONITORING
4988 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4989 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4990 pPage->cModifications = 0;
4991 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4992 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4993 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4994 if (pPage->fMonitored)
4995 {
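            /* Update the access handler callbacks and user arguments for all contexts (R3/R0/RC) of this still-monitored page. */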
4996 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4997 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4998 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4999 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5000 pPool->pszAccessHandler);
5001 AssertFatalRCSuccess(rc);
5002# ifdef PGMPOOL_WITH_CACHE
5003 pgmPoolHashInsert(pPool, pPage);
5004# endif
5005 }
5006#endif
5007#ifdef PGMPOOL_WITH_USER_TRACKING
5008 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5009#endif
5010#ifdef PGMPOOL_WITH_CACHE
5011 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5012 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5013#endif
5014 }
5015
5016    for (unsigned i = 0; i < pVM->cCPUs; i++)
5017 {
5018 PVMCPU pVCpu = &pVM->aCpus[i];
5019 /*
5020 * Re-enter the shadowing mode and assert Sync CR3 FF.
5021 */
5022 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5023 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5024 }
5025
5026 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
5027}
5028#endif /* IN_RING3 */
5029
5030#ifdef LOG_ENABLED
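/**
 * Converts a page pool kind to a string for logging purposes.
 *
 * @returns Read-only name of the page kind, or "Unknown kind!" if not recognized.
 * @param   enmKind     The page kind.
 */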
5031static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5032{
5033    switch (enmKind)
5034 {
5035 case PGMPOOLKIND_INVALID:
5036 return "PGMPOOLKIND_INVALID";
5037 case PGMPOOLKIND_FREE:
5038 return "PGMPOOLKIND_FREE";
5039 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5040 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5041 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5042 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5043 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5044 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5045 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5046 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5047 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5048 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5049 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5050 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5051 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5052 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5053 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5054 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5055 case PGMPOOLKIND_32BIT_PD:
5056 return "PGMPOOLKIND_32BIT_PD";
5057 case PGMPOOLKIND_32BIT_PD_PHYS:
5058 return "PGMPOOLKIND_32BIT_PD_PHYS";
5059 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5060 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5061 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5062 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5063 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5064 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5065 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5066 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5067 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5068 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5069 case PGMPOOLKIND_PAE_PD_PHYS:
5070 return "PGMPOOLKIND_PAE_PD_PHYS";
5071 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5072 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5073 case PGMPOOLKIND_PAE_PDPT:
5074 return "PGMPOOLKIND_PAE_PDPT";
5075 case PGMPOOLKIND_PAE_PDPT_PHYS:
5076 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5077 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5078 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5079 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5080 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5081 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5082 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5083 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5084 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5085 case PGMPOOLKIND_64BIT_PML4:
5086 return "PGMPOOLKIND_64BIT_PML4";
5087 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5088 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5089 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5090 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5091 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5092 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5093 case PGMPOOLKIND_ROOT_NESTED:
5094 return "PGMPOOLKIND_ROOT_NESTED";
5095 }
5096 return "Unknown kind!";
5097}
5098#endif /* LOG_ENABLED */