VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp @ 22771

Last change on this file since 22771 was 22771, checked in by vboxsync, 15 years ago

Fixed raw mode regression of r51925.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 188.4 KB
1/* $Id: PGMAllPool.cpp 22771 2009-09-04 09:56:20Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42#include <iprt/string.h>
43
44
45/*******************************************************************************
46* Internal Functions *
47*******************************************************************************/
48RT_C_DECLS_BEGIN
49static void pgmPoolFlushAllInt(PPGMPOOL pPool);
50#ifdef PGMPOOL_WITH_USER_TRACKING
51DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
52DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
53static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
54#endif
55#ifdef PGMPOOL_WITH_CACHE
56static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
57#endif
58#ifdef PGMPOOL_WITH_MONITORING
59static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
60#endif
61#ifndef IN_RING3
62DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
63#endif
64#ifdef LOG_ENABLED
65static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
66#endif
67
68void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs);
69void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt);
70int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
71PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
72void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
73void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
74
75RT_C_DECLS_END
76
77
78/**
79 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
80 *
81 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
82 * @param enmKind The page kind.
83 */
84DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
85{
86 switch (enmKind)
87 {
88 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
89 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
90 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
91 return true;
92 default:
93 return false;
94 }
95}
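/*
 * Illustrative arithmetic (an assumption about standard x86 page sizes, not
 * something this file states): a 4MB guest page covers 4MB / 4KB = 1024 small
 * pages, i.e. one full legacy page table (or two 512-entry PAE page tables),
 * while a 2MB PAE guest page covers 2MB / 4KB = 512 small pages, i.e. exactly
 * one PAE page table. That is why the "big page" kinds above are all shadow
 * page *tables* (PGMPOOLKIND_*_PT_FOR_*_4MB / PGMPOOLKIND_PAE_PT_FOR_PAE_2MB).
 */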
96
97/** @def PGMPOOL_PAGE_2_LOCKED_PTR
98 * Maps a pool page into the current context and locks it (RC only).
99 *
100 * @returns Pointer to the mapped page.
101 * @param pVM The VM handle.
102 * @param pPage The pool page.
103 *
104 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
105 * small page window employed by that function. Be careful.
106 * @remark There is no need to assert on the result.
107 */
108#if defined(IN_RC)
109DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
110{
111 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
112
113 /* Make sure the dynamic mapping will not be reused. */
114 if (pv)
115 PGMDynLockHCPage(pVM, (uint8_t *)pv);
116
117 return pv;
118}
119#else
120# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
121#endif
122
123/** @def PGMPOOL_UNLOCK_PTR
124 * Unlocks a previously locked dynamic mapping (RC only).
125 *
126 * @returns nothing.
127 * @param pVM The VM handle.
128 * @param pvPage The mapping of the pool page.
129 *
130 * @remark In RC this unlocks the dynamic mapping established by
131 * PGMGCDynMapHCPage(), so its small page window entry can be reused.
132 * @remark There is no need to assert on the result.
133 */
134#if defined(IN_RC)
135DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
136{
137 if (pvPage)
138 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
139}
140#else
141# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
142#endif
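/*
 * Illustrative usage sketch of the two helpers above (an assumed pattern, not
 * a function found in this file): map and lock the pool page, touch the shadow
 * entries, then unlock so the RC dynamic mapping window can be reused.
 */
#if 0 /* example only */
static void pgmPoolExampleTouchPage(PVM pVM, PPGMPOOLPAGE pPage)
{
    PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage); /* pins the RC mapping */
    if (pPT)
    {
        /* ... inspect or modify pPT->a[iPte] here ... */
        PGMPOOL_UNLOCK_PTR(pVM, pPT);                           /* no-op outside RC */
    }
}
#endif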
143
144
145#ifdef PGMPOOL_WITH_MONITORING
146/**
147 * Determine the size of a write instruction.
148 * @returns number of bytes written.
149 * @param pDis The disassembler state.
150 */
151static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
152{
153 /*
154 * This is very crude and possibly wrong for some opcodes,
155 * but since it's not really supposed to be called we can
156 * probably live with that.
157 */
158 return DISGetParamSize(pDis, &pDis->param1);
159}
160
161
162/**
163 * Flushes a chain of pages sharing the same access monitor.
164 *
165 * @returns VBox status code suitable for scheduling.
166 * @param pPool The pool.
167 * @param pPage A page in the chain.
168 */
169int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
170{
171 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
172
173 /*
174 * Find the list head.
175 */
176 uint16_t idx = pPage->idx;
177 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
178 {
179 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
180 {
181 idx = pPage->iMonitoredPrev;
182 Assert(idx != pPage->idx);
183 pPage = &pPool->aPages[idx];
184 }
185 }
186
187 /*
188 * Iterate the list flushing each shadow page.
189 */
190 int rc = VINF_SUCCESS;
191 for (;;)
192 {
193 idx = pPage->iMonitoredNext;
194 Assert(idx != pPage->idx);
195 if (pPage->idx >= PGMPOOL_IDX_FIRST)
196 {
197 int rc2 = pgmPoolFlushPage(pPool, pPage);
198 AssertRC(rc2);
199 }
200 /* next */
201 if (idx == NIL_PGMPOOL_IDX)
202 break;
203 pPage = &pPool->aPages[idx];
204 }
205 return rc;
206}
207
208
209/**
210 * Wrapper for reading the guest entry being modified, using the current context's access method.
211 *
212 * @returns VBox status code suitable for scheduling.
213 * @param pVM VM Handle.
214 * @param pvDst Destination address
215 * @param pvSrc Source guest virtual address.
216 * @param GCPhysSrc The source guest physical address.
217 * @param cb Size of data to read
218 */
219DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
220{
221#if defined(IN_RING3)
222 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
223 return VINF_SUCCESS;
224#else
225 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
226 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
227#endif
228}
229
230/**
231 * Process shadow entries before they are changed by the guest.
232 *
233 * For PT entries we will clear them. For PD entries, we'll simply check
234 * for mapping conflicts and set the SyncCR3 FF if found.
235 *
236 * @param pVCpu VMCPU handle
237 * @param pPool The pool.
238 * @param pPage The head page.
239 * @param GCPhysFault The guest physical fault address.
240 * @param uAddress In R0 and GC this is the guest context fault address (flat).
241 * In R3 this is the host context 'fault' address.
242 * @param pDis The disassembler state for figuring out the write size.
243 * This need not be specified if the caller knows we won't do cross entry accesses.
244 */
245void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, PDISCPUSTATE pDis)
246{
247 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
248 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
249 const unsigned cbWrite = pDis ? pgmPoolDisasWriteSize(pDis) : 0;
250 PVM pVM = pPool->CTX_SUFF(pVM);
251
252 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)pvAddress, GCPhysFault, cbWrite));
253
254 for (;;)
255 {
256 union
257 {
258 void *pv;
259 PX86PT pPT;
260 PX86PTPAE pPTPae;
261 PX86PD pPD;
262 PX86PDPAE pPDPae;
263 PX86PDPT pPDPT;
264 PX86PML4 pPML4;
265 } uShw;
266
267 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s cbWrite=%d\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
268
269 uShw.pv = NULL;
270 switch (pPage->enmKind)
271 {
272 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
273 {
274 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
275 const unsigned iShw = off / sizeof(X86PTE);
276 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
277 if (uShw.pPT->a[iShw].n.u1Present)
278 {
279# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
280 X86PTE GstPte;
281
282 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
283 AssertRC(rc);
284 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
285 pgmPoolTracDerefGCPhysHint(pPool, pPage,
286 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
287 GstPte.u & X86_PTE_PG_MASK);
288# endif
289 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
290 }
291 break;
292 }
293
294 /* page/2 sized */
295 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
296 {
297 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
298 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
299 {
300 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
301 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
302 if (uShw.pPTPae->a[iShw].n.u1Present)
303 {
304# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
305 X86PTE GstPte;
306 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
307 AssertRC(rc);
308
309 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
310 pgmPoolTracDerefGCPhysHint(pPool, pPage,
311 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
312 GstPte.u & X86_PTE_PG_MASK);
313# endif
314 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
315 }
316 }
317 break;
318 }
319
320 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
321 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
322 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
323 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
324 {
325 unsigned iGst = off / sizeof(X86PDE);
326 unsigned iShwPdpt = iGst / 256;
327 unsigned iShw = (iGst % 256) * 2;
328 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
329
330 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
331 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
332 {
333 for (unsigned i = 0; i < 2; i++)
334 {
335# ifndef IN_RING0
336 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
337 {
338 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
339 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
340 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
341 break;
342 }
343 else
344# endif /* !IN_RING0 */
345 if (uShw.pPDPae->a[iShw+i].n.u1Present)
346 {
347 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
348 pgmPoolFree(pVM,
349 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
350 pPage->idx,
351 iShw + i);
352 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
353 }
354
355 /* paranoia / a bit assumptive. */
356 if ( pDis
357 && (off & 3)
358 && (off & 3) + cbWrite > 4)
359 {
360 const unsigned iShw2 = iShw + 2 + i;
361 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
362 {
363# ifndef IN_RING0
364 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
365 {
366 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
367 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
368 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
369 break;
370 }
371 else
372# endif /* !IN_RING0 */
373 if (uShw.pPDPae->a[iShw2].n.u1Present)
374 {
375 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
376 pgmPoolFree(pVM,
377 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
378 pPage->idx,
379 iShw2);
380 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
381 }
382 }
383 }
384 }
385 }
386 break;
387 }
388
389 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
390 {
391 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
392 const unsigned iShw = off / sizeof(X86PTEPAE);
393 if (uShw.pPTPae->a[iShw].n.u1Present)
394 {
395# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
396 X86PTEPAE GstPte;
397 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
398 AssertRC(rc);
399
400 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
401 pgmPoolTracDerefGCPhysHint(pPool, pPage,
402 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
403 GstPte.u & X86_PTE_PAE_PG_MASK);
404# endif
405 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
406 }
407
408 /* paranoia / a bit assumptive. */
409 if ( pDis
410 && (off & 7)
411 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
412 {
413 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
414 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
415
416 if (uShw.pPTPae->a[iShw2].n.u1Present)
417 {
418# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
419 X86PTEPAE GstPte;
420# ifdef IN_RING3
421 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
422# else
423 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
424# endif
425 AssertRC(rc);
426 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
427 pgmPoolTracDerefGCPhysHint(pPool, pPage,
428 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
429 GstPte.u & X86_PTE_PAE_PG_MASK);
430# endif
431 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u, 0);
432 }
433 }
434 break;
435 }
436
437 case PGMPOOLKIND_32BIT_PD:
438 {
439 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
440 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
441
442 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
443# ifndef IN_RING0
444 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
445 {
446 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
447 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
448 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
449 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
450 break;
451 }
452# endif /* !IN_RING0 */
453# ifndef IN_RING0
454 else
455# endif /* !IN_RING0 */
456 {
457 if (uShw.pPD->a[iShw].n.u1Present)
458 {
459 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
460 pgmPoolFree(pVM,
461 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
462 pPage->idx,
463 iShw);
464 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
465 }
466 }
467 /* paranoia / a bit assumptive. */
468 if ( pDis
469 && (off & 3)
470 && (off & 3) + cbWrite > sizeof(X86PTE))
471 {
472 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
473 if ( iShw2 != iShw
474 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
475 {
476# ifndef IN_RING0
477 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
478 {
479 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
480 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
481 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
482 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
483 break;
484 }
485# endif /* !IN_RING0 */
486# ifndef IN_RING0
487 else
488# endif /* !IN_RING0 */
489 {
490 if (uShw.pPD->a[iShw2].n.u1Present)
491 {
492 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
493 pgmPoolFree(pVM,
494 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
495 pPage->idx,
496 iShw2);
497 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
498 }
499 }
500 }
501 }
502#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
503 if ( uShw.pPD->a[iShw].n.u1Present
504 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
505 {
506 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
507# ifdef IN_RC /* TLB load - we're pushing things a bit... */
508 ASMProbeReadByte(pvAddress);
509# endif
510 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
511 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
512 }
513#endif
514 break;
515 }
516
517 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
518 {
519 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
520 const unsigned iShw = off / sizeof(X86PDEPAE);
521#ifndef IN_RING0
522 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
523 {
524 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
525 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
526 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
527 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
528 break;
529 }
530#endif /* !IN_RING0 */
531 /*
532 * Causes trouble when the guest uses a PDE to refer to the whole page table level
533 * structure. (Invalidate here; faults later on when it tries to change the page
534 * table entries -> recheck; probably only applies to the RC case.)
535 */
536# ifndef IN_RING0
537 else
538# endif /* !IN_RING0 */
539 {
540 if (uShw.pPDPae->a[iShw].n.u1Present)
541 {
542 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
543 pgmPoolFree(pVM,
544 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
545 pPage->idx,
546 iShw);
547 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
548 }
549 }
550 /* paranoia / a bit assumptive. */
551 if ( pDis
552 && (off & 7)
553 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
554 {
555 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
556 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
557
558#ifndef IN_RING0
559 if ( iShw2 != iShw
560 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
561 {
562 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
563 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
564 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
565 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
566 break;
567 }
568#endif /* !IN_RING0 */
569# ifndef IN_RING0
570 else
571# endif /* !IN_RING0 */
572 if (uShw.pPDPae->a[iShw2].n.u1Present)
573 {
574 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
575 pgmPoolFree(pVM,
576 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
577 pPage->idx,
578 iShw2);
579 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
580 }
581 }
582 break;
583 }
584
585 case PGMPOOLKIND_PAE_PDPT:
586 {
587 /*
588 * Hopefully this doesn't happen very often:
589 * - touching unused parts of the page
590 * - messing with the bits of pd pointers without changing the physical address
591 */
592 /* PDPT roots are not page aligned; 32 bytes only! */
593 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
594
595 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
596 const unsigned iShw = offPdpt / sizeof(X86PDPE);
597 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
598 {
599# ifndef IN_RING0
600 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
601 {
602 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
603 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
604 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
605 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
606 break;
607 }
608# endif /* !IN_RING0 */
609# ifndef IN_RING0
610 else
611# endif /* !IN_RING0 */
612 if (uShw.pPDPT->a[iShw].n.u1Present)
613 {
614 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
615 pgmPoolFree(pVM,
616 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
617 pPage->idx,
618 iShw);
619 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
620 }
621
622 /* paranoia / a bit assumptive. */
623 if ( pDis
624 && (offPdpt & 7)
625 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
626 {
627 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
628 if ( iShw2 != iShw
629 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
630 {
631# ifndef IN_RING0
632 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
633 {
634 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
635 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
636 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
637 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
638 break;
639 }
640# endif /* !IN_RING0 */
641# ifndef IN_RING0
642 else
643# endif /* !IN_RING0 */
644 if (uShw.pPDPT->a[iShw2].n.u1Present)
645 {
646 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
647 pgmPoolFree(pVM,
648 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
649 pPage->idx,
650 iShw2);
651 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
652 }
653 }
654 }
655 }
656 break;
657 }
658
659#ifndef IN_RC
660 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
661 {
662 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
663 const unsigned iShw = off / sizeof(X86PDEPAE);
664 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
665 if (uShw.pPDPae->a[iShw].n.u1Present)
666 {
667 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
668 pgmPoolFree(pVM,
669 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
670 pPage->idx,
671 iShw);
672 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
673 }
674 /* paranoia / a bit assumptive. */
675 if ( pDis
676 && (off & 7)
677 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
678 {
679 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
680 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
681
682 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
683 if (uShw.pPDPae->a[iShw2].n.u1Present)
684 {
685 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
686 pgmPoolFree(pVM,
687 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
688 pPage->idx,
689 iShw2);
690 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
691 }
692 }
693 break;
694 }
695
696 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
697 {
698 /*
699 * Hopefully this doesn't happen very often:
700 * - messing with the bits of pd pointers without changing the physical address
701 */
702 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
703 {
704 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
705 const unsigned iShw = off / sizeof(X86PDPE);
706 if (uShw.pPDPT->a[iShw].n.u1Present)
707 {
708 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
709 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
710 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
711 }
712 /* paranoia / a bit assumptive. */
713 if ( pDis
714 && (off & 7)
715 && (off & 7) + cbWrite > sizeof(X86PDPE))
716 {
717 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
718 if (uShw.pPDPT->a[iShw2].n.u1Present)
719 {
720 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
721 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
722 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
723 }
724 }
725 }
726 break;
727 }
728
729 case PGMPOOLKIND_64BIT_PML4:
730 {
731 /*
732 * Hopefully this doesn't happen very often:
733 * - messing with the bits of pd pointers without changing the physical address
734 */
735 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
736 {
737 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
738 const unsigned iShw = off / sizeof(X86PDPE);
739 if (uShw.pPML4->a[iShw].n.u1Present)
740 {
741 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
742 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
743 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
744 }
745 /* paranoia / a bit assumptive. */
746 if ( pDis
747 && (off & 7)
748 && (off & 7) + cbWrite > sizeof(X86PDPE))
749 {
750 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
751 if (uShw.pPML4->a[iShw2].n.u1Present)
752 {
753 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
754 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
755 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
756 }
757 }
758 }
759 break;
760 }
761#endif /* !IN_RC */
762
763 default:
764 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
765 }
766 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
767
768 /* next */
769 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
770 return;
771 pPage = &pPool->aPages[pPage->iMonitoredNext];
772 }
773}
774
775# ifndef IN_RING3
776/**
777 * Checks if an access could be a fork operation in progress.
778 *
779 * Meaning that the guest is setting up the parent process for Copy-On-Write.
780 *
781 * @returns true if it's likely that we're forking, otherwise false.
782 * @param pPool The pool.
783 * @param pDis The disassembled instruction.
784 * @param offFault The access offset.
785 */
786DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
787{
788 /*
789 * i386 linux is using btr to clear X86_PTE_RW.
790 * The functions involved are (2.6.16 source inspection):
791 * clear_bit
792 * ptep_set_wrprotect
793 * copy_one_pte
794 * copy_pte_range
795 * copy_pmd_range
796 * copy_pud_range
797 * copy_page_range
798 * dup_mmap
799 * dup_mm
800 * copy_mm
801 * copy_process
802 * do_fork
803 */
804 if ( pDis->pCurInstr->opcode == OP_BTR
805 && !(offFault & 4)
806 /** @todo Validate that the bit index is X86_PTE_RW. */
807 )
808 {
809 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
810 return true;
811 }
812 return false;
813}
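/*
 * Illustrative sketch of the guest code this heuristic targets (an assumption
 * derived from the call chain listed above, not lifted from any kernel):
 * ptep_set_wrprotect() ends up doing an atomic bit clear of the R/W bit
 * (bit 1) in the PTE, roughly:
 *
 *     lock btr dword [ptePtr], 1    ; clear X86_PTE_RW in the low dword
 *
 * Hence OP_BTR together with !(offFault & 4), i.e. the write hits the low
 * dword of the (possibly 8-byte PAE) entry where the R/W bit lives, is taken
 * as a likely fork in progress.
 */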
814
815
816/**
817 * Determine whether the page is likely to have been reused.
818 *
819 * @returns true if we consider the page as being reused for a different purpose.
820 * @returns false if we consider it to still be a paging page.
821 * @param pVM VM Handle.
822 * @param pVCpu VMCPU Handle.
823 * @param pRegFrame Trap register frame.
824 * @param pDis The disassembly info for the faulting instruction.
825 * @param pvFault The fault address.
826 *
827 * @remark The REP prefix check is left to the caller because of STOSD/W.
828 */
829DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
830{
831#ifndef IN_RC
832 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
833 if ( HWACCMHasPendingIrq(pVM)
834 && (pRegFrame->rsp - pvFault) < 32)
835 {
836 /* Fault caused by stack writes while trying to inject an interrupt event. */
837 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
838 return true;
839 }
840#else
841 NOREF(pVM); NOREF(pvFault);
842#endif
843
844 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
845
846 /* Non-supervisor mode write means it's used for something else. */
847 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
848 return true;
849
850 switch (pDis->pCurInstr->opcode)
851 {
852 /* call implies the actual push of the return address faulted */
853 case OP_CALL:
854 Log4(("pgmPoolMonitorIsReused: CALL\n"));
855 return true;
856 case OP_PUSH:
857 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
858 return true;
859 case OP_PUSHF:
860 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
861 return true;
862 case OP_PUSHA:
863 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
864 return true;
865 case OP_FXSAVE:
866 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
867 return true;
868 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
869 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
870 return true;
871 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
872 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
873 return true;
874 case OP_MOVSWD:
875 case OP_STOSWD:
876 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
877 && pRegFrame->rcx >= 0x40
878 )
879 {
880 Assert(pDis->mode == CPUMODE_64BIT);
881
882 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
883 return true;
884 }
885 return false;
886 }
887 if ( ( (pDis->param1.flags & USE_REG_GEN32)
888 || (pDis->param1.flags & USE_REG_GEN64))
889 && (pDis->param1.base.reg_gen == USE_REG_ESP))
890 {
891 Log4(("pgmPoolMonitorIsReused: ESP\n"));
892 return true;
893 }
894
895 return false;
896}
897
898/**
899 * Flushes the page being accessed.
900 *
901 * @returns VBox status code suitable for scheduling.
902 * @param pVM The VM handle.
903 * @param pVCpu The VMCPU handle.
904 * @param pPool The pool.
905 * @param pPage The pool page (head).
906 * @param pDis The disassembly of the write instruction.
907 * @param pRegFrame The trap register frame.
908 * @param GCPhysFault The fault address as guest physical address.
909 * @param pvFault The fault address.
910 */
911static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
912 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
913{
914#ifdef IN_RING0
915 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
916#else
917 /*
918 * First, do the flushing.
919 */
920 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
921
922 /*
923 * Emulate the instruction (XP/W2K problem, requires pc/cr2/sp detection). Must do this in raw mode (!); XP boot will fail otherwise.
924 */
925 uint32_t cbWritten;
926 int rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten);
927 if (RT_SUCCESS(rc2))
928 pRegFrame->rip += pDis->opsize;
929 else if (rc2 == VERR_EM_INTERPRETER)
930 {
931#ifdef IN_RC
932 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
933 {
934 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
935 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
936 rc = VINF_SUCCESS;
937 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
938 }
939 else
940#endif
941 {
942 rc = VINF_EM_RAW_EMULATE_INSTR;
943 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
944 }
945 }
946 else
947 rc = rc2;
948
949 /* See use in pgmPoolAccessHandlerSimple(). */
950 PGM_INVL_VCPU_TLBS(pVCpu);
951#endif
952 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
953 return rc;
954}
955
956/**
957 * Handles the STOSD write accesses.
958 *
959 * @returns VBox status code suitable for scheduling.
960 * @param pVM The VM handle.
961 * @param pPool The pool.
962 * @param pPage The pool page (head).
963 * @param pDis The disassembly of the write instruction.
964 * @param pRegFrame The trap register frame.
965 * @param GCPhysFault The fault address as guest physical address.
966 * @param pvFault The fault address.
967 */
968DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
969 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
970{
971 unsigned uIncrement = pDis->param1.size;
972
973 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
974 Assert(pRegFrame->rcx <= 0x20);
975
976#ifdef VBOX_STRICT
977 if (pDis->opmode == CPUMODE_32BIT)
978 Assert(uIncrement == 4);
979 else
980 Assert(uIncrement == 8);
981#endif
982
983 Log3(("pgmPoolAccessHandlerSTOSD\n"));
984
985 /*
986 * Increment the modification counter and insert it into the list
987 * of modified pages the first time.
988 */
989 if (!pPage->cModifications++)
990 pgmPoolMonitorModifiedInsert(pPool, pPage);
991
992 /*
993 * Execute REP STOSD.
994 *
995 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
996 * write situation, meaning that it's safe to write here.
997 */
998 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
999 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
1000 while (pRegFrame->rcx)
1001 {
1002#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1003 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1004 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1005 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1006#else
1007 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1008#endif
1009#ifdef IN_RC
1010 *(uint32_t *)pu32 = pRegFrame->eax;
1011#else
1012 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
1013#endif
1014 pu32 += uIncrement;
1015 GCPhysFault += uIncrement;
1016 pRegFrame->rdi += uIncrement;
1017 pRegFrame->rcx--;
1018 }
1019 pRegFrame->rip += pDis->opsize;
1020
1021#ifdef IN_RC
1022 /* See use in pgmPoolAccessHandlerSimple(). */
1023 PGM_INVL_VCPU_TLBS(pVCpu);
1024#endif
1025
1026 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
1027 return VINF_SUCCESS;
1028}
1029
1030
1031/**
1032 * Handles the simple write accesses.
1033 *
1034 * @returns VBox status code suitable for scheduling.
1035 * @param pVM The VM handle.
1036 * @param pVCpu The VMCPU handle.
1037 * @param pPool The pool.
1038 * @param pPage The pool page (head).
1039 * @param pDis The disassembly of the write instruction.
1040 * @param pRegFrame The trap register frame.
1041 * @param GCPhysFault The fault address as guest physical address.
1042 * @param pvFault The fault address.
1043 * @param pfReused Reused state (out)
1044 */
1045DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1046 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
1047{
1048 Log3(("pgmPoolAccessHandlerSimple\n"));
1049 /*
1050 * Increment the modification counter and insert it into the list
1051 * of modified pages the first time.
1052 */
1053 if (!pPage->cModifications++)
1054 pgmPoolMonitorModifiedInsert(pPool, pPage);
1055
1056 /*
1057 * Clear all the pages. ASSUMES that pvFault is readable.
1058 */
1059#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1060 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1061 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, pDis);
1062 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1063#else
1064 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, pDis);
1065#endif
1066
1067 /*
1068 * Interpret the instruction.
1069 */
1070 uint32_t cb;
1071 int rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb);
1072 if (RT_SUCCESS(rc))
1073 pRegFrame->rip += pDis->opsize;
1074 else if (rc == VERR_EM_INTERPRETER)
1075 {
1076 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1077 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1078 rc = VINF_EM_RAW_EMULATE_INSTR;
1079 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1080 }
1081
1082 if (rc == VINF_SUCCESS)
1083 {
1084 switch (pPage->enmKind)
1085 {
1086 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1087 {
1088 X86PTEPAE GstPte;
1089 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1090 AssertRC(rc);
1091
1092 /* Check the new value written by the guest. If present and with a bogus physical address, then
1093 * it's fairly safe to assume the guest is reusing the PT.
1094 */
1095 if (GstPte.n.u1Present)
1096 {
1097 RTHCPHYS HCPhys = -1;
1098 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1099 if (rc != VINF_SUCCESS)
1100 {
1101 *pfReused = true;
1102 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1103 }
1104 }
1105 break;
1106 }
1107 }
1108 }
1109
1110#ifdef IN_RC
1111 /*
1112 * Quick hack: with logging enabled we're getting stale
1113 * code TLBs but no data TLB entry for EIP, and crash in EMInterpretDisasOne.
1114 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
1115 * have to be fixed to support this. But that'll have to wait till next week.
1116 *
1117 * An alternative is to keep track of the changed PTEs together with the
1118 * GCPhys from the guest PT. This may prove expensive though.
1119 *
1120 * At the moment, it's VITAL that this is done AFTER interpreting the instruction,
1121 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1122 */
1123 PGM_INVL_VCPU_TLBS(pVCpu);
1124#endif
1125
1126 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1127 return rc;
1128}
1129
1130/**
1131 * \#PF Handler callback for PT write accesses.
1132 *
1133 * @returns VBox status code (appropriate for GC return).
1134 * @param pVM VM Handle.
1135 * @param uErrorCode CPU Error code.
1136 * @param pRegFrame Trap register frame.
1137 * NULL on DMA and other non CPU access.
1138 * @param pvFault The fault address (cr2).
1139 * @param GCPhysFault The GC physical address corresponding to pvFault.
1140 * @param pvUser User argument.
1141 */
1142DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1143{
1144 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1145 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1146 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1147 PVMCPU pVCpu = VMMGetCpu(pVM);
1148 unsigned cMaxModifications;
1149 bool fForcedFlush = false;
1150
1151 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1152
1153 pgmLock(pVM);
1154 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1155 {
1156 /* Pool page changed while we were waiting for the lock; ignore. */
1157 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1158 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1159 pgmUnlock(pVM);
1160 return VINF_SUCCESS;
1161 }
1162
1163 /*
1164 * Disassemble the faulting instruction.
1165 */
1166 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1167 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1168 AssertReturnStmt(rc == VINF_SUCCESS, pgmUnlock(pVM), rc);
1169
1170 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1171
1172 /*
1173 * We should ALWAYS have the list head as user parameter. This
1174 * is because we use that page to record the changes.
1175 */
1176 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1177#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1178 Assert(!pPage->fDirty);
1179#endif
1180
1181 /* Maximum nr of modifications depends on the guest mode. */
1182 if (pDis->mode == CPUMODE_32BIT)
1183 cMaxModifications = 48;
1184 else
1185 cMaxModifications = 24;
1186
1187 /*
1188 * Incremental page table updates should weigh more than random ones.
1189 * (Only applies when started from offset 0)
1190 */
1191 pVCpu->pgm.s.cPoolAccessHandler++;
1192 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1193 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1194 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1195 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1196 {
1197 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1198 pPage->cModifications = pPage->cModifications * 2;
1199 pPage->pvLastAccessHandlerFault = pvFault;
1200 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1201 if (pPage->cModifications >= cMaxModifications)
1202 {
1203 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1204 fForcedFlush = true;
1205 }
1206 }
1207
1208 if (pPage->cModifications >= cMaxModifications)
1209 Log(("Mod overflow %VGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1210
1211 /*
1212 * Check if it's worth dealing with.
1213 */
1214 bool fReused = false;
1215 bool fNotReusedNotForking = false;
1216 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1217 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1218 )
1219 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1220 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1221 {
1222 /*
1223 * Simple instructions, no REP prefix.
1224 */
1225 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1226 {
1227 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1228 if (fReused)
1229 goto flushPage;
1230
1231 /* A mov instruction to change the first page table entry will be remembered so we can detect
1232 * full page table changes early on. This will reduce the number of unnecessary traps we'll take.
1233 */
1234 if ( rc == VINF_SUCCESS
1235 && pDis->pCurInstr->opcode == OP_MOV
1236 && (pvFault & PAGE_OFFSET_MASK) == 0)
1237 {
1238 pPage->pvLastAccessHandlerFault = pvFault;
1239 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1240 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1241 /* Make sure we don't kick out a page too quickly. */
1242 if (pPage->cModifications > 8)
1243 pPage->cModifications = 2;
1244 }
1245 else
1246 if (pPage->pvLastAccessHandlerFault == pvFault)
1247 {
1248 /* ignore the 2nd write to this page table entry. */
1249 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1250 }
1251 else
1252 {
1253 pPage->pvLastAccessHandlerFault = 0;
1254 pPage->pvLastAccessHandlerRip = 0;
1255 }
1256
1257 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1258 pgmUnlock(pVM);
1259 return rc;
1260 }
1261
1262 /*
1263 * Windows is frequently doing small memset() operations (netio test 4k+).
1264 * We have to deal with these or we'll kill the cache and performance.
1265 */
1266 if ( pDis->pCurInstr->opcode == OP_STOSWD
1267 && !pRegFrame->eflags.Bits.u1DF
1268 && pDis->opmode == pDis->mode
1269 && pDis->addrmode == pDis->mode)
1270 {
1271 bool fValidStosd = false;
1272
1273 if ( pDis->mode == CPUMODE_32BIT
1274 && pDis->prefix == PREFIX_REP
1275 && pRegFrame->ecx <= 0x20
1276 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1277 && !((uintptr_t)pvFault & 3)
1278 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1279 )
1280 {
1281 fValidStosd = true;
1282 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1283 }
1284 else
1285 if ( pDis->mode == CPUMODE_64BIT
1286 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1287 && pRegFrame->rcx <= 0x20
1288 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1289 && !((uintptr_t)pvFault & 7)
1290 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1291 )
1292 {
1293 fValidStosd = true;
1294 }
1295
1296 if (fValidStosd)
1297 {
1298 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1299 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1300 pgmUnlock(pVM);
1301 return rc;
1302 }
1303 }
1304
1305 /* REP prefix, don't bother. */
1306 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1307 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1308 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1309 fNotReusedNotForking = true;
1310 }
1311
1312#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1313 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1314 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1315 */
1316 if ( pPage->cModifications >= cMaxModifications
1317 && !fForcedFlush
1318 && pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1319 && ( fNotReusedNotForking
1320 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1321 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1322 )
1323 )
1324 {
1325 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1326 Assert(pPage->fDirty == false);
1327
1328 /* Flush any monitored duplicates as we will disable write protection. */
1329 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1330 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1331 {
1332 PPGMPOOLPAGE pPageHead = pPage;
1333
1334 /* Find the monitor head. */
1335 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1336 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1337
1338 while (pPageHead)
1339 {
1340 unsigned idxNext = pPageHead->iMonitoredNext;
1341
1342 if (pPageHead != pPage)
1343 {
1344 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1345 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1346 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1347 AssertRC(rc2);
1348 }
1349
1350 if (idxNext == NIL_PGMPOOL_IDX)
1351 break;
1352
1353 pPageHead = &pPool->aPages[idxNext];
1354 }
1355 }
1356
1357 /* The flushing above might fail for locked pages, so double check. */
1358 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1359 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1360 {
1361 /* Temporarily allow write access to the page table again. */
1362 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
1363 if (rc == VINF_SUCCESS)
1364 {
1365 rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
1366 AssertMsg(rc == VINF_SUCCESS
1367 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1368 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1369 || rc == VERR_PAGE_NOT_PRESENT,
1370 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1371
1372 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1373 pPage->pvDirtyFault = pvFault;
1374
1375 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1376 pgmUnlock(pVM);
1377 return rc;
1378 }
1379 }
1380 }
1381#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1382
1383flushPage:
1384 /*
1385 * Not worth it, so flush it.
1386 *
1387 * If we considered it to be reused, don't go back to ring-3
1388 * to emulate failed instructions since we usually cannot
1389 * interpret them. This may be a bit risky, in which case
1390 * the reuse detection must be fixed.
1391 */
1392 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1393 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1394 rc = VINF_SUCCESS;
1395 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1396 pgmUnlock(pVM);
1397 return rc;
1398}
1399
1400# endif /* !IN_RING3 */
1401
1402# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1403
1404# ifdef VBOX_STRICT
1405/**
1406 * Check references to guest physical memory in a PAE / PAE page table.
1407 *
1408 * @param pPool The pool.
1409 * @param pPage The page.
1410 * @param pShwPT The shadow page table (mapping of the page).
1411 * @param pGstPT The guest page table.
1412 */
1413DECLINLINE(void) pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
1414{
1415 unsigned cErrors = 0;
1416#ifdef VBOX_STRICT
1417 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1418 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1419#endif
1420 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1421 {
1422 if (pShwPT->a[i].n.u1Present)
1423 {
1424 RTHCPHYS HCPhys = -1;
1425 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1426 if ( rc != VINF_SUCCESS
1427 || (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) != HCPhys)
1428 {
1429 RTHCPHYS HCPhysPT = -1;
1430 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1431 cErrors++;
1432
1433 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pPage->GCPhys, &HCPhysPT);
1434 AssertRC(rc);
1435
1436 for (unsigned i = 0; i < pPool->cCurPages; i++)
1437 {
1438 PPGMPOOLPAGE pTempPage = &pPool->aPages[i];
1439
1440 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1441 {
1442 PX86PTPAE pShwPT2 = (PX86PTPAE)PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pTempPage);
1443
1444 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1445 {
1446 if ( pShwPT2->a[j].n.u1Present
1447 && pShwPT2->a[j].n.u1Write
1448 && ((pShwPT2->a[j].u & X86_PTE_PAE_PG_MASK) == HCPhysPT))
1449 {
1450 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, pShwPT->a[j].u, pShwPT2->a[j].u));
1451 }
1452 }
1453 }
1454 }
1455 }
1456 }
1457 }
1458 Assert(!cErrors);
1459}
1460# endif /* VBOX_STRICT */
1461
1462/**
1463 * Clear references to guest physical memory in a PAE / PAE page table.
1464 *
1465 * @returns nr of changed PTEs
1466 * @param pPool The pool.
1467 * @param pPage The page.
1468 * @param pShwPT The shadow page table (mapping of the page).
1469 * @param pGstPT The guest page table.
1470 * @param pOldGstPT The old cached guest page table.
1471 */
1472DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT, PCX86PTPAE pOldGstPT)
1473{
1474 unsigned cChanged = 0;
1475
1476#ifdef VBOX_STRICT
1477 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1478 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1479#endif
1480 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1481 {
1482 if (pShwPT->a[i].n.u1Present)
1483 {
1484 /* If the old cached PTE is identical, there's no need to flush the shadow copy. */
1485 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1486 {
1487#ifdef VBOX_STRICT
1488 RTHCPHYS HCPhys = -1;
1489 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1490 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1491#endif
1492 uint64_t uHostAttr = pShwPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1493 bool fHostRW = !!(pShwPT->a[i].u & X86_PTE_RW);
1494 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1495 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1496
1497 if ( uHostAttr == uGuestAttr
1498 && fHostRW <= fGuestRW)
1499 continue;
1500 }
1501 cChanged++;
1502 /* Something was changed, so flush it. */
1503 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1504 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1505 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
1506 ASMAtomicWriteSize(&pShwPT->a[i].u, 0);
1507 }
1508 }
1509 return cChanged;
1510}
1511
1512
1513/**
1514 * Flush a dirty page
1515 *
1516 * @param pVM VM Handle.
1517 * @param pPool The pool.
1518 * @param idxSlot Dirty array slot index
1519 * @param fForceRemoval Force removal from the dirty page list
1520 */
1521static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fForceRemoval = false)
1522{
1523 PPGMPOOLPAGE pPage;
1524 unsigned idxPage;
1525
1526 Assert(idxSlot < RT_ELEMENTS(pPool->aIdxDirtyPages));
1527 if (pPool->aIdxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
1528 return;
1529
1530 idxPage = pPool->aIdxDirtyPages[idxSlot];
1531 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1532 pPage = &pPool->aPages[idxPage];
1533 Assert(pPage->idx == idxPage);
1534 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1535
1536 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1537 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1538
1539 /* Flush those PTEs that have changed. */
1540 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1541 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1542 void *pvGst;
1543 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1544 unsigned cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst, (PCX86PTPAE)&pPool->aDirtyPages[idxSlot][0]);
1545 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1546
1547 /** Note: we might want to consider keeping the dirty page active in case there were many changes. */
1548
1549 /* Write protect the page again to catch all write accesses. */
1550 rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys);
1551 Assert(rc == VINF_SUCCESS);
1552 pPage->fDirty = false;
1553
1554#ifdef VBOX_STRICT
1555 uint64_t fFlags = 0;
1556 RTHCPHYS HCPhys;
1557 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1558 AssertMsg( ( rc == VINF_SUCCESS
1559 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1560 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1561 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1562 || rc == VERR_PAGE_NOT_PRESENT,
1563 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1564#endif
1565
1566 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1567 Assert(pPage->cModifications);
1568 if (cChanges < 4)
1569 pPage->cModifications = 1; /* must use > 0 here */
1570 else
1571 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1572
1573 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1574 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages))
1575 pPool->idxFreeDirtyPage = idxSlot;
1576
1577 pPool->cDirtyPages--;
1578 pPool->aIdxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1579 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1580 Log(("Removed dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1581}
1582
1583# ifndef IN_RING3
1584/**
1585 * Add a new dirty page
1586 *
1587 * @param pVM VM Handle.
1588 * @param pPool The pool.
1589 * @param pPage The page.
1590 */
1591void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1592{
1593 unsigned idxFree;
1594
1595 Assert(PGMIsLocked(pVM));
1596 AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
1597 Assert(!pPage->fDirty);
1598
1599 idxFree = pPool->idxFreeDirtyPage;
1600 Assert(idxFree < RT_ELEMENTS(pPool->aIdxDirtyPages));
1601 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1602
1603 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aIdxDirtyPages))
1604 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* force removal */);
1605 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages));
1606 AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1607
1608 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1609
1610 /* Make a copy of the guest page table as we require valid GCPhys addresses when removing
1611 * references to physical pages. (the HCPhys linear lookup is *extremely* expensive!)
1612 */
1613 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1614 void *pvGst;
1615 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1616 memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
1617#ifdef VBOX_STRICT
1618 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1619#endif
1620
1621 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1622 pPage->fDirty = true;
1623 pPage->idxDirty = idxFree;
1624 pPool->aIdxDirtyPages[idxFree] = pPage->idx;
1625 pPool->cDirtyPages++;
1626
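    /* Advance the free index; if the next slot is still occupied (and the array isn't full),
     * search the ring for a vacant slot. */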
1627 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1628 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages)
1629 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1630 {
1631 unsigned i;
1632 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1633 {
1634 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1635 if (pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1636 {
1637 pPool->idxFreeDirtyPage = idxFree;
1638 break;
1639 }
1640 }
1641 Assert(i != RT_ELEMENTS(pPool->aIdxDirtyPages));
1642 }
1643
1644 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages) || pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1645 return;
1646}
1647# endif /* !IN_RING3 */
1648
1649/**
1650 * Checks if the specified page is dirty (i.e. not write monitored).
1651 *
1652 * @returns true if the page is dirty, false if not.
1653 * @param pVM VM Handle.
1654 * @param GCPhys Guest physical address
1655 */
1656bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1657{
1658 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1659 Assert(PGMIsLocked(pVM));
1660 if (!pPool->cDirtyPages)
1661 return false;
1662
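    /* Page-align the address and do a linear scan of the small dirty-page array. */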
1663 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1664
1665 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1666 {
1667 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1668 {
1669 PPGMPOOLPAGE pPage;
1670 unsigned idxPage = pPool->aIdxDirtyPages[i];
1671
1672 pPage = &pPool->aPages[idxPage];
1673 if (pPage->GCPhys == GCPhys)
1674 return true;
1675 }
1676 }
1677 return false;
1678}
1679
1680/**
1681 * Reset all dirty pages by reinstating page monitoring.
1682 *
1683 * @param pVM VM Handle.
1684 * @param fForceRemoval Force removal of all dirty pages
1685 */
1686void pgmPoolResetDirtyPages(PVM pVM, bool fForceRemoval)
1687{
1688 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1689 Assert(PGMIsLocked(pVM));
1690 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1691
1692 if (!pPool->cDirtyPages)
1693 return;
1694
1695 Log(("pgmPoolResetDirtyPages\n"));
1696 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1697 pgmPoolFlushDirtyPage(pVM, pPool, i, fForceRemoval);
1698
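    /* Reset the free index; if slot 0 is still occupied (and not every slot is dirty),
     * pick the first vacant slot instead. */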
1699 pPool->idxFreeDirtyPage = 0;
1700 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1701 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1702 {
1703 unsigned i;
1704 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1705 {
1706 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1707 {
1708 pPool->idxFreeDirtyPage = i;
1709 break;
1710 }
1711 }
1712 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1713 }
1714
1715 Assert(pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages));
1716 return;
1717}
1718# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1719#endif /* PGMPOOL_WITH_MONITORING */
1720
1721#ifdef PGMPOOL_WITH_CACHE
1722
1723/**
1724 * Inserts a page into the GCPhys hash table.
1725 *
1726 * @param pPool The pool.
1727 * @param pPage The page.
1728 */
1729DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1730{
1731 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1732 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1733 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1734 pPage->iNext = pPool->aiHash[iHash];
1735 pPool->aiHash[iHash] = pPage->idx;
1736}
1737
1738
1739/**
1740 * Removes a page from the GCPhys hash table.
1741 *
1742 * @param pPool The pool.
1743 * @param pPage The page.
1744 */
1745DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1746{
1747 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1748 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1749 if (pPool->aiHash[iHash] == pPage->idx)
1750 pPool->aiHash[iHash] = pPage->iNext;
1751 else
1752 {
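    /* Not the list head: walk the collision chain, find the predecessor and unlink the page. */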
1753 uint16_t iPrev = pPool->aiHash[iHash];
1754 for (;;)
1755 {
1756 const int16_t i = pPool->aPages[iPrev].iNext;
1757 if (i == pPage->idx)
1758 {
1759 pPool->aPages[iPrev].iNext = pPage->iNext;
1760 break;
1761 }
1762 if (i == NIL_PGMPOOL_IDX)
1763 {
1764 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1765 break;
1766 }
1767 iPrev = i;
1768 }
1769 }
1770 pPage->iNext = NIL_PGMPOOL_IDX;
1771}
1772
1773
1774/**
1775 * Frees up one cache page.
1776 *
1777 * @returns VBox status code.
1778 * @retval VINF_SUCCESS on success.
1779 * @param pPool The pool.
1780 * @param iUser The user index.
1781 */
1782static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1783{
1784#ifndef IN_RC
1785 const PVM pVM = pPool->CTX_SUFF(pVM);
1786#endif
1787 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
1788 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1789
1790 /*
1791 * Select one page from the tail of the age list.
1792 */
1793 PPGMPOOLPAGE pPage;
1794 for (unsigned iLoop = 0; ; iLoop++)
1795 {
1796 uint16_t iToFree = pPool->iAgeTail;
1797 if (iToFree == iUser)
1798 iToFree = pPool->aPages[iToFree].iAgePrev;
1799/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1800 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1801 {
1802 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1803 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1804 {
1805 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1806 continue;
1807 iToFree = i;
1808 break;
1809 }
1810 }
1811*/
1812 Assert(iToFree != iUser);
1813 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1814 pPage = &pPool->aPages[iToFree];
1815
1816 /*
1817 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1818 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1819 */
1820 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1821 break;
1822 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1823 pgmPoolCacheUsed(pPool, pPage);
1824 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1825 }
1826
1827 /*
1828 * Found a usable page, flush it and return.
1829 */
1830 return pgmPoolFlushPage(pPool, pPage);
1831}
1832
1833
1834/**
1835 * Checks if a kind mismatch is really a page being reused
1836 * or if it's just a normal remapping.
1837 *
1838 * @returns true if reused and the cached page (enmKind1) should be flushed
1839 * @returns false if not reused.
1840 * @param enmKind1 The kind of the cached page.
1841 * @param enmKind2 The kind of the requested page.
1842 */
1843static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1844{
1845 switch (enmKind1)
1846 {
1847 /*
1848 * Never reuse them. There is no remapping in non-paging mode.
1849 */
1850 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1851 case PGMPOOLKIND_32BIT_PD_PHYS:
1852 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1853 case PGMPOOLKIND_PAE_PD_PHYS:
1854 case PGMPOOLKIND_PAE_PDPT_PHYS:
1855 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1856 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1857 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1858 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1859 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1860 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1861 return false;
1862
1863 /*
1864 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1865 */
1866 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1867 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1868 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1869 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1870 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1871 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1872 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1873 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1874 case PGMPOOLKIND_32BIT_PD:
1875 case PGMPOOLKIND_PAE_PDPT:
1876 switch (enmKind2)
1877 {
1878 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1879 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1880 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1881 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1882 case PGMPOOLKIND_64BIT_PML4:
1883 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1884 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1885 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1886 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1887 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1888 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1889 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1890 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1891 return true;
1892 default:
1893 return false;
1894 }
1895
1896 /*
1897 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1898 */
1899 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1900 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1901 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1902 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1903 case PGMPOOLKIND_64BIT_PML4:
1904 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1905 switch (enmKind2)
1906 {
1907 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1908 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1909 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1910 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1911 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1912 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1913 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1914 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1915 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1916 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1917 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1918 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1919 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1920 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1921 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1922 return true;
1923 default:
1924 return false;
1925 }
1926
1927 /*
1928 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1929 */
1930 case PGMPOOLKIND_ROOT_NESTED:
1931 return false;
1932
1933 default:
1934 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1935 }
1936}
1937
1938
1939/**
1940 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1941 *
1942 * @returns VBox status code.
1943 * @retval VINF_PGM_CACHED_PAGE on success.
1944 * @retval VERR_FILE_NOT_FOUND if not found.
1945 * @param pPool The pool.
1946 * @param GCPhys The GC physical address of the page we're gonna shadow.
1947 * @param enmKind The kind of mapping.
1948 * @param enmAccess Access type for the mapping (only relevant for big pages)
1949 * @param iUser The shadow page pool index of the user table.
1950 * @param iUserTable The index into the user table (shadowed).
1951 * @param ppPage Where to store the pointer to the page.
1952 */
1953static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1954{
1955#ifndef IN_RC
1956 const PVM pVM = pPool->CTX_SUFF(pVM);
1957#endif
1958 /*
1959 * Look up the GCPhys in the hash.
1960 */
1961 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1962 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
1963 if (i != NIL_PGMPOOL_IDX)
1964 {
1965 do
1966 {
1967 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1968 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1969 if (pPage->GCPhys == GCPhys)
1970 {
1971 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
1972 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
1973 {
1974 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
1975 * doesn't flush it in case there are no more free use records.
1976 */
1977 pgmPoolCacheUsed(pPool, pPage);
1978
1979 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1980 if (RT_SUCCESS(rc))
1981 {
1982 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
1983 *ppPage = pPage;
1984 if (pPage->cModifications)
1985 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
1986 STAM_COUNTER_INC(&pPool->StatCacheHits);
1987 return VINF_PGM_CACHED_PAGE;
1988 }
1989 return rc;
1990 }
1991
1992 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
1993 {
1994 /*
1995 * The kind is different. In some cases we should now flush the page
1996 * as it has been reused, but in most cases this is normal remapping
1997 * of PDs as PT or big pages using the GCPhys field in a slightly
1998 * different way than the other kinds.
1999 */
2000 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2001 {
2002 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2003 pgmPoolFlushPage(pPool, pPage);
2004 break;
2005 }
2006 }
2007 }
2008
2009 /* next */
2010 i = pPage->iNext;
2011 } while (i != NIL_PGMPOOL_IDX);
2012 }
2013
2014 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2015 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2016 return VERR_FILE_NOT_FOUND;
2017}
2018
2019
2020/**
2021 * Inserts a page into the cache.
2022 *
2023 * @param pPool The pool.
2024 * @param pPage The cached page.
2025 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2026 */
2027static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2028{
2029 /*
2030 * Insert into the GCPhys hash if the page is fit for that.
2031 */
2032 Assert(!pPage->fCached);
2033 if (fCanBeCached)
2034 {
2035 pPage->fCached = true;
2036 pgmPoolHashInsert(pPool, pPage);
2037 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2038 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2039 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2040 }
2041 else
2042 {
2043 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2044 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2045 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2046 }
2047
2048 /*
2049 * Insert at the head of the age list.
2050 */
2051 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2052 pPage->iAgeNext = pPool->iAgeHead;
2053 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2054 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2055 else
2056 pPool->iAgeTail = pPage->idx;
2057 pPool->iAgeHead = pPage->idx;
2058}
2059
2060
2061/**
2062 * Flushes a cached page.
2063 *
2064 * @param pPool The pool.
2065 * @param pPage The cached page.
2066 */
2067static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2068{
2069 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2070
2071 /*
2072 * Remove the page from the hash.
2073 */
2074 if (pPage->fCached)
2075 {
2076 pPage->fCached = false;
2077 pgmPoolHashRemove(pPool, pPage);
2078 }
2079 else
2080 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2081
2082 /*
2083 * Remove it from the age list.
2084 */
2085 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2086 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2087 else
2088 pPool->iAgeTail = pPage->iAgePrev;
2089 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2090 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2091 else
2092 pPool->iAgeHead = pPage->iAgeNext;
2093 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2094 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2095}
2096
2097#endif /* PGMPOOL_WITH_CACHE */
2098#ifdef PGMPOOL_WITH_MONITORING
2099
2100/**
2101 * Looks for pages sharing the monitor.
2102 *
2103 * @returns Pointer to the head page.
2104 * @returns NULL if not found.
2105 * @param pPool The pool.
2106 * @param pNewPage The page which is going to be monitored.
2107 */
2108static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2109{
2110#ifdef PGMPOOL_WITH_CACHE
2111 /*
2112 * Look up the GCPhys in the hash.
2113 */
2114 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2115 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2116 if (i == NIL_PGMPOOL_IDX)
2117 return NULL;
2118 do
2119 {
2120 PPGMPOOLPAGE pPage = &pPool->aPages[i];
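 /* The unsigned subtraction matches any pool page whose GCPhys falls within the same
  * guest page (GCPhys is page aligned). */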
2121 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2122 && pPage != pNewPage)
2123 {
2124 switch (pPage->enmKind)
2125 {
2126 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2127 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2128 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2129 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2130 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2131 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2132 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2133 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2134 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2135 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2136 case PGMPOOLKIND_64BIT_PML4:
2137 case PGMPOOLKIND_32BIT_PD:
2138 case PGMPOOLKIND_PAE_PDPT:
2139 {
2140 /* find the head */
2141 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2142 {
2143 Assert(pPage->iMonitoredPrev != pPage->idx);
2144 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2145 }
2146 return pPage;
2147 }
2148
2149 /* ignore, no monitoring. */
2150 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2151 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2152 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2153 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2154 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2155 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2156 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2157 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2158 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2159 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2160 case PGMPOOLKIND_ROOT_NESTED:
2161 case PGMPOOLKIND_PAE_PD_PHYS:
2162 case PGMPOOLKIND_PAE_PDPT_PHYS:
2163 case PGMPOOLKIND_32BIT_PD_PHYS:
2164 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2165 break;
2166 default:
2167 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2168 }
2169 }
2170
2171 /* next */
2172 i = pPage->iNext;
2173 } while (i != NIL_PGMPOOL_IDX);
2174#endif
2175 return NULL;
2176}
2177
2178
2179/**
2180 * Enables write monitoring of a guest page.
2181 *
2182 * @returns VBox status code.
2183 * @retval VINF_SUCCESS on success.
2184 * @param pPool The pool.
2185 * @param pPage The cached page.
2186 */
2187static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2188{
2189 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
2190
2191 /*
2192 * Filter out the relevant kinds.
2193 */
2194 switch (pPage->enmKind)
2195 {
2196 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2197 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2198 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2199 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2200 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2201 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2202 case PGMPOOLKIND_64BIT_PML4:
2203 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2204 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2205 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2206 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2207 case PGMPOOLKIND_32BIT_PD:
2208 case PGMPOOLKIND_PAE_PDPT:
2209 break;
2210
2211 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2212 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2213 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2214 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2215 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2216 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2217 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2218 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2219 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2220 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2221 case PGMPOOLKIND_ROOT_NESTED:
2222 /* Nothing to monitor here. */
2223 return VINF_SUCCESS;
2224
2225 case PGMPOOLKIND_32BIT_PD_PHYS:
2226 case PGMPOOLKIND_PAE_PDPT_PHYS:
2227 case PGMPOOLKIND_PAE_PD_PHYS:
2228 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2229 /* Nothing to monitor here. */
2230 return VINF_SUCCESS;
2231#ifdef PGMPOOL_WITH_MIXED_PT_CR3
2232 break;
2233#else
2234 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2235#endif
2236 default:
2237 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2238 }
2239
2240 /*
2241 * Install handler.
2242 */
2243 int rc;
2244 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2245 if (pPageHead)
2246 {
2247 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2248 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2249
2250#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2251 if (pPageHead->fDirty)
2252 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, true /* force removal */);
2253#endif
2254
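 /* Another pool page already monitors this guest page; share its handler by linking
  * this page into the monitored list right after the head. */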
2255 pPage->iMonitoredPrev = pPageHead->idx;
2256 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2257 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2258 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2259 pPageHead->iMonitoredNext = pPage->idx;
2260 rc = VINF_SUCCESS;
2261 }
2262 else
2263 {
2264 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2265 PVM pVM = pPool->CTX_SUFF(pVM);
2266 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2267 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2268 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
2269 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2270 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2271 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2272 pPool->pszAccessHandler);
2273 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2274 * the heap size should suffice. */
2275 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2276 Assert(!(VMMGetCpu(pVM)->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(VMMGetCpu(pVM), VMCPU_FF_PGM_SYNC_CR3));
2277 }
2278 pPage->fMonitored = true;
2279 return rc;
2280}
2281
2282
2283/**
2284 * Disables write monitoring of a guest page.
2285 *
2286 * @returns VBox status code.
2287 * @retval VINF_SUCCESS on success.
2288 * @param pPool The pool.
2289 * @param pPage The cached page.
2290 */
2291static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2292{
2293 /*
2294 * Filter out the relevant kinds.
2295 */
2296 switch (pPage->enmKind)
2297 {
2298 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2299 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2300 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2301 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2302 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2303 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2304 case PGMPOOLKIND_64BIT_PML4:
2305 case PGMPOOLKIND_32BIT_PD:
2306 case PGMPOOLKIND_PAE_PDPT:
2307 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2308 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2309 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2310 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2311 break;
2312
2313 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2314 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2315 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2316 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2317 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2318 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2319 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2320 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2321 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2322 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2323 case PGMPOOLKIND_ROOT_NESTED:
2324 case PGMPOOLKIND_PAE_PD_PHYS:
2325 case PGMPOOLKIND_PAE_PDPT_PHYS:
2326 case PGMPOOLKIND_32BIT_PD_PHYS:
2327 /* Nothing to monitor here. */
2328 return VINF_SUCCESS;
2329
2330#ifdef PGMPOOL_WITH_MIXED_PT_CR3
2331 break;
2332#endif
2333 default:
2334 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2335 }
2336
2337 /*
2338 * Remove the page from the monitored list or uninstall it if last.
2339 */
2340 const PVM pVM = pPool->CTX_SUFF(pVM);
2341 int rc;
2342 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2343 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2344 {
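 /* Other pages share this monitor: if we're the head, hand the physical handler over to
  * the next page; otherwise just unlink ourselves from the chain. */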
2345 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2346 {
2347 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2348 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2349 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2350 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2351 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2352 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2353 pPool->pszAccessHandler);
2354 AssertFatalRCSuccess(rc);
2355 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2356 }
2357 else
2358 {
2359 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2360 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2361 {
2362 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2363 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2364 }
2365 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2366 rc = VINF_SUCCESS;
2367 }
2368 }
2369 else
2370 {
2371 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2372 AssertFatalRC(rc);
2373#ifdef VBOX_STRICT
2374 PVMCPU pVCpu = VMMGetCpu(pVM);
2375#endif
2376 AssertMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2377 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2378 }
2379 pPage->fMonitored = false;
2380
2381 /*
2382 * Remove it from the list of modified pages (if in it).
2383 */
2384 pgmPoolMonitorModifiedRemove(pPool, pPage);
2385
2386 return rc;
2387}
2388
2389
2390/**
2391 * Inserts the page into the list of modified pages.
2392 *
2393 * @param pPool The pool.
2394 * @param pPage The page.
2395 */
2396void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2397{
2398 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2399 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2400 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2401 && pPool->iModifiedHead != pPage->idx,
2402 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2403 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2404 pPool->iModifiedHead, pPool->cModifiedPages));
2405
2406 pPage->iModifiedNext = pPool->iModifiedHead;
2407 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2408 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2409 pPool->iModifiedHead = pPage->idx;
2410 pPool->cModifiedPages++;
2411#ifdef VBOX_WITH_STATISTICS
2412 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2413 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2414#endif
2415}
2416
2417
2418/**
2419 * Removes the page from the list of modified pages and resets the
2420 * modification counter.
2421 *
2422 * @param pPool The pool.
2423 * @param pPage The page which is believed to be in the list of modified pages.
2424 */
2425static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2426{
2427 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2428 if (pPool->iModifiedHead == pPage->idx)
2429 {
2430 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2431 pPool->iModifiedHead = pPage->iModifiedNext;
2432 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2433 {
2434 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2435 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2436 }
2437 pPool->cModifiedPages--;
2438 }
2439 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2440 {
2441 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2442 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2443 {
2444 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2445 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2446 }
2447 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2448 pPool->cModifiedPages--;
2449 }
2450 else
2451 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2452 pPage->cModifications = 0;
2453}
2454
2455
2456/**
2457 * Zaps the list of modified pages, resetting their modification counters in the process.
2458 *
2459 * @param pVM The VM handle.
2460 */
2461static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2462{
2463 pgmLock(pVM);
2464 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2465 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2466
2467 unsigned cPages = 0; NOREF(cPages);
2468
2469#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2470 pgmPoolResetDirtyPages(pVM, true /* force removal. */);
2471#endif
2472
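 /* Detach the whole modified list, then walk the old chain resetting the links and
  * modification counter of each page. */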
2473 uint16_t idx = pPool->iModifiedHead;
2474 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2475 while (idx != NIL_PGMPOOL_IDX)
2476 {
2477 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2478 idx = pPage->iModifiedNext;
2479 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2480 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2481 pPage->cModifications = 0;
2482 Assert(++cPages);
2483 }
2484 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2485 pPool->cModifiedPages = 0;
2486 pgmUnlock(pVM);
2487}
2488
2489
2490#ifdef IN_RING3
2491/**
2492 * Callback to clear all shadow pages and clear all modification counters.
2493 *
2494 * @returns VBox status code.
2495 * @param pVM The VM handle.
2496 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
2497 * @param pvUser Unused parameter.
2498 *
2499 * @remark Should only be used when monitoring is available, thus placed in
2500 * the PGMPOOL_WITH_MONITORING \#ifdef.
2501 */
2502DECLCALLBACK(int) pgmPoolClearAll(PVM pVM, PVMCPU pVCpu, void *pvUser)
2503{
2504 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2505 STAM_PROFILE_START(&pPool->StatClearAll, c);
2506 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2507 NOREF(pvUser); NOREF(pVCpu);
2508
2509 pgmLock(pVM);
2510
2511#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2512 pgmPoolResetDirtyPages(pVM, true /* force removal. */);
2513#endif
2514
2515 /*
2516 * Iterate all the pages until we've encountered all those in use.
2517 * This is a simple but not quite optimal solution.
2518 */
2519 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2520 unsigned cLeft = pPool->cUsedPages;
2521 unsigned iPage = pPool->cCurPages;
2522 while (--iPage >= PGMPOOL_IDX_FIRST)
2523 {
2524 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2525 if (pPage->GCPhys != NIL_RTGCPHYS)
2526 {
2527 switch (pPage->enmKind)
2528 {
2529 /*
2530 * We only care about shadow page tables.
2531 */
2532 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2533 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2534 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2535 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2536 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2537 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2538 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2539 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2540 {
2541#ifdef PGMPOOL_WITH_USER_TRACKING
2542 if (pPage->cPresent)
2543#endif
2544 {
2545 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2546 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2547 ASMMemZeroPage(pvShw);
2548 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2549#ifdef PGMPOOL_WITH_USER_TRACKING
2550 pPage->cPresent = 0;
2551 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
2552#endif
2553 }
2554#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2555 else
2556 Assert(!pPage->fDirty);
2557#endif
2558 }
2559 /* fall thru */
2560
2561 default:
2562#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2563 Assert(!pPage->fDirty);
2564#endif
2565 Assert(!pPage->cModifications || ++cModifiedPages);
2566 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2567 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2568 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2569 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2570 pPage->cModifications = 0;
2571 break;
2572
2573 }
2574 if (!--cLeft)
2575 break;
2576 }
2577 }
2578
2579 /* swipe the special pages too. */
2580 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2581 {
2582 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2583 if (pPage->GCPhys != NIL_RTGCPHYS)
2584 {
2585 Assert(!pPage->cModifications || ++cModifiedPages);
2586 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2587 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2588 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2589 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2590 pPage->cModifications = 0;
2591 }
2592 }
2593
2594#ifndef DEBUG_michael
2595 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2596#endif
2597 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2598 pPool->cModifiedPages = 0;
2599
2600#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2601 /*
2602 * Clear all the GCPhys links and rebuild the phys ext free list.
2603 */
2604 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2605 pRam;
2606 pRam = pRam->CTX_SUFF(pNext))
2607 {
2608 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2609 while (iPage-- > 0)
2610 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
2611 }
2612
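 /* Chain all physical cross reference extent entries into one free list and clear
  * their page indexes. */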
2613 pPool->iPhysExtFreeHead = 0;
2614 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2615 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2616 for (unsigned i = 0; i < cMaxPhysExts; i++)
2617 {
2618 paPhysExts[i].iNext = i + 1;
2619 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2620 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2621 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2622 }
2623 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2624#endif
2625
2626#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2627 /* Clear all dirty pages. */
2628 pPool->idxFreeDirtyPage = 0;
2629 pPool->cDirtyPages = 0;
2630 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
2631 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
2632#endif
2633
2634 /* Clear the PGM_SYNC_CLEAR_PGM_POOL flag on all VCPUs to prevent redundant flushes. */
2635 for (unsigned idCpu = 0; idCpu < pVM->cCPUs; idCpu++)
2636 {
2637 PVMCPU pVCpu = &pVM->aCpus[idCpu];
2638
2639 pVCpu->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2640 }
2641
2642 pPool->cPresent = 0;
2643 pgmUnlock(pVM);
2644 PGM_INVL_ALL_VCPU_TLBS(pVM);
2645 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2646 return VINF_SUCCESS;
2647}
2648#endif /* IN_RING3 */
2649
2650
2651/**
2652 * Handles SyncCR3 pool tasks.
2653 *
2654 * @returns VBox status code.
2655 * @retval VINF_SUCCESS on success.
2656 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2657 * @param pVCpu The VMCPU handle.
2658 * @remark Should only be used when monitoring is available, thus placed in
2659 * the PGMPOOL_WITH_MONITORING #ifdef.
2660 */
2661int pgmPoolSyncCR3(PVMCPU pVCpu)
2662{
2663 PVM pVM = pVCpu->CTX_SUFF(pVM);
2664 LogFlow(("pgmPoolSyncCR3\n"));
2665
2666 /*
2667 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2668 * Occasionally we will have to clear all the shadow page tables because we wanted
2669 * to monitor a page which was mapped by too many shadowed page tables. This operation is
2670 * sometimes referred to as a 'lightweight flush'.
2671 */
2672# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2673 if (ASMBitTestAndClear(&pVCpu->pgm.s.fSyncFlags, PGM_SYNC_CLEAR_PGM_POOL_BIT))
2674 {
2675 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmPoolClearAll, NULL);
2676 AssertRC(rc);
2677 }
2678# else /* !IN_RING3 */
2679 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2680 {
2681 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2682 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2683 return VINF_PGM_SYNC_CR3;
2684 }
2685# endif /* !IN_RING3 */
2686 else
2687 pgmPoolMonitorModifiedClearAll(pVM);
2688
2689 return VINF_SUCCESS;
2690}
2691
2692#endif /* PGMPOOL_WITH_MONITORING */
2693#ifdef PGMPOOL_WITH_USER_TRACKING
2694
2695/**
2696 * Frees up at least one user entry.
2697 *
2698 * @returns VBox status code.
2699 * @retval VINF_SUCCESS on success.
2700 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2701 * @param pPool The pool.
2702 * @param iUser The user index.
2703 */
2704static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2705{
2706 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2707#ifdef PGMPOOL_WITH_CACHE
2708 /*
2709 * Just free cached pages in a braindead fashion.
2710 */
2711 /** @todo walk the age list backwards and free the first with usage. */
2712 int rc = VINF_SUCCESS;
2713 do
2714 {
2715 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2716 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2717 rc = rc2;
2718 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2719 return rc;
2720#else
2721 /*
2722 * Lazy approach.
2723 */
2724 /* @todo This path no longer works (CR3 root pages will be flushed)!! */
2725 AssertCompileFailed();
2726 Assert(!CPUMIsGuestInLongMode(pVM));
2727 pgmPoolFlushAllInt(pPool);
2728 return VERR_PGM_POOL_FLUSHED;
2729#endif
2730}
2731
2732
2733/**
2734 * Inserts a page into the cache.
2735 *
2736 * This will create a user node for the page, insert it into the GCPhys
2737 * hash, and insert it into the age list.
2738 *
2739 * @returns VBox status code.
2740 * @retval VINF_SUCCESS if successfully added.
2741 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2742 * @param pPool The pool.
2743 * @param pPage The cached page.
2744 * @param GCPhys The GC physical address of the page we're gonna shadow.
2745 * @param iUser The user index.
2746 * @param iUserTable The user table index.
2747 */
2748DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2749{
2750 int rc = VINF_SUCCESS;
2751 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2752
2753 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2754
2755#ifdef VBOX_STRICT
2756 /*
2757 * Check that the entry doesn't already exist.
2758 */
2759 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2760 {
2761 uint16_t i = pPage->iUserHead;
2762 do
2763 {
2764 Assert(i < pPool->cMaxUsers);
2765 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2766 i = paUsers[i].iNext;
2767 } while (i != NIL_PGMPOOL_USER_INDEX);
2768 }
2769#endif
2770
2771 /*
2772 * Find a free user node.
2773 */
2774 uint16_t i = pPool->iUserFreeHead;
2775 if (i == NIL_PGMPOOL_USER_INDEX)
2776 {
2777 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2778 if (RT_FAILURE(rc))
2779 return rc;
2780 i = pPool->iUserFreeHead;
2781 }
2782
2783 /*
2784 * Unlink the user node from the free list,
2785 * initialize and insert it into the user list.
2786 */
2787 pPool->iUserFreeHead = paUsers[i].iNext;
2788 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2789 paUsers[i].iUser = iUser;
2790 paUsers[i].iUserTable = iUserTable;
2791 pPage->iUserHead = i;
2792
2793 /*
2794 * Insert into cache and enable monitoring of the guest page if enabled.
2795 *
2796 * Until we implement caching of all levels, including the CR3 one, we'll
2797 * have to make sure we don't try to monitor & cache any recursive reuse of
2798 * a monitored CR3 page. Because all Windows versions do this, we'll
2799 * have to be able to do combined access monitoring, CR3 + PT and
2800 * PD + PT (guest PAE).
2801 *
2802 * Update:
2803 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2804 */
2805#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2806# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2807 const bool fCanBeMonitored = true;
2808# else
2809 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2810 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2811 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2812# endif
2813# ifdef PGMPOOL_WITH_CACHE
2814 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2815# endif
2816 if (fCanBeMonitored)
2817 {
2818# ifdef PGMPOOL_WITH_MONITORING
2819 rc = pgmPoolMonitorInsert(pPool, pPage);
2820 AssertRC(rc);
2821 }
2822# endif
2823#endif /* PGMPOOL_WITH_MONITORING */
2824 return rc;
2825}
2826
2827
2828# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2829/**
2830 * Adds a user reference to a page.
2831 *
2832 * This will move the page to the head of the
2833 *
2834 * @returns VBox status code.
2835 * @retval VINF_SUCCESS if successfully added.
2836 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2837 * @param pPool The pool.
2838 * @param pPage The cached page.
2839 * @param iUser The user index.
2840 * @param iUserTable The user table.
2841 */
2842static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2843{
2844 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2845
2846 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2847
2848# ifdef VBOX_STRICT
2849 /*
2850 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2851 */
2852 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2853 {
2854 uint16_t i = pPage->iUserHead;
2855 do
2856 {
2857 Assert(i < pPool->cMaxUsers);
2858 AssertMsg(iUser != PGMPOOL_IDX_PD || iUser != PGMPOOL_IDX_PDPT || iUser != PGMPOOL_IDX_NESTED_ROOT || iUser != PGMPOOL_IDX_AMD64_CR3 ||
2859 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2860 i = paUsers[i].iNext;
2861 } while (i != NIL_PGMPOOL_USER_INDEX);
2862 }
2863# endif
2864
2865 /*
2866 * Allocate a user node.
2867 */
2868 uint16_t i = pPool->iUserFreeHead;
2869 if (i == NIL_PGMPOOL_USER_INDEX)
2870 {
2871 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2872 if (RT_FAILURE(rc))
2873 return rc;
2874 i = pPool->iUserFreeHead;
2875 }
2876 pPool->iUserFreeHead = paUsers[i].iNext;
2877
2878 /*
2879 * Initialize the user node and insert it.
2880 */
2881 paUsers[i].iNext = pPage->iUserHead;
2882 paUsers[i].iUser = iUser;
2883 paUsers[i].iUserTable = iUserTable;
2884 pPage->iUserHead = i;
2885
2886# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2887 if (pPage->fDirty)
2888 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, true /* force removal */);
2889# endif
2890
2891# ifdef PGMPOOL_WITH_CACHE
2892 /*
2893 * Tell the cache to update its replacement stats for this page.
2894 */
2895 pgmPoolCacheUsed(pPool, pPage);
2896# endif
2897 return VINF_SUCCESS;
2898}
2899# endif /* PGMPOOL_WITH_CACHE */
2900
2901
2902/**
2903 * Frees a user record associated with a page.
2904 *
2905 * This does not clear the entry in the user table; it simply returns the
2906 * user record to the chain of free records.
2907 *
2908 * @param pPool The pool.
2909 * @param pPage The shadow page.
2910 * @param iUser The shadow page pool index of the user table.
2911 * @param iUserTable The index into the user table (shadowed).
2912 */
2913static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2914{
2915 /*
2916 * Unlink and free the specified user entry.
2917 */
2918 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2919
2920 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2921 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2922 uint16_t i = pPage->iUserHead;
2923 if ( i != NIL_PGMPOOL_USER_INDEX
2924 && paUsers[i].iUser == iUser
2925 && paUsers[i].iUserTable == iUserTable)
2926 {
2927 pPage->iUserHead = paUsers[i].iNext;
2928
2929 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2930 paUsers[i].iNext = pPool->iUserFreeHead;
2931 pPool->iUserFreeHead = i;
2932 return;
2933 }
2934
2935 /* General: Linear search. */
2936 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2937 while (i != NIL_PGMPOOL_USER_INDEX)
2938 {
2939 if ( paUsers[i].iUser == iUser
2940 && paUsers[i].iUserTable == iUserTable)
2941 {
2942 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2943 paUsers[iPrev].iNext = paUsers[i].iNext;
2944 else
2945 pPage->iUserHead = paUsers[i].iNext;
2946
2947 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2948 paUsers[i].iNext = pPool->iUserFreeHead;
2949 pPool->iUserFreeHead = i;
2950 return;
2951 }
2952 iPrev = i;
2953 i = paUsers[i].iNext;
2954 }
2955
2956 /* Fatal: didn't find it */
2957 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2958 iUser, iUserTable, pPage->GCPhys));
2959}
2960
2961
2962/**
2963 * Gets the entry size of a shadow table.
2964 *
2965 * @param enmKind The kind of page.
2966 *
2967 * @returns The size of the entry in bytes. That is, 4 or 8.
2968 * @returns If the kind is not for a table, an assertion is raised and 0 is
2969 * returned.
2970 */
2971DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2972{
2973 switch (enmKind)
2974 {
2975 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2976 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2977 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2978 case PGMPOOLKIND_32BIT_PD:
2979 case PGMPOOLKIND_32BIT_PD_PHYS:
2980 return 4;
2981
2982 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2983 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2984 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2985 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2986 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2987 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2988 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2989 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2990 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2991 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2992 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2993 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2994 case PGMPOOLKIND_64BIT_PML4:
2995 case PGMPOOLKIND_PAE_PDPT:
2996 case PGMPOOLKIND_ROOT_NESTED:
2997 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2998 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2999 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3000 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3001 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3002 case PGMPOOLKIND_PAE_PD_PHYS:
3003 case PGMPOOLKIND_PAE_PDPT_PHYS:
3004 return 8;
3005
3006 default:
3007 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3008 }
3009}
3010
3011
3012/**
3013 * Gets the entry size of a guest table.
3014 *
3015 * @param enmKind The kind of page.
3016 *
3017 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3018 * @returns If the kind is not for a table, an assertion is raised and 0 is
3019 * returned.
3020 */
3021DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3022{
3023 switch (enmKind)
3024 {
3025 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3026 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3027 case PGMPOOLKIND_32BIT_PD:
3028 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3029 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3030 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3031 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3032 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3033 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3034 return 4;
3035
3036 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3037 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3038 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3039 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3040 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3041 case PGMPOOLKIND_64BIT_PML4:
3042 case PGMPOOLKIND_PAE_PDPT:
3043 return 8;
3044
3045 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3046 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3047 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3048 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3049 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3050 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3051 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3052 case PGMPOOLKIND_ROOT_NESTED:
3053 case PGMPOOLKIND_PAE_PD_PHYS:
3054 case PGMPOOLKIND_PAE_PDPT_PHYS:
3055 case PGMPOOLKIND_32BIT_PD_PHYS:
3056 /** @todo can we return 0? (nobody is calling this...) */
3057 AssertFailed();
3058 return 0;
3059
3060 default:
3061 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3062 }
3063}
3064
3065#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3066
3067/**
3068 * Scans one shadow page table for mappings of a physical page.
3069 *
3070 * @param pVM The VM handle.
3071 * @param pPhysPage The guest page in question.
3072 * @param iShw The shadow page table.
3073 * @param cRefs The number of references made in that PT.
3074 */
3075static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
3076{
3077 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
3078 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3079
3080 /*
3081 * Assert sanity.
3082 */
3083 Assert(cRefs == 1);
3084 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3085 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3086
3087 /*
3088 * Then, clear the actual mappings to the page in the shadow PT.
3089 */
3090 switch (pPage->enmKind)
3091 {
3092 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3093 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3094 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3095 {
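 /* Clear every PTE that maps the physical page (HCPhys | P); stop as soon as all
  * cRefs references have been found. */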
3096 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3097 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3098 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3099 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3100 {
3101 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
3102 pPT->a[i].u = 0;
3103 cRefs--;
3104 if (!cRefs)
3105 return;
3106 }
3107#ifdef LOG_ENABLED
3108 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3109 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3110 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3111 {
3112 Log(("i=%d cRefs=%d\n", i, cRefs--));
3113 }
3114#endif
3115 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3116 break;
3117 }
3118
3119 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3120 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3121 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3122 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3123 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3124 {
3125 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3126 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3127 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3128 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3129 {
3130 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3131 pPT->a[i].u = 0;
3132 cRefs--;
3133 if (!cRefs)
3134 return;
3135 }
3136#ifdef LOG_ENABLED
3137 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3138 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3139 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3140 {
3141 Log(("i=%d cRefs=%d\n", i, cRefs--));
3142 }
3143#endif
3144 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
3145 break;
3146 }
3147
3148 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3149 {
3150 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3151 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3152 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3153 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3154 {
3155 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3156 pPT->a[i].u = 0;
3157 cRefs--;
3158 if (!cRefs)
3159 return;
3160 }
3161#ifdef LOG_ENABLED
3162 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3163 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3164 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3165 {
3166 Log(("i=%d cRefs=%d\n", i, cRefs--));
3167 }
3168#endif
3169 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3170 break;
3171 }
3172
3173 default:
3174 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3175 }
3176}
3177
3178
3179/**
3180 * Scans one shadow page table for mappings of a physical page.
3181 *
3182 * @param pVM The VM handle.
3183 * @param pPhysPage The guest page in question.
3184 * @param iShw The shadow page table.
3185 * @param cRefs The number of references made in that PT.
3186 */
3187void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
3188{
3189 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3190 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
3191 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3192 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
3193 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3194 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3195}
3196
3197
3198/**
3199 * Flushes a list of shadow page tables mapping the same physical page.
3200 *
3201 * @param pVM The VM handle.
3202 * @param pPhysPage The guest page in question.
3203 * @param iPhysExt The physical cross reference extent list to flush.
3204 */
3205void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
3206{
3207 Assert(PGMIsLockOwner(pVM));
3208 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3209 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3210 LogFlow(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%R[pgmpage] iPhysExt\n", pPhysPage, iPhysExt));
3211
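 /* Walk the extent chain, flushing every referenced shadow page table and clearing each
  * index as we go; the whole chain is returned to the free list afterwards. */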
3212 const uint16_t iPhysExtStart = iPhysExt;
3213 PPGMPOOLPHYSEXT pPhysExt;
3214 do
3215 {
3216 Assert(iPhysExt < pPool->cMaxPhysExts);
3217 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3218 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3219 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3220 {
3221 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
3222 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3223 }
3224
3225 /* next */
3226 iPhysExt = pPhysExt->iNext;
3227 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3228
3229 /* insert the list into the free list and clear the ram range entry. */
3230 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3231 pPool->iPhysExtFreeHead = iPhysExtStart;
3232 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3233
3234 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3235}
3236
3237#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3238
3239/**
3240 * Flushes all shadow page table mappings of the given guest page.
3241 *
3242 * This is typically called when the host page backing the guest one has been
3243 * replaced or when the page protection was changed due to an access handler.
3244 *
3245 * @returns VBox status code.
3246 * @retval VINF_SUCCESS if all references have been successfully cleared.
3247 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3248 * pool cleaning. FF and sync flags are set.
3249 *
3250 * @param pVM The VM handle.
3251 * @param pPhysPage The guest page in question.
3252 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3253 * flushed; it is NOT touched if this isn't necessary.
3254 * The caller MUST initialize this to @a false.
3255 */
3256int pgmPoolTrackFlushGCPhys(PVM pVM, PPGMPAGE pPhysPage, bool *pfFlushTLBs)
3257{
3258 PVMCPU pVCpu = VMMGetCpu(pVM);
3259 pgmLock(pVM);
3260 int rc = VINF_SUCCESS;
3261#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3262 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3263 if (u16)
3264 {
3265 /*
3266 * The zero page is currently screwing up the tracking and we'll
3267 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3268 * is defined, zero pages won't normally be mapped. Some kind of solution
3269 * will be needed for this problem of course, but it will have to wait...
3270 */
3271 if (PGM_PAGE_IS_ZERO(pPhysPage))
3272 rc = VINF_PGM_GCPHYS_ALIASED;
3273 else
3274 {
3275# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3276 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3277 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3278 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3279# endif
3280
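 /* Three tracking states: a single shadow PT with an inline reference count, an extent
  * list of shadow PTs, or an overflowed list which requires the slow full scan. */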
3281 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3282 pgmPoolTrackFlushGCPhysPT(pVM,
3283 pPhysPage,
3284 PGMPOOL_TD_GET_IDX(u16),
3285 PGMPOOL_TD_GET_CREFS(u16));
3286 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3287 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, PGMPOOL_TD_GET_IDX(u16));
3288 else
3289 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3290 *pfFlushTLBs = true;
3291
3292# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3293 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3294# endif
3295 }
3296 }
3297
3298#elif defined(PGMPOOL_WITH_CACHE)
3299 if (PGM_PAGE_IS_ZERO(pPhysPage))
3300 rc = VINF_PGM_GCPHYS_ALIASED;
3301 else
3302 {
3303# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3304 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow kills the pool otherwise. */
3305 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3306# endif
3307 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3308 if (rc == VINF_SUCCESS)
3309 *pfFlushTLBs = true;
3310
3311# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3312        PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3313# endif
3314    }
3315
3316#else
3317 rc = VINF_PGM_GCPHYS_ALIASED;
3318#endif
3319
3320 if (rc == VINF_PGM_GCPHYS_ALIASED)
3321 {
3322 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3323 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3324 rc = VINF_PGM_SYNC_CR3;
3325 }
3326 pgmUnlock(pVM);
3327 return rc;
3328}
3329
3330
3331/**
3332 * Scans all shadow page tables for mappings of a physical page.
3333 *
3334 * This may be slow, but it's most likely more efficient than cleaning
3335 * out the entire page pool / cache.
3336 *
3337 * @returns VBox status code.
3338 * @retval VINF_SUCCESS if all references have been successfully cleared.
3339 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3340 * a page pool cleaning.
3341 *
3342 * @param pVM The VM handle.
3343 * @param pPhysPage The guest page in question.
3344 */
3345int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3346{
3347 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3348 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3349 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3350 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3351
3352#if 1
3353 /*
3354 * There is a limit to what makes sense.
3355 */
3356 if (pPool->cPresent > 1024)
3357 {
3358 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3359 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3360 return VINF_PGM_GCPHYS_ALIASED;
3361 }
3362#endif
3363
3364 /*
3365     * Iterate all the pages until we've encountered all those that are in use.
3366     * This is a simple but not quite optimal solution.
3367 */
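    /* The shadow PTE patterns we are zapping: the page's host physical address with the
       present bit set, in PAE/long-mode (64-bit) and legacy 32-bit PTE widths. */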
3368 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3369 const uint32_t u32 = u64;
3370 unsigned cLeft = pPool->cUsedPages;
3371 unsigned iPage = pPool->cCurPages;
3372 while (--iPage >= PGMPOOL_IDX_FIRST)
3373 {
3374 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3375 if (pPage->GCPhys != NIL_RTGCPHYS)
3376 {
3377 switch (pPage->enmKind)
3378 {
3379 /*
3380 * We only care about shadow page tables.
3381 */
3382 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3383 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3384 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3385 {
3386 unsigned cPresent = pPage->cPresent;
3387 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3388 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3389 if (pPT->a[i].n.u1Present)
3390 {
3391 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3392 {
3393 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3394 pPT->a[i].u = 0;
3395 }
3396 if (!--cPresent)
3397 break;
3398 }
3399 break;
3400 }
3401
3402 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3403 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3404 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3405 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3406 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3407 {
3408 unsigned cPresent = pPage->cPresent;
3409 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3410 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3411 if (pPT->a[i].n.u1Present)
3412 {
3413 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3414 {
3415 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3416 pPT->a[i].u = 0;
3417 }
3418 if (!--cPresent)
3419 break;
3420 }
3421 break;
3422 }
3423 }
3424 if (!--cLeft)
3425 break;
3426 }
3427 }
3428
3429 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3430 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3431 return VINF_SUCCESS;
3432}
3433
3434
3435/**
3436 * Clears the user entry in a user table.
3437 *
3438 * This is used to remove all references to a page when flushing it.
3439 */
3440static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3441{
3442 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3443 Assert(pUser->iUser < pPool->cCurPages);
3444 uint32_t iUserTable = pUser->iUserTable;
3445
3446 /*
3447 * Map the user page.
3448 */
3449 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
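    /* The user table entry is either 32-bit (legacy page directory) or 64-bit
       (PAE/AMD64/EPT), so map the page through a union and clear it in the right width. */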
3450 union
3451 {
3452 uint64_t *pau64;
3453 uint32_t *pau32;
3454 } u;
3455 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3456
3457 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3458
3459 /* Safety precaution in case we change the paging for other modes too in the future. */
3460 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3461
3462#ifdef VBOX_STRICT
3463 /*
3464 * Some sanity checks.
3465 */
3466 switch (pUserPage->enmKind)
3467 {
3468 case PGMPOOLKIND_32BIT_PD:
3469 case PGMPOOLKIND_32BIT_PD_PHYS:
3470 Assert(iUserTable < X86_PG_ENTRIES);
3471 break;
3472 case PGMPOOLKIND_PAE_PDPT:
3473 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3474 case PGMPOOLKIND_PAE_PDPT_PHYS:
3475 Assert(iUserTable < 4);
3476 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3477 break;
3478 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3479 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3480 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3481 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3482 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3483 case PGMPOOLKIND_PAE_PD_PHYS:
3484 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3485 break;
3486 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3487 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3488 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3489 break;
3490 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3491 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3492 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3493 break;
3494 case PGMPOOLKIND_64BIT_PML4:
3495 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3496 /* GCPhys >> PAGE_SHIFT is the index here */
3497 break;
3498 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3499 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3500 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3501 break;
3502
3503 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3504 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3505 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3506 break;
3507
3508 case PGMPOOLKIND_ROOT_NESTED:
3509 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3510 break;
3511
3512 default:
3513 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3514 break;
3515 }
3516#endif /* VBOX_STRICT */
3517
3518 /*
3519 * Clear the entry in the user page.
3520 */
3521 switch (pUserPage->enmKind)
3522 {
3523 /* 32-bit entries */
3524 case PGMPOOLKIND_32BIT_PD:
3525 case PGMPOOLKIND_32BIT_PD_PHYS:
3526 u.pau32[iUserTable] = 0;
3527 break;
3528
3529 /* 64-bit entries */
3530 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3531 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3532 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3533 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3534 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3535#if defined(IN_RC)
3536    /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during cr3 load, so any
3537     * non-present PDPT will continue to cause page faults.
3538     */
3539 ASMReloadCR3();
3540#endif
3541 /* no break */
3542 case PGMPOOLKIND_PAE_PD_PHYS:
3543 case PGMPOOLKIND_PAE_PDPT_PHYS:
3544 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3545 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3546 case PGMPOOLKIND_64BIT_PML4:
3547 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3548 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3549 case PGMPOOLKIND_PAE_PDPT:
3550 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3551 case PGMPOOLKIND_ROOT_NESTED:
3552 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3553 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3554 u.pau64[iUserTable] = 0;
3555 break;
3556
3557 default:
3558 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3559 }
3560}
3561
3562
3563/**
3564 * Clears all users of a page.
3565 */
3566static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3567{
3568 /*
3569 * Free all the user records.
3570 */
3571 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3572
3573 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3574 uint16_t i = pPage->iUserHead;
3575 while (i != NIL_PGMPOOL_USER_INDEX)
3576 {
3577        /* Clear the entry in the user table. */
3578 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3579
3580 /* Free it. */
3581 const uint16_t iNext = paUsers[i].iNext;
3582 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3583 paUsers[i].iNext = pPool->iUserFreeHead;
3584 pPool->iUserFreeHead = i;
3585
3586 /* Next. */
3587 i = iNext;
3588 }
3589 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3590}
3591
3592#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3593
3594/**
3595 * Allocates a new physical cross reference extent.
3596 *
3597 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3598 * @param pVM The VM handle.
3599 * @param piPhysExt Where to store the phys ext index.
3600 */
3601PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3602{
3603 Assert(PGMIsLockOwner(pVM));
3604 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3605 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3606 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3607 {
3608 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3609 return NULL;
3610 }
3611 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3612 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3613 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3614 *piPhysExt = iPhysExt;
3615 return pPhysExt;
3616}
3617
3618
3619/**
3620 * Frees a physical cross reference extent.
3621 *
3622 * @param pVM The VM handle.
3623 * @param iPhysExt The extent to free.
3624 */
3625void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3626{
3627 Assert(PGMIsLockOwner(pVM));
3628 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3629 Assert(iPhysExt < pPool->cMaxPhysExts);
3630 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3631 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3632 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3633 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3634 pPool->iPhysExtFreeHead = iPhysExt;
3635}
3636
3637
3638/**
3639 * Frees a list of physical cross reference extents.
3640 *
3641 * @param pVM The VM handle.
3642 * @param iPhysExt The index of the head of the extent list to free.
3643 */
3644void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3645{
3646 Assert(PGMIsLockOwner(pVM));
3647 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3648
3649 const uint16_t iPhysExtStart = iPhysExt;
3650 PPGMPOOLPHYSEXT pPhysExt;
3651 do
3652 {
3653 Assert(iPhysExt < pPool->cMaxPhysExts);
3654 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3655 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3656 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3657
3658 /* next */
3659 iPhysExt = pPhysExt->iNext;
3660 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3661
3662 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3663 pPool->iPhysExtFreeHead = iPhysExtStart;
3664}
3665
3666
3667/**
3668 * Insert a reference into a list of physical cross reference extents.
3669 *
3670 * @returns The new tracking data for PGMPAGE.
3671 *
3672 * @param pVM The VM handle.
3673 * @param iPhysExt The physical extent index of the list head.
3674 * @param iShwPT The shadow page table index.
3675 *
3676 */
3677static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3678{
3679 Assert(PGMIsLockOwner(pVM));
3680 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3681 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3682
3683 /* special common case. */
3684 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3685 {
3686 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3687 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3688 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d}\n", iPhysExt, iShwPT));
3689 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3690 }
3691
3692 /* general treatment. */
3693 const uint16_t iPhysExtStart = iPhysExt;
3694 unsigned cMax = 15;
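    /* Walk at most 15 extents; if the chain is longer the page is marked as overflowed
       and will be handled by the slow full pool scan instead. */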
3695 for (;;)
3696 {
3697 Assert(iPhysExt < pPool->cMaxPhysExts);
3698 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3699 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3700 {
3701 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3702 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3703 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3704 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3705 }
3706 if (!--cMax)
3707 {
3708 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3709 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3710 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3711 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3712        }

        /* Advance to the next extent in the chain; when the end of the list is reached,
           fall out of the loop so a new extent can be appended below. */
        iPhysExt = paPhysExts[iPhysExt].iNext;
        if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
            break;
3713    }
3714
3715 /* add another extent to the list. */
3716 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3717 if (!pNew)
3718 {
3719 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3720 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3721 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3722 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3723 }
3724 pNew->iNext = iPhysExtStart;
3725 pNew->aidx[0] = iShwPT;
3726 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3727 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3728}
3729
3730
3731/**
3732 * Add a reference to guest physical page where extents are in use.
3733 *
3734 * @returns The new tracking data for PGMPAGE.
3735 *
3736 * @param pVM The VM handle.
3737 * @param u16 The ram range flags (top 16-bits).
3738 * @param iShwPT The shadow page table index.
3739 */
3740uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3741{
3742 pgmLock(pVM);
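    /* Three cases: (1) the page still carries a plain cRefs/index tracking word, so convert
       it into an extent list; (2) it already uses an extent list, so insert into that list;
       (3) it has already overflowed, so just count the event. */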
3743 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3744 {
3745 /*
3746 * Convert to extent list.
3747 */
3748 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3749 uint16_t iPhysExt;
3750 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3751 if (pPhysExt)
3752 {
3753 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3754 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3755 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3756 pPhysExt->aidx[1] = iShwPT;
3757 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3758 }
3759 else
3760 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3761 }
3762 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3763 {
3764 /*
3765 * Insert into the extent list.
3766 */
3767 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3768 }
3769 else
3770 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3771 pgmUnlock(pVM);
3772 return u16;
3773}
3774
3775
3776/**
3777 * Clear references to guest physical memory.
3778 *
3779 * @param pPool The pool.
3780 * @param pPage The page.
3781 * @param pPhysPage Pointer to the aPages entry in the ram range.
3782 */
3783void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3784{
3785 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3786 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3787
3788 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3789 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3790 {
3791 PVM pVM = pPool->CTX_SUFF(pVM);
3792 pgmLock(pVM);
3793
3794 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3795 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3796 do
3797 {
3798 Assert(iPhysExt < pPool->cMaxPhysExts);
3799
3800 /*
3801 * Look for the shadow page and check if it's all freed.
3802 */
3803 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3804 {
3805 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3806 {
3807 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3808
3809 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3810 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3811 {
3812 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3813 pgmUnlock(pVM);
3814 return;
3815 }
3816
3817 /* we can free the node. */
3818 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3819 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3820 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3821 {
3822 /* lonely node */
3823 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3824 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3825 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3826 }
3827 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3828 {
3829 /* head */
3830 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3831 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3832 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3833 }
3834 else
3835 {
3836 /* in list */
3837 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3838 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3839 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3840 }
3841 iPhysExt = iPhysExtNext;
3842 pgmUnlock(pVM);
3843 return;
3844 }
3845 }
3846
3847 /* next */
3848 iPhysExtPrev = iPhysExt;
3849 iPhysExt = paPhysExts[iPhysExt].iNext;
3850 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3851
3852 pgmUnlock(pVM);
3853 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3854 }
3855 else /* nothing to do */
3856 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3857}
3858
3859
3860/**
3861 * Clear references to guest physical memory.
3862 *
3863 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3864 * is assumed to be correct, so the linear search can be skipped and we can assert
3865 * at an earlier point.
3866 *
3867 * @param pPool The pool.
3868 * @param pPage The page.
3869 * @param HCPhys The host physical address corresponding to the guest page.
3870 * @param GCPhys The guest physical address corresponding to HCPhys.
3871 */
3872static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3873{
3874 /*
3875 * Walk range list.
3876 */
3877 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3878 while (pRam)
3879 {
3880 RTGCPHYS off = GCPhys - pRam->GCPhys;
3881 if (off < pRam->cb)
3882 {
3883 /* does it match? */
3884 const unsigned iPage = off >> PAGE_SHIFT;
3885 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3886#ifdef LOG_ENABLED
3887            RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3888            Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3889#endif
3890 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3891 {
3892 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3893 return;
3894 }
3895 break;
3896 }
3897 pRam = pRam->CTX_SUFF(pNext);
3898 }
3899 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3900}
3901
3902
3903/**
3904 * Clear references to guest physical memory.
3905 *
3906 * @param pPool The pool.
3907 * @param pPage The page.
3908 * @param HCPhys The host physical address corresponding to the guest page.
3909 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3910 */
3911void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3912{
3913 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
3914
3915 /*
3916 * Walk range list.
3917 */
3918 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3919 while (pRam)
3920 {
3921 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3922 if (off < pRam->cb)
3923 {
3924 /* does it match? */
3925 const unsigned iPage = off >> PAGE_SHIFT;
3926 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3927 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3928 {
3929 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3930 return;
3931 }
3932 break;
3933 }
3934 pRam = pRam->CTX_SUFF(pNext);
3935 }
3936
3937 /*
3938 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3939 */
3940 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3941 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3942 while (pRam)
3943 {
3944 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3945 while (iPage-- > 0)
3946 {
3947 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3948 {
3949 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3950 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3951 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3952 return;
3953 }
3954 }
3955 pRam = pRam->CTX_SUFF(pNext);
3956 }
3957
3958 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3959}
3960
3961
3962/**
3963 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3964 *
3965 * @param pPool The pool.
3966 * @param pPage The page.
3967 * @param pShwPT The shadow page table (mapping of the page).
3968 * @param pGstPT The guest page table.
3969 */
3970DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3971{
3972 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3973 if (pShwPT->a[i].n.u1Present)
3974 {
3975 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3976 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3977 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3978 if (!--pPage->cPresent)
3979 break;
3980 }
3981}
3982
3983
3984/**
3985 * Clear references to guest physical memory in a PAE / 32-bit page table.
3986 *
3987 * @param pPool The pool.
3988 * @param pPage The page.
3989 * @param pShwPT The shadow page table (mapping of the page).
3990 * @param pGstPT The guest page table (just a half one).
3991 */
3992DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3993{
3994 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3995 if (pShwPT->a[i].n.u1Present)
3996 {
3997 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3998 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3999 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
4000 }
4001}
4002
4003
4004/**
4005 * Clear references to guest physical memory in a PAE / PAE page table.
4006 *
4007 * @param pPool The pool.
4008 * @param pPage The page.
4009 * @param pShwPT The shadow page table (mapping of the page).
4010 * @param pGstPT The guest page table.
4011 */
4012DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
4013{
4014 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
4015 if (pShwPT->a[i].n.u1Present)
4016 {
4017            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4018 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4019 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
4020 }
4021}
4022
4023
4024/**
4025 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4026 *
4027 * @param pPool The pool.
4028 * @param pPage The page.
4029 * @param pShwPT The shadow page table (mapping of the page).
4030 */
4031DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4032{
4033 RTGCPHYS GCPhys = pPage->GCPhys;
4034 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4035 if (pShwPT->a[i].n.u1Present)
4036 {
4037 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4038 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4039 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
4040 }
4041}
4042
4043
4044/**
4045 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4046 *
4047 * @param pPool The pool.
4048 * @param pPage The page.
4049 * @param pShwPT The shadow page table (mapping of the page).
4050 */
4051DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
4052{
4053 RTGCPHYS GCPhys = pPage->GCPhys;
4054 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4055 if (pShwPT->a[i].n.u1Present)
4056 {
4057 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4058 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
4059 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
4060 }
4061}
4062
4063#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
4064
4065
4066/**
4067 * Clear references to shadowed pages in a 32 bits page directory.
4068 *
4069 * @param pPool The pool.
4070 * @param pPage The page.
4071 * @param pShwPD The shadow page directory (mapping of the page).
4072 */
4073DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4074{
4075 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4076 {
4077 if ( pShwPD->a[i].n.u1Present
4078 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4079 )
4080 {
4081 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4082 if (pSubPage)
4083 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4084 else
4085 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4086 }
4087 }
4088}
4089
4090/**
4091 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
4092 *
4093 * @param pPool The pool.
4094 * @param pPage The page.
4095 * @param pShwPD The shadow page directory (mapping of the page).
4096 */
4097DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4098{
4099 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4100 {
4101 if ( pShwPD->a[i].n.u1Present
4102 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4103 )
4104 {
4105 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4106 if (pSubPage)
4107 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4108 else
4109 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4110 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4111 }
4112 }
4113}
4114
4115/**
4116 * Clear references to shadowed pages in a PAE page directory pointer table.
4117 *
4118 * @param pPool The pool.
4119 * @param pPage The page.
4120 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4121 */
4122DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4123{
4124 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4125 {
4126 if ( pShwPDPT->a[i].n.u1Present
4127 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4128 )
4129 {
4130 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4131 if (pSubPage)
4132 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4133 else
4134 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4135 }
4136 }
4137}
4138
4139
4140/**
4141 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4142 *
4143 * @param pPool The pool.
4144 * @param pPage The page.
4145 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4146 */
4147DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4148{
4149 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4150 {
4151 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
4152 if (pShwPDPT->a[i].n.u1Present)
4153 {
4154 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4155 if (pSubPage)
4156 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4157 else
4158 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4159 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4160 }
4161 }
4162}
4163
4164
4165/**
4166 * Clear references to shadowed pages in a 64-bit level 4 page table.
4167 *
4168 * @param pPool The pool.
4169 * @param pPage The page.
4170 * @param pShwPML4 The shadow level 4 page table (mapping of the page).
4171 */
4172DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4173{
4174 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4175 {
4176 if (pShwPML4->a[i].n.u1Present)
4177 {
4178 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4179 if (pSubPage)
4180 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4181 else
4182 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4183 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4184 }
4185 }
4186}
4187
4188
4189/**
4190 * Clear references to guest physical memory in an EPT page table.
4191 *
4192 * @param pPool The pool.
4193 * @param pPage The page.
4194 * @param pShwPT The shadow page table (mapping of the page).
4195 */
4196DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4197{
4198 RTGCPHYS GCPhys = pPage->GCPhys;
4199 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4200 if (pShwPT->a[i].n.u1Present)
4201 {
4202 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4203 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4204 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
4205 }
4206}
4207
4208
4209/**
4210 * Clear references to shadowed pages in an EPT page directory.
4211 *
4212 * @param pPool The pool.
4213 * @param pPage The page.
4214 * @param pShwPD The shadow page directory (mapping of the page).
4215 */
4216DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4217{
4218 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4219 {
4220 if (pShwPD->a[i].n.u1Present)
4221 {
4222 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4223 if (pSubPage)
4224 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4225 else
4226 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4227 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4228 }
4229 }
4230}
4231
4232
4233/**
4234 * Clear references to shadowed pages in an EPT page directory pointer table.
4235 *
4236 * @param pPool The pool.
4237 * @param pPage The page.
4238 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4239 */
4240DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4241{
4242 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4243 {
4244 if (pShwPDPT->a[i].n.u1Present)
4245 {
4246 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4247 if (pSubPage)
4248 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4249 else
4250 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4251 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4252 }
4253 }
4254}
4255
4256
4257/**
4258 * Clears all references made by this page.
4259 *
4260 * This includes other shadow pages and GC physical addresses.
4261 *
4262 * @param pPool The pool.
4263 * @param pPage The page.
4264 */
4265static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4266{
4267 /*
4268 * Map the shadow page and take action according to the page kind.
4269 */
4270 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
4271 switch (pPage->enmKind)
4272 {
4273#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4274 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4275 {
4276 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4277 void *pvGst;
4278 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4279 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4280 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4281 break;
4282 }
4283
4284 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4285 {
4286 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4287 void *pvGst;
4288 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4289 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
4290 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4291 break;
4292 }
4293
4294 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4295 {
4296 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4297 void *pvGst;
4298 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4299 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
4300 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4301 break;
4302 }
4303
4304 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4305 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4306 {
4307 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4308 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4309 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4310 break;
4311 }
4312
4313 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4314 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4315 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4316 {
4317 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4318 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4319 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4320 break;
4321 }
4322
4323#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4324 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4325 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4326 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4327 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4328 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4329 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4330 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4331 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4332 break;
4333#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4334
4335 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4336 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4337 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4338 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4339 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4340 case PGMPOOLKIND_PAE_PD_PHYS:
4341 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4342 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4343 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4344 break;
4345
4346 case PGMPOOLKIND_32BIT_PD_PHYS:
4347 case PGMPOOLKIND_32BIT_PD:
4348 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4349 break;
4350
4351 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4352 case PGMPOOLKIND_PAE_PDPT:
4353 case PGMPOOLKIND_PAE_PDPT_PHYS:
4354 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4355 break;
4356
4357 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4358 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4359 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4360 break;
4361
4362 case PGMPOOLKIND_64BIT_PML4:
4363 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4364 break;
4365
4366 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4367 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4368 break;
4369
4370 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4371 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4372 break;
4373
4374 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4375 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4376 break;
4377
4378 default:
4379 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4380 }
4381
4382    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4383 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4384 ASMMemZeroPage(pvShw);
4385 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4386 pPage->fZeroed = true;
4387 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4388}
4389#endif /* PGMPOOL_WITH_USER_TRACKING */
4390
4391/**
4392 * Flushes a pool page.
4393 *
4394 * This moves the page to the free list after removing all user references to it.
4395 *
4396 * @returns VBox status code.
4397 * @retval VINF_SUCCESS on success.
4398 * @param pPool The pool.
4399 * @param pPage The pool page to flush.
4400 */
4401int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4402{
4403 PVM pVM = pPool->CTX_SUFF(pVM);
4404
4405 int rc = VINF_SUCCESS;
4406 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4407 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4408 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4409
4410 /*
4411 * Quietly reject any attempts at flushing any of the special root pages.
4412 */
4413 if (pPage->idx < PGMPOOL_IDX_FIRST)
4414 {
4415 AssertFailed(); /* can no longer happen */
4416 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4417 return VINF_SUCCESS;
4418 }
4419
4420 pgmLock(pVM);
4421
4422 /*
4423 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4424 */
4425 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4426 {
4427 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4428 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4429 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4430 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4431 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4432 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4433 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4434 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4435 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4436 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4437 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4438 pgmUnlock(pVM);
4439 return VINF_SUCCESS;
4440 }
4441
4442#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4443 /* Start a subset so we won't run out of mapping space. */
4444 PVMCPU pVCpu = VMMGetCpu(pVM);
4445 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4446#endif
4447
4448 /*
4449 * Mark the page as being in need of an ASMMemZeroPage().
4450 */
4451 pPage->fZeroed = false;
4452
4453#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4454 if (pPage->fDirty)
4455 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, true /* force removal */);
4456#endif
4457
4458#ifdef PGMPOOL_WITH_USER_TRACKING
4459 /*
4460 * Clear the page.
4461 */
4462 pgmPoolTrackClearPageUsers(pPool, pPage);
4463 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4464 pgmPoolTrackDeref(pPool, pPage);
4465 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4466#endif
4467
4468#ifdef PGMPOOL_WITH_CACHE
4469 /*
4470 * Flush it from the cache.
4471 */
4472 pgmPoolCacheFlushPage(pPool, pPage);
4473#endif /* PGMPOOL_WITH_CACHE */
4474
4475#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4476 /* Heavy stuff done. */
4477 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4478#endif
4479
4480#ifdef PGMPOOL_WITH_MONITORING
4481 /*
4482     * Deregister the monitoring.
4483 */
4484 if (pPage->fMonitored)
4485 rc = pgmPoolMonitorFlush(pPool, pPage);
4486#endif
4487
4488 /*
4489 * Free the page.
4490 */
4491 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4492 pPage->iNext = pPool->iFreeHead;
4493 pPool->iFreeHead = pPage->idx;
4494 pPage->enmKind = PGMPOOLKIND_FREE;
4495 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4496 pPage->GCPhys = NIL_RTGCPHYS;
4497 pPage->fReusedFlushPending = false;
4498
4499 pPool->cUsedPages--;
4500 pgmUnlock(pVM);
4501 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4502 return rc;
4503}
4504
4505
4506/**
4507 * Frees a usage of a pool page.
4508 *
4509 * The caller is responsible for updating the user table so that it no longer
4510 * references the shadow page.
4511 *
4512 * @param pPool The pool.
4513 * @param pPage The pool page.
4514 * @param iUser The shadow page pool index of the user table.
4515 * @param iUserTable The index into the user table (shadowed).
4516 */
4517void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4518{
4519 PVM pVM = pPool->CTX_SUFF(pVM);
4520
4521 STAM_PROFILE_START(&pPool->StatFree, a);
4522 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4523 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4524 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4525 pgmLock(pVM);
4526#ifdef PGMPOOL_WITH_USER_TRACKING
4527 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4528#endif
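    /* Cached pages stay in the pool for possible reuse; anything else is flushed back
       onto the free list right away. */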
4529#ifdef PGMPOOL_WITH_CACHE
4530 if (!pPage->fCached)
4531#endif
4532 pgmPoolFlushPage(pPool, pPage);
4533 pgmUnlock(pVM);
4534 STAM_PROFILE_STOP(&pPool->StatFree, a);
4535}
4536
4537
4538/**
4539 * Makes one or more free pages available, either by growing the pool or by evicting a cached page.
4540 *
4541 * @returns VBox status code.
4542 * @retval VINF_SUCCESS on success.
4543 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4544 *
4545 * @param pPool The pool.
4546 * @param enmKind Page table kind
4547 * @param iUser The user of the page.
4548 */
4549static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4550{
4551 PVM pVM = pPool->CTX_SUFF(pVM);
4552
4553 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4554
4555 /*
4556 * If the pool isn't full grown yet, expand it.
4557 */
4558 if ( pPool->cCurPages < pPool->cMaxPages
4559#if defined(IN_RC)
4560 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4561 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4562 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4563#endif
4564 )
4565 {
4566 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
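        /* Growing the pool must be done in ring-3; from R0/RC we call back into ring-3 to do it. */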
4567#ifdef IN_RING3
4568 int rc = PGMR3PoolGrow(pVM);
4569#else
4570 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4571#endif
4572 if (RT_FAILURE(rc))
4573 return rc;
4574 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4575 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4576 return VINF_SUCCESS;
4577 }
4578
4579#ifdef PGMPOOL_WITH_CACHE
4580 /*
4581 * Free one cached page.
4582 */
4583 return pgmPoolCacheFreeOne(pPool, iUser);
4584#else
4585 /*
4586 * Flush the pool.
4587 *
4588 * If we have tracking enabled, it should be possible to come up with
4589 * a cheap replacement strategy...
4590 */
4591 /* @todo This path no longer works (CR3 root pages will be flushed)!! */
4592 AssertCompileFailed();
4593 Assert(!CPUMIsGuestInLongMode(pVM));
4594 pgmPoolFlushAllInt(pPool);
4595 return VERR_PGM_POOL_FLUSHED;
4596#endif
4597}
4598
4599/**
4600 * Allocates a page from the pool.
4601 *
4602 * This page may actually be a cached page and not in need of any processing
4603 * on the caller's part.
4604 *
4605 * @returns VBox status code.
4606 * @retval VINF_SUCCESS if a NEW page was allocated.
4607 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4608 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4609 * @param pVM The VM handle.
4610 * @param GCPhys The GC physical address of the page we're going to shadow.
4611 * For 4MB and 2MB PD entries, it's the first address the
4612 * shadow PT is covering.
4613 * @param enmKind The kind of mapping.
4614 * @param enmAccess Access type for the mapping (only relevant for big pages)
4615 * @param iUser The shadow page pool index of the user table.
4616 * @param iUserTable The index into the user table (shadowed).
4617 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4618 * @param fLockPage Lock the page
4619 */
4620int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4621{
4622 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4623 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4624 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4625 *ppPage = NULL;
4626 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4627 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4628 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4629
4630 pgmLock(pVM);
4631
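    /* Try the cache first; on a miss take a page off the free list, growing the pool or
       evicting a cached page when the free list is empty. */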
4632#ifdef PGMPOOL_WITH_CACHE
4633 if (pPool->fCacheEnabled)
4634 {
4635 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4636 if (RT_SUCCESS(rc2))
4637 {
4638 if (fLockPage)
4639 pgmPoolLockPage(pPool, *ppPage);
4640 pgmUnlock(pVM);
4641 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4642 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4643 return rc2;
4644 }
4645 }
4646#endif
4647
4648 /*
4649 * Allocate a new one.
4650 */
4651 int rc = VINF_SUCCESS;
4652 uint16_t iNew = pPool->iFreeHead;
4653 if (iNew == NIL_PGMPOOL_IDX)
4654 {
4655 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4656 if (RT_FAILURE(rc))
4657 {
4658 pgmUnlock(pVM);
4659 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4660 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4661 return rc;
4662 }
4663 iNew = pPool->iFreeHead;
4664 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4665 }
4666
4667 /* unlink the free head */
4668 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4669 pPool->iFreeHead = pPage->iNext;
4670 pPage->iNext = NIL_PGMPOOL_IDX;
4671
4672 /*
4673 * Initialize it.
4674 */
4675 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4676 pPage->enmKind = enmKind;
4677 pPage->enmAccess = enmAccess;
4678 pPage->GCPhys = GCPhys;
4679 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4680 pPage->fMonitored = false;
4681 pPage->fCached = false;
4682#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4683 pPage->fDirty = false;
4684#endif
4685 pPage->fReusedFlushPending = false;
4686#ifdef PGMPOOL_WITH_MONITORING
4687 pPage->cModifications = 0;
4688 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4689 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4690#else
4691 pPage->fCR3Mix = false;
4692#endif
4693#ifdef PGMPOOL_WITH_USER_TRACKING
4694 pPage->cPresent = 0;
4695 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
4696 pPage->pvLastAccessHandlerFault = 0;
4697 pPage->cLastAccessHandlerCount = 0;
4698 pPage->pvLastAccessHandlerRip = 0;
4699
4700 /*
4701 * Insert into the tracking and cache. If this fails, free the page.
4702 */
4703 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4704 if (RT_FAILURE(rc3))
4705 {
4706 pPool->cUsedPages--;
4707 pPage->enmKind = PGMPOOLKIND_FREE;
4708 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4709 pPage->GCPhys = NIL_RTGCPHYS;
4710 pPage->iNext = pPool->iFreeHead;
4711 pPool->iFreeHead = pPage->idx;
4712 pgmUnlock(pVM);
4713 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4714 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4715 return rc3;
4716 }
4717#endif /* PGMPOOL_WITH_USER_TRACKING */
4718
4719 /*
4720 * Commit the allocation, clear the page and return.
4721 */
4722#ifdef VBOX_WITH_STATISTICS
4723 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4724 pPool->cUsedPagesHigh = pPool->cUsedPages;
4725#endif
4726
4727 if (!pPage->fZeroed)
4728 {
4729 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4730 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4731 ASMMemZeroPage(pv);
4732 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4733 }
4734
4735 *ppPage = pPage;
4736 if (fLockPage)
4737 pgmPoolLockPage(pPool, pPage);
4738 pgmUnlock(pVM);
4739 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4740 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4741 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4742 return rc;
4743}
4744
4745
4746/**
4747 * Frees a usage of a pool page.
4748 *
4749 * @param pVM The VM handle.
4750 * @param HCPhys The HC physical address of the shadow page.
4751 * @param iUser The shadow page pool index of the user table.
4752 * @param iUserTable The index into the user table (shadowed).
4753 */
4754void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4755{
4756 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4757 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4758 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4759}
4760
4761/**
4762 * Internal worker for finding an 'in-use' shadow page given by its physical address.
4763 *
4764 * @returns Pointer to the shadow page structure.
4765 * @param pPool The pool.
4766 * @param HCPhys The HC physical address of the shadow page.
4767 */
4768PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4769{
4770 PVM pVM = pPool->CTX_SUFF(pVM);
4771
4772 Assert(PGMIsLockOwner(pVM));
4773
4774 /*
4775 * Look up the page.
4776 */
4777 pgmLock(pVM);
4778 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4779 pgmUnlock(pVM);
4780
4781 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4782 return pPage;
4783}
4784
4785#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
4786/**
4787 * Flush the specified page if present
4788 * Flushes the specified page if present.
4789 * @param pVM The VM handle.
4790 * @param GCPhys Guest physical address of the page to flush
4791 */
4792void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
4793{
4794#ifdef PGMPOOL_WITH_CACHE
4795 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4796
4797 VM_ASSERT_EMT(pVM);
4798
4799 /*
4800 * Look up the GCPhys in the hash.
4801 */
4802 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
4803 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
4804 if (i == NIL_PGMPOOL_IDX)
4805 return;
4806
4807 do
4808 {
4809 PPGMPOOLPAGE pPage = &pPool->aPages[i];
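        /* Unsigned wrap-around trick: matches any pool page whose GCPhys lies within
           the guest page being flushed. */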
4810 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
4811 {
4812 switch (pPage->enmKind)
4813 {
4814 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4815 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4816 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4817 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4818 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4819 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4820 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4821 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4822 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4823 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4824 case PGMPOOLKIND_64BIT_PML4:
4825 case PGMPOOLKIND_32BIT_PD:
4826 case PGMPOOLKIND_PAE_PDPT:
4827 {
4828 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
4829#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4830 if (pPage->fDirty)
4831 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
4832 else
4833#endif
4834 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
4835 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
4836 pgmPoolMonitorChainFlush(pPool, pPage);
4837 return;
4838 }
4839
4840 /* ignore, no monitoring. */
4841 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4842 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4843 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4844 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4845 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4846 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4847 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4848 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4849 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4850 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4851 case PGMPOOLKIND_ROOT_NESTED:
4852 case PGMPOOLKIND_PAE_PD_PHYS:
4853 case PGMPOOLKIND_PAE_PDPT_PHYS:
4854 case PGMPOOLKIND_32BIT_PD_PHYS:
4855 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4856 break;
4857
4858 default:
4859 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4860 }
4861 }
4862
4863 /* next */
4864 i = pPage->iNext;
4865 } while (i != NIL_PGMPOOL_IDX);
4866#endif
4867 return;
4868}
4869#endif /* IN_RING3 */
4870
4871#ifdef IN_RING3
4872/**
4873 * Flushes the entire cache.
4874 *
4875 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4876 * and will execute this CR3 flush.
4877 *
4878 * @param pVM The VM handle.
4879 */
4880void pgmR3PoolReset(PVM pVM)
4881{
4882 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4883
4884 Assert(PGMIsLockOwner(pVM));
4885 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
4886 LogFlow(("pgmPoolFlushAllInt:\n"));
4887
4888 /*
4889 * If there are no pages in the pool, there is nothing to do.
4890 */
4891 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4892 {
4893 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4894 return;
4895 }
4896
4897 /*
4898 * Exit the shadow mode since we're going to clear everything,
4899 * including the root page.
4900 */
4901    for (unsigned i = 0; i < pVM->cCPUs; i++)
4902 {
4903 PVMCPU pVCpu = &pVM->aCpus[i];
4904 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
4905 }
4906
4907 /*
4908 * Nuke the free list and reinsert all pages into it.
4909 */
4910 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4911 {
4912 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4913
4914 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
4915#ifdef PGMPOOL_WITH_MONITORING
4916 if (pPage->fMonitored)
4917 pgmPoolMonitorFlush(pPool, pPage);
4918 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4919 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4920 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4921 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4922 pPage->cModifications = 0;
4923#endif
4924 pPage->GCPhys = NIL_RTGCPHYS;
4925 pPage->enmKind = PGMPOOLKIND_FREE;
4926 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4927 Assert(pPage->idx == i);
4928 pPage->iNext = i + 1;
4929 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4930 pPage->fSeenNonGlobal = false;
4931 pPage->fMonitored = false;
4932#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4933 pPage->fDirty = false;
4934#endif
4935 pPage->fCached = false;
4936 pPage->fReusedFlushPending = false;
4937#ifdef PGMPOOL_WITH_USER_TRACKING
4938 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4939#else
4940 pPage->fCR3Mix = false;
4941#endif
4942#ifdef PGMPOOL_WITH_CACHE
4943 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4944 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4945#endif
4946 pPage->cLocked = 0;
4947 }
4948 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4949 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4950 pPool->cUsedPages = 0;
4951
4952#ifdef PGMPOOL_WITH_USER_TRACKING
4953 /*
4954 * Zap and reinitialize the user records.
4955 */
4956 pPool->cPresent = 0;
4957 pPool->iUserFreeHead = 0;
4958 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4959 const unsigned cMaxUsers = pPool->cMaxUsers;
4960 for (unsigned i = 0; i < cMaxUsers; i++)
4961 {
4962 paUsers[i].iNext = i + 1;
4963 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4964 paUsers[i].iUserTable = 0xfffffffe;
4965 }
4966 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4967#endif
4968
4969#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4970 /*
4971 * Clear all the GCPhys links and rebuild the phys ext free list.
4972 */
4973 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
4974 pRam;
4975 pRam = pRam->CTX_SUFF(pNext))
4976 {
4977 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4978 while (iPage-- > 0)
4979 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
4980 }
4981
4982 pPool->iPhysExtFreeHead = 0;
4983 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4984 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4985 for (unsigned i = 0; i < cMaxPhysExts; i++)
4986 {
4987 paPhysExts[i].iNext = i + 1;
4988 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4989 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4990 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4991 }
4992 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4993#endif
4994
4995#ifdef PGMPOOL_WITH_MONITORING
4996 /*
4997 * Just zap the modified list.
4998 */
4999 pPool->cModifiedPages = 0;
5000 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5001#endif
5002
5003#ifdef PGMPOOL_WITH_CACHE
5004 /*
5005 * Clear the GCPhys hash and the age list.
5006 */
5007 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5008 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5009 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5010 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5011#endif
5012
5013#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5014 /* Clear all dirty pages. */
5015 pPool->idxFreeDirtyPage = 0;
5016 pPool->cDirtyPages = 0;
5017 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
5018 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
5019#endif
5020
5021 /*
5022 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5023 */
5024 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
5025 {
5026 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5027 pPage->iNext = NIL_PGMPOOL_IDX;
5028#ifdef PGMPOOL_WITH_MONITORING
5029 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5030 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5031 pPage->cModifications = 0;
5032 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
5033 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5034 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5035 if (pPage->fMonitored)
5036 {
5037 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
5038 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
5039 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
5040 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5041 pPool->pszAccessHandler);
5042 AssertFatalRCSuccess(rc);
5043# ifdef PGMPOOL_WITH_CACHE
5044 pgmPoolHashInsert(pPool, pPage);
5045# endif
5046 }
5047#endif
5048#ifdef PGMPOOL_WITH_USER_TRACKING
5049 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5050#endif
5051#ifdef PGMPOOL_WITH_CACHE
5052 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5053 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5054#endif
5055 }
5056
5057    for (unsigned i = 0; i < pVM->cCPUs; i++)
5058 {
5059 PVMCPU pVCpu = &pVM->aCpus[i];
5060 /*
5061 * Re-enter the shadowing mode and assert Sync CR3 FF.
5062 */
5063 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5064 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5065 }
5066
5067 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
5068}
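/*
 * A minimal sketch (hypothetical, not code from this file) of how a ring-3 caller
 * is expected to honour the VMCPU_FF_PGM_SYNC_CR3 force-action flag that
 * pgmR3PoolReset sets on each VCPU; the exact call site and argument values are
 * an assumption for illustration only:
 *
 *    if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
 *        rc = PGMSyncCR3(pVCpu, CPUMGetGuestCR0(pVCpu), CPUMGetGuestCR3(pVCpu),
 *                        CPUMGetGuestCR4(pVCpu), true);
 */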
5069#endif /* IN_RING3 */
5070
5071#ifdef LOG_ENABLED
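/**
 * Converts a shadow page pool page kind (PGMPOOLKIND_XXX) to a readable string.
 *
 * For logging purposes only.
 *
 * @returns Read-only string with the kind name.
 * @param   enmKind     The page kind.
 */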
5072static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5073{
5074    switch (enmKind)
5075 {
5076 case PGMPOOLKIND_INVALID:
5077 return "PGMPOOLKIND_INVALID";
5078 case PGMPOOLKIND_FREE:
5079 return "PGMPOOLKIND_FREE";
5080 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5081 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5082 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5083 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5084 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5085 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5086 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5087 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5088 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5089 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5090 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5091 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5092 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5093 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5094 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5095 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5096 case PGMPOOLKIND_32BIT_PD:
5097 return "PGMPOOLKIND_32BIT_PD";
5098 case PGMPOOLKIND_32BIT_PD_PHYS:
5099 return "PGMPOOLKIND_32BIT_PD_PHYS";
5100 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5101 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5102 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5103 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5104 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5105 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5106 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5107 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5108 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5109 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5110 case PGMPOOLKIND_PAE_PD_PHYS:
5111 return "PGMPOOLKIND_PAE_PD_PHYS";
5112 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5113 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5114 case PGMPOOLKIND_PAE_PDPT:
5115 return "PGMPOOLKIND_PAE_PDPT";
5116 case PGMPOOLKIND_PAE_PDPT_PHYS:
5117 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5118 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5119 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5120 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5121 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5122 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5123 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5124 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5125 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5126 case PGMPOOLKIND_64BIT_PML4:
5127 return "PGMPOOLKIND_64BIT_PML4";
5128 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5129 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5130 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5131 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5132 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5133 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5134 case PGMPOOLKIND_ROOT_NESTED:
5135 return "PGMPOOLKIND_ROOT_NESTED";
5136 }
5137 return "Unknown kind!";
5138}
5139#endif /* LOG_ENABLED */