VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@22406

Last change on this file since 22406 was 22349, checked in by vboxsync, 16 years ago

Make sure we don't kick out a page too quickly.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 166.7 KB
1/* $Id: PGMAllPool.cpp 22349 2009-08-19 14:16:10Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42#include <iprt/string.h>
43
44
45/*******************************************************************************
46* Internal Functions *
47*******************************************************************************/
48RT_C_DECLS_BEGIN
49static void pgmPoolFlushAllInt(PPGMPOOL pPool);
50#ifdef PGMPOOL_WITH_USER_TRACKING
51DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
52DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
53static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
54#endif
55#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
56static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
57#endif
58#ifdef PGMPOOL_WITH_CACHE
59static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
60#endif
61#ifdef PGMPOOL_WITH_MONITORING
62static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
63#endif
64#ifndef IN_RING3
65DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
66#endif
67#ifdef LOG_ENABLED
68static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
69#endif
70
71void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs);
72void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt);
73int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
74PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
75void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
76void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
77
78RT_C_DECLS_END
79
80
81/**
82 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
83 *
84 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
85 * @param enmKind The page kind.
86 */
87DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
88{
89 switch (enmKind)
90 {
91 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
92 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
93 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
94 return true;
95 default:
96 return false;
97 }
98}
99
100/** @def PGMPOOL_PAGE_2_LOCKED_PTR
101 * Maps a pool page into the current context and locks it (RC only).
102 *
103 * @returns Pointer to the mapped page.
104 * @param pVM The VM handle.
105 * @param pPage The pool page.
106 *
107 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
108 * small page window employed by that function. Be careful.
109 * @remark There is no need to assert on the result.
110 */
111#if defined(IN_RC)
112DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
113{
114 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
115
116 /* Make sure the dynamic mapping will not be reused. */
117 if (pv)
118 PGMDynLockHCPage(pVM, (uint8_t *)pv);
119
120 return pv;
121}
122#else
123# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
124#endif
125
126/** @def PGMPOOL_UNLOCK_PTR
127 * Unlocks a page previously locked by PGMPOOL_PAGE_2_LOCKED_PTR (RC only).
128 *
129 * @returns VBox status code.
130 * @param pVM The VM handle.
131 * @param pPage The pool page.
132 *
133 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
134 * small page window employed by that function. Be careful.
135 * @remark There is no need to assert on the result.
136 */
137#if defined(IN_RC)
138DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
139{
140 if (pvPage)
141 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
142}
143#else
144# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
145#endif
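/*
 * A minimal usage sketch for the two helpers above (illustrative only;
 * assumes a valid pVM/pPage pair and an in-bounds entry index iShw):
 *
 *     PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
 *     if (pPT->a[iShw].n.u1Present)
 *         ASMAtomicWriteSize(&pPT->a[iShw].u, 0);   // clear the shadow PTE
 *     PGMPOOL_UNLOCK_PTR(pVM, pPT);
 *
 * In R3/R0 the pair degenerates to a plain PGMPOOL_PAGE_2_PTR mapping and a
 * no-op; only in RC is a dynamic mapping slot actually pinned and released.
 */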
146
147
148#ifdef PGMPOOL_WITH_MONITORING
149/**
150 * Determines the size of a write instruction.
151 * @returns number of bytes written.
152 * @param pDis The disassembler state.
153 */
154static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
155{
156 /*
157 * This is very crude and possibly wrong for some opcodes,
158 * but since it's not really supposed to be called we can
159 * probably live with that.
160 */
161 return DISGetParamSize(pDis, &pDis->param1);
162}
163
164
165/**
166 * Flushes a chain of pages sharing the same access monitor.
167 *
168 * @returns VBox status code suitable for scheduling.
169 * @param pPool The pool.
170 * @param pPage A page in the chain.
171 */
172int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
173{
174 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
175
176 /*
177 * Find the list head.
178 */
179 uint16_t idx = pPage->idx;
180 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
181 {
182 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
183 {
184 idx = pPage->iMonitoredPrev;
185 Assert(idx != pPage->idx);
186 pPage = &pPool->aPages[idx];
187 }
188 }
189
190 /*
191 * Iterate the list flushing each shadow page.
192 */
193 int rc = VINF_SUCCESS;
194 for (;;)
195 {
196 idx = pPage->iMonitoredNext;
197 Assert(idx != pPage->idx);
198 if (pPage->idx >= PGMPOOL_IDX_FIRST)
199 {
200 int rc2 = pgmPoolFlushPage(pPool, pPage);
201 AssertRC(rc2);
202 }
203 /* next */
204 if (idx == NIL_PGMPOOL_IDX)
205 break;
206 pPage = &pPool->aPages[idx];
207 }
208 return rc;
209}
210
211
212/**
213 * Wrapper for getting the current context pointer to the entry being modified.
214 *
215 * @returns VBox status code.
216 * @param pVM VM Handle.
217 * @param pvDst Destination address
218 * @param pvSrc Source guest virtual address.
219 * @param GCPhysSrc The source guest physical address.
220 * @param cb Size of data to read
221 */
222DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
223{
224#if defined(IN_RING3)
225 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
226 return VINF_SUCCESS;
227#else
228 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
229 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
230#endif
231}
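/*
 * Worked example for the masking above (illustrative only): the entry is
 * read from an address rounded down to its own size, so for an 8-byte PAE
 * entry and GCPhysFault = 0x00012345 we get
 *
 *     GCPhysSrc & ~(RTGCPHYS)(cb - 1)  =  0x00012345 & ~7  =  0x00012340
 *
 * i.e. the whole guest entry containing the (possibly unaligned) write.
 */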
232
233/**
234 * Process shadow entries before they are changed by the guest.
235 *
236 * For PT entries we will clear them. For PD entries, we'll simply check
237 * for mapping conflicts and set the SyncCR3 FF if found.
238 *
239 * @param pVCpu VMCPU handle
240 * @param pPool The pool.
241 * @param pPage The head page.
242 * @param GCPhysFault The guest physical fault address.
243 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
244 * In R3 this is the host context 'fault' address.
245 * @param pDis The disassembler state for figuring out the write size.
246 * This need not be specified if the caller knows we won't do cross entry accesses.
247 */
248void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, PDISCPUSTATE pDis)
249{
250 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
251 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
252 const unsigned cbWrite = pDis ? pgmPoolDisasWriteSize(pDis) : 0;
253 PVM pVM = pPool->CTX_SUFF(pVM);
254
255 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%s cbWrite=%d\n", (RTGCPTR)pvAddress, GCPhysFault, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
256 for (;;)
257 {
258 union
259 {
260 void *pv;
261 PX86PT pPT;
262 PX86PTPAE pPTPae;
263 PX86PD pPD;
264 PX86PDPAE pPDPae;
265 PX86PDPT pPDPT;
266 PX86PML4 pPML4;
267 } uShw;
268
269 uShw.pv = NULL;
270 switch (pPage->enmKind)
271 {
272 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
273 {
274 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
275 const unsigned iShw = off / sizeof(X86PTE);
276 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
277 if (uShw.pPT->a[iShw].n.u1Present)
278 {
279# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
280 X86PTE GstPte;
281
282 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
283 AssertRC(rc);
284 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
285 pgmPoolTracDerefGCPhysHint(pPool, pPage,
286 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
287 GstPte.u & X86_PTE_PG_MASK);
288# endif
289 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
290 }
291 break;
292 }
293
294 /* page/2 sized */
295 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
296 {
297 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
298 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
299 {
300 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
301 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
302 if (uShw.pPTPae->a[iShw].n.u1Present)
303 {
304# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
305 X86PTE GstPte;
306 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
307 AssertRC(rc);
308
309 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
310 pgmPoolTracDerefGCPhysHint(pPool, pPage,
311 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
312 GstPte.u & X86_PTE_PG_MASK);
313# endif
314 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
315 }
316 }
317 break;
318 }
319
320 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
321 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
322 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
323 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
324 {
325 unsigned iGst = off / sizeof(X86PDE);
326 unsigned iShwPdpt = iGst / 256;
327 unsigned iShw = (iGst % 256) * 2;
328 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
329
330 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
331 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
332 {
333 for (unsigned i = 0; i < 2; i++)
334 {
335# ifndef IN_RING0
336 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
337 {
338 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
339 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
340 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
341 break;
342 }
343 else
344# endif /* !IN_RING0 */
345 if (uShw.pPDPae->a[iShw+i].n.u1Present)
346 {
347 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
348 pgmPoolFree(pVM,
349 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
350 pPage->idx,
351 iShw + i);
352 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
353 }
354
355 /* paranoia / a bit assumptive. */
356 if ( pDis
357 && (off & 3)
358 && (off & 3) + cbWrite > 4)
359 {
360 const unsigned iShw2 = iShw + 2 + i;
361 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
362 {
363# ifndef IN_RING0
364 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
365 {
366 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
367 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
368 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
369 break;
370 }
371 else
372# endif /* !IN_RING0 */
373 if (uShw.pPDPae->a[iShw2].n.u1Present)
374 {
375 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
376 pgmPoolFree(pVM,
377 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
378 pPage->idx,
379 iShw2);
380 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
381 }
382 }
383 }
384 }
385 }
386 break;
387 }
388
389 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
390 {
391 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
392 const unsigned iShw = off / sizeof(X86PTEPAE);
393 if (uShw.pPTPae->a[iShw].n.u1Present)
394 {
395# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
396 X86PTEPAE GstPte;
397 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
398 AssertRC(rc);
399
400 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
401 pgmPoolTracDerefGCPhysHint(pPool, pPage,
402 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
403 GstPte.u & X86_PTE_PAE_PG_MASK);
404# endif
405 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
406 }
407
408 /* paranoia / a bit assumptive. */
409 if ( pDis
410 && (off & 7)
411 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
412 {
413 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
414 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
415
416 if (uShw.pPTPae->a[iShw2].n.u1Present)
417 {
418# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
419 X86PTEPAE GstPte;
420# ifdef IN_RING3
421 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
422# else
423 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
424# endif
425 AssertRC(rc);
426 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
427 pgmPoolTracDerefGCPhysHint(pPool, pPage,
428 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
429 GstPte.u & X86_PTE_PAE_PG_MASK);
430# endif
431 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u, 0);
432 }
433 }
434 break;
435 }
436
437 case PGMPOOLKIND_32BIT_PD:
438 {
439 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
440 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
441
442 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
443# ifndef IN_RING0
444 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
445 {
446 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
447 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
448 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
449 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
450 break;
451 }
452# endif /* !IN_RING0 */
453# ifndef IN_RING0
454 else
455# endif /* !IN_RING0 */
456 {
457 if (uShw.pPD->a[iShw].n.u1Present)
458 {
459 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
460 pgmPoolFree(pVM,
461 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
462 pPage->idx,
463 iShw);
464 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
465 }
466 }
467 /* paranoia / a bit assumptive. */
468 if ( pDis
469 && (off & 3)
470 && (off & 3) + cbWrite > sizeof(X86PTE))
471 {
472 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
473 if ( iShw2 != iShw
474 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
475 {
476# ifndef IN_RING0
477 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
478 {
479 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
480 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
481 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
482 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
483 break;
484 }
485# endif /* !IN_RING0 */
486# ifndef IN_RING0
487 else
488# endif /* !IN_RING0 */
489 {
490 if (uShw.pPD->a[iShw2].n.u1Present)
491 {
492 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
493 pgmPoolFree(pVM,
494 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
495 pPage->idx,
496 iShw2);
497 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
498 }
499 }
500 }
501 }
502#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
503 if ( uShw.pPD->a[iShw].n.u1Present
504 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
505 {
506 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
507# ifdef IN_RC /* TLB load - we're pushing things a bit... */
508 ASMProbeReadByte(pvAddress);
509# endif
510 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
511 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
512 }
513#endif
514 break;
515 }
516
517 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
518 {
519 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
520 const unsigned iShw = off / sizeof(X86PDEPAE);
521#ifndef IN_RING0
522 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
523 {
524 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
525 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
526 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
527 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
528 break;
529 }
530#endif /* !IN_RING0 */
531 /*
532 * Causes trouble when the guest uses a PDE to refer to the whole page table level
533 * structure. (Invalidate here; faults later on when it tries to change the page
534 * table entries -> recheck; probably only applies to the RC case.)
535 */
536# ifndef IN_RING0
537 else
538# endif /* !IN_RING0 */
539 {
540 if (uShw.pPDPae->a[iShw].n.u1Present)
541 {
542 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
543 pgmPoolFree(pVM,
544 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
545 pPage->idx,
546 iShw);
547 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
548 }
549 }
550 /* paranoia / a bit assumptive. */
551 if ( pDis
552 && (off & 7)
553 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
554 {
555 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
556 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
557
558#ifndef IN_RING0
559 if ( iShw2 != iShw
560 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
561 {
562 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
563 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
564 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
565 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
566 break;
567 }
568#endif /* !IN_RING0 */
569# ifndef IN_RING0
570 else
571# endif /* !IN_RING0 */
572 if (uShw.pPDPae->a[iShw2].n.u1Present)
573 {
574 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
575 pgmPoolFree(pVM,
576 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
577 pPage->idx,
578 iShw2);
579 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
580 }
581 }
582 break;
583 }
584
585 case PGMPOOLKIND_PAE_PDPT:
586 {
587 /*
588 * Hopefully this doesn't happen very often:
589 * - touching unused parts of the page
590 * - messing with the bits of pd pointers without changing the physical address
591 */
592 /* PDPT roots are not page aligned; 32 byte only! */
593 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
594
595 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
596 const unsigned iShw = offPdpt / sizeof(X86PDPE);
597 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
598 {
599# ifndef IN_RING0
600 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
601 {
602 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
603 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
604 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
605 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
606 break;
607 }
608# endif /* !IN_RING0 */
609# ifndef IN_RING0
610 else
611# endif /* !IN_RING0 */
612 if (uShw.pPDPT->a[iShw].n.u1Present)
613 {
614 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
615 pgmPoolFree(pVM,
616 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
617 pPage->idx,
618 iShw);
619 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
620 }
621
622 /* paranoia / a bit assumptive. */
623 if ( pDis
624 && (offPdpt & 7)
625 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
626 {
627 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
628 if ( iShw2 != iShw
629 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
630 {
631# ifndef IN_RING0
632 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
633 {
634 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
635 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
636 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
637 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
638 break;
639 }
640# endif /* !IN_RING0 */
641# ifndef IN_RING0
642 else
643# endif /* !IN_RING0 */
644 if (uShw.pPDPT->a[iShw2].n.u1Present)
645 {
646 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
647 pgmPoolFree(pVM,
648 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
649 pPage->idx,
650 iShw2);
651 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
652 }
653 }
654 }
655 }
656 break;
657 }
658
659#ifndef IN_RC
660 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
661 {
662 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
663 const unsigned iShw = off / sizeof(X86PDEPAE);
664 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
665 if (uShw.pPDPae->a[iShw].n.u1Present)
666 {
667 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
668 pgmPoolFree(pVM,
669 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
670 pPage->idx,
671 iShw);
672 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
673 }
674 /* paranoia / a bit assumptive. */
675 if ( pDis
676 && (off & 7)
677 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
678 {
679 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
680 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
681
682 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
683 if (uShw.pPDPae->a[iShw2].n.u1Present)
684 {
685 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
686 pgmPoolFree(pVM,
687 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
688 pPage->idx,
689 iShw2);
690 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
691 }
692 }
693 break;
694 }
695
696 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
697 {
698 /*
699 * Hopefully this doesn't happen very often:
700 * - messing with the bits of pd pointers without changing the physical address
701 */
702 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
703 {
704 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
705 const unsigned iShw = off / sizeof(X86PDPE);
706 if (uShw.pPDPT->a[iShw].n.u1Present)
707 {
708 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
709 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
710 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
711 }
712 /* paranoia / a bit assumptive. */
713 if ( pDis
714 && (off & 7)
715 && (off & 7) + cbWrite > sizeof(X86PDPE))
716 {
717 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
718 if (uShw.pPDPT->a[iShw2].n.u1Present)
719 {
720 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
721 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
722 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
723 }
724 }
725 }
726 break;
727 }
728
729 case PGMPOOLKIND_64BIT_PML4:
730 {
731 /*
732 * Hopefully this doesn't happen very often:
733 * - messing with the bits of pd pointers without changing the physical address
734 */
735 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
736 {
737 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
738 const unsigned iShw = off / sizeof(X86PDPE);
739 if (uShw.pPML4->a[iShw].n.u1Present)
740 {
741 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
742 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
743 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
744 }
745 /* paranoia / a bit assumptive. */
746 if ( pDis
747 && (off & 7)
748 && (off & 7) + cbWrite > sizeof(X86PDPE))
749 {
750 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
751 if (uShw.pPML4->a[iShw2].n.u1Present)
752 {
753 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
754 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
755 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
756 }
757 }
758 }
759 break;
760 }
761#endif /* !IN_RC */
762
763 default:
764 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
765 }
766 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
767
768 /* next */
769 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
770 return;
771 pPage = &pPool->aPages[pPage->iMonitoredNext];
772 }
773}
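/*
 * Worked example for the "paranoia / a bit assumptive" cross-entry checks
 * above (illustrative only): take a PAE page directory and an 8 byte write
 * at page offset off = 0x3FC. Then (off & 7) = 4 and 4 + cbWrite = 12,
 * which exceeds sizeof(X86PDEPAE), so the write straddles two entries:
 *
 *     iShw  = 0x3FC / 8           = 0x7F   (first entry touched)
 *     iShw2 = (0x3FC + 8 - 1) / 8 = 0x80   (second entry touched)
 *
 * Both entries are therefore checked and, when present, freed and cleared.
 */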
774
775# ifndef IN_RING3
776/**
777 * Checks if an access could be a fork operation in progress.
778 *
779 * Meaning that the guest is setting up the parent process for Copy-On-Write.
780 *
781 * @returns true if it's likely that we're forking, otherwise false.
782 * @param pPool The pool.
783 * @param pDis The disassembled instruction.
784 * @param offFault The access offset.
785 */
786DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
787{
788 /*
789 * i386 linux is using btr to clear X86_PTE_RW.
790 * The functions involved are (2.6.16 source inspection):
791 * clear_bit
792 * ptep_set_wrprotect
793 * copy_one_pte
794 * copy_pte_range
795 * copy_pmd_range
796 * copy_pud_range
797 * copy_page_range
798 * dup_mmap
799 * dup_mm
800 * copy_mm
801 * copy_process
802 * do_fork
803 */
804 if ( pDis->pCurInstr->opcode == OP_BTR
805 && !(offFault & 4)
806 /** @todo Validate that the bit index is X86_PTE_RW. */
807 )
808 {
809 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
810 return true;
811 }
812 return false;
813}
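/*
 * Illustration of the check above (assuming PAE paging, where X86_PTE_RW
 * sits in the low dword of each 8-byte entry): the typical faulting
 * instruction emitted by clear_bit() is along the lines of
 *
 *     btr [edx], eax        ; eax = 1, clears the R/W bit of the guest PTE
 *
 * and since the write lands on the low dword of the entry, (offFault & 4)
 * is zero and the heuristic fires. The bit index itself is not validated
 * (see the @todo above).
 */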
814
815
816/**
817 * Determine whether the page is likely to have been reused.
818 *
819 * @returns true if we consider the page as being reused for a different purpose.
820 * @returns false if we consider it to still be a paging page.
821 * @param pVM VM Handle.
822 * @param pVCpu VMCPU Handle.
823 * @param pRegFrame Trap register frame.
824 * @param pDis The disassembly info for the faulting instruction.
825 * @param pvFault The fault address.
826 *
827 * @remark The REP prefix check is left to the caller because of STOSD/W.
828 */
829DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
830{
831#ifndef IN_RC
832 /** @todo could make this general, faulting close to rsp should be safe reuse heuristic. */
833 if ( HWACCMHasPendingIrq(pVM)
834 && (pRegFrame->rsp - pvFault) < 32)
835 {
836 /* Fault caused by stack writes while trying to inject an interrupt event. */
837 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
838 return true;
839 }
840#else
841 NOREF(pVM); NOREF(pvFault);
842#endif
843
844 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
845
846 /* Non-supervisor mode write means it's used for something else. */
847 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
848 return true;
849
850 switch (pDis->pCurInstr->opcode)
851 {
852 /* call implies the actual push of the return address faulted */
853 case OP_CALL:
854 Log4(("pgmPoolMonitorIsReused: CALL\n"));
855 return true;
856 case OP_PUSH:
857 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
858 return true;
859 case OP_PUSHF:
860 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
861 return true;
862 case OP_PUSHA:
863 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
864 return true;
865 case OP_FXSAVE:
866 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
867 return true;
868 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
869 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
870 return true;
871 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
872 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
873 return true;
874 case OP_MOVSWD:
875 case OP_STOSWD:
876 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
877 && pRegFrame->rcx >= 0x40
878 )
879 {
880 Assert(pDis->mode == CPUMODE_64BIT);
881
882 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
883 return true;
884 }
885 return false;
886 }
887 if ( (pDis->param1.flags & USE_REG_GEN32)
888 && (pDis->param1.base.reg_gen == USE_REG_ESP))
889 {
890 Log4(("pgmPoolMonitorIsReused: ESP\n"));
891 return true;
892 }
893
894 return false;
895}
896
897
898/**
899 * Flushes the page being accessed.
900 *
901 * @returns VBox status code suitable for scheduling.
902 * @param pVM The VM handle.
903 * @param pVCpu The VMCPU handle.
904 * @param pPool The pool.
905 * @param pPage The pool page (head).
906 * @param pDis The disassembly of the write instruction.
907 * @param pRegFrame The trap register frame.
908 * @param GCPhysFault The fault address as guest physical address.
909 * @param pvFault The fault address.
910 */
911static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
912 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
913{
914 /*
915 * First, do the flushing.
916 */
917 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
918
919 /*
920 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
921 */
922 uint32_t cbWritten;
923 int rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten);
924 if (RT_SUCCESS(rc2))
925 pRegFrame->rip += pDis->opsize;
926 else if (rc2 == VERR_EM_INTERPRETER)
927 {
928#ifdef IN_RC
929 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
930 {
931 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
932 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
933 rc = VINF_SUCCESS;
934 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
935 }
936 else
937#endif
938 {
939 rc = VINF_EM_RAW_EMULATE_INSTR;
940 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
941 }
942 }
943 else
944 rc = rc2;
945
946 /* See use in pgmPoolAccessHandlerSimple(). */
947 PGM_INVL_VCPU_TLBS(pVCpu);
948
949 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
950 return rc;
951
952}
953
954
955/**
956 * Handles the STOSD write accesses.
957 *
958 * @returns VBox status code suitable for scheduling.
959 * @param pVM The VM handle.
960 * @param pPool The pool.
961 * @param pPage The pool page (head).
962 * @param pDis The disassembly of the write instruction.
963 * @param pRegFrame The trap register frame.
964 * @param GCPhysFault The fault address as guest physical address.
965 * @param pvFault The fault address.
966 */
967DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
968 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
969{
970 unsigned uIncrement = pDis->param1.size;
971
972 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
973 Assert(pRegFrame->rcx <= 0x20);
974
975#ifdef VBOX_STRICT
976 if (pDis->opmode == CPUMODE_32BIT)
977 Assert(uIncrement == 4);
978 else
979 Assert(uIncrement == 8);
980#endif
981
982 Log3(("pgmPoolAccessHandlerSTOSD\n"));
983
984 /*
985 * Increment the modification counter and insert it into the list
986 * of modified pages the first time.
987 */
988 if (!pPage->cModifications++)
989 pgmPoolMonitorModifiedInsert(pPool, pPage);
990
991 /*
992 * Execute REP STOSD.
993 *
994 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
995 * write situation, meaning that it's safe to write here.
996 */
997 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
998 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
999 while (pRegFrame->rcx)
1000 {
1001#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1002 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1003 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1004 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1005#else
1006 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1007#endif
1008#ifdef IN_RC
1009 *(uint32_t *)pu32 = pRegFrame->eax;
1010#else
1011 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
1012#endif
1013 pu32 += uIncrement;
1014 GCPhysFault += uIncrement;
1015 pRegFrame->rdi += uIncrement;
1016 pRegFrame->rcx--;
1017 }
1018 pRegFrame->rip += pDis->opsize;
1019
1020#ifdef IN_RC
1021 /* See use in pgmPoolAccessHandlerSimple(). */
1022 PGM_INVL_VCPU_TLBS(pVCpu);
1023#endif
1024
1025 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
1026 return VINF_SUCCESS;
1027}
1028
1029
1030/**
1031 * Handles the simple write accesses.
1032 *
1033 * @returns VBox status code suitable for scheduling.
1034 * @param pVM The VM handle.
1035 * @param pVCpu The VMCPU handle.
1036 * @param pPool The pool.
1037 * @param pPage The pool page (head).
1038 * @param pDis The disassembly of the write instruction.
1039 * @param pRegFrame The trap register frame.
1040 * @param GCPhysFault The fault address as guest physical address.
1041 * @param pvFault The fault address.
1042 */
1043DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1044 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1045{
1046 Log3(("pgmPoolAccessHandlerSimple\n"));
1047 /*
1048 * Increment the modification counter and insert it into the list
1049 * of modified pages the first time.
1050 */
1051 if (!pPage->cModifications++)
1052 pgmPoolMonitorModifiedInsert(pPool, pPage);
1053
1054 /*
1055 * Clear all the pages. ASSUMES that pvFault is readable.
1056 */
1057#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1058 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1059 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, pDis);
1060 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1061#else
1062 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, pDis);
1063#endif
1064
1065 /*
1066 * Interpret the instruction.
1067 */
1068 uint32_t cb;
1069 int rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb);
1070 if (RT_SUCCESS(rc))
1071 pRegFrame->rip += pDis->opsize;
1072 else if (rc == VERR_EM_INTERPRETER)
1073 {
1074 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1075 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1076 rc = VINF_EM_RAW_EMULATE_INSTR;
1077 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1078 }
1079
1080#ifdef IN_RC
1081 /*
1082 * Quick hack, with logging enabled we're getting stale
1083 * code TLBs but no data TLB for EIP, and we crash in EMInterpretDisasOne.
1084 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
1085 * have to be fixed to support this. But that'll have to wait till next week.
1086 *
1087 * An alternative is to keep track of the changed PTEs together with the
1088 * GCPhys from the guest PT. This may prove expensive though.
1089 *
1090 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
1091 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1092 */
1093 PGM_INVL_VCPU_TLBS(pVCpu);
1094#endif
1095
1096 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1097 return rc;
1098}
1099
1100/**
1101 * \#PF Handler callback for PT write accesses.
1102 *
1103 * @returns VBox status code (appropriate for GC return).
1104 * @param pVM VM Handle.
1105 * @param uErrorCode CPU Error code.
1106 * @param pRegFrame Trap register frame.
1107 * NULL on DMA and other non CPU access.
1108 * @param pvFault The fault address (cr2).
1109 * @param GCPhysFault The GC physical address corresponding to pvFault.
1110 * @param pvUser User argument.
1111 */
1112DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1113{
1114 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1115 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1116 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1117 PVMCPU pVCpu = VMMGetCpu(pVM);
1118 unsigned cMaxModifications;
1119
1120 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1121
1122 pgmLock(pVM);
1123 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1124 {
1125 /* Pool page changed while we were waiting for the lock; ignore. */
1126 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1127 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1128 pgmUnlock(pVM);
1129 return VINF_SUCCESS;
1130 }
1131
1132 /*
1133 * Disassemble the faulting instruction.
1134 */
1135 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1136 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1137 AssertReturnStmt(rc == VINF_SUCCESS, pgmUnlock(pVM), rc);
1138
1139 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1140
1141 /*
1142 * We should ALWAYS have the list head as user parameter. This
1143 * is because we use that page to record the changes.
1144 */
1145 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1146
1147 /* Maximum nr of modifications depends on the guest mode. */
1148 if (pDis->mode == CPUMODE_32BIT)
1149 cMaxModifications = 48;
1150 else
1151 cMaxModifications = 24;
1152
1153 /*
1154 * Incremental page table updates should weigh more than random ones.
1155 * (Only applies when started from offset 0)
1156 */
1157 pVCpu->pgm.s.cPoolAccessHandler++;
1158 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1159 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1160 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1161 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1162 {
1163 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1164 pPage->cModifications = pPage->cModifications * 2;
1165 pPage->pvLastAccessHandlerFault = pvFault;
1166 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1167 if (pPage->cModifications > cMaxModifications)
1168 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1169 }
1170
1171 /*
1172 * Check if it's worth dealing with.
1173 */
1174 bool fReused = false;
1175 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1176 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1177 )
1178 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1179 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1180 {
1181 /*
1182 * Simple instructions, no REP prefix.
1183 */
1184 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1185 {
1186 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1187
1188 /* A mov instruction to change the first page table entry will be remembered so we can detect
1189 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1190 */
1191 if ( rc == VINF_SUCCESS
1192 && pDis->pCurInstr->opcode == OP_MOV
1193 && (pvFault & PAGE_OFFSET_MASK) == 0)
1194 {
1195 pPage->pvLastAccessHandlerFault = pvFault;
1196 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1197 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1198 /* Make sure we don't kick out a page too quickly. */
1199 if (pPage->cModifications > 8)
1200 pPage->cModifications = 2;
1201 }
1202 else
1203 if (pPage->pvLastAccessHandlerFault == pvFault)
1204 {
1205 /* ignore the 2nd write to this page table entry. */
1206 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1207 }
1208 else
1209 {
1210 pPage->pvLastAccessHandlerFault = 0;
1211 pPage->pvLastAccessHandlerRip = 0;
1212 }
1213
1214 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1215 pgmUnlock(pVM);
1216 return rc;
1217 }
1218
1219 /*
1220 * Windows is frequently doing small memset() operations (netio test 4k+).
1221 * We have to deal with these or we'll kill the cache and performance.
1222 */
1223 if ( pDis->pCurInstr->opcode == OP_STOSWD
1224 && !pRegFrame->eflags.Bits.u1DF
1225 && pDis->opmode == pDis->mode
1226 && pDis->addrmode == pDis->mode)
1227 {
1228 bool fValidStosd = false;
1229
1230 if ( pDis->mode == CPUMODE_32BIT
1231 && pDis->prefix == PREFIX_REP
1232 && pRegFrame->ecx <= 0x20
1233 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1234 && !((uintptr_t)pvFault & 3)
1235 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1236 )
1237 {
1238 fValidStosd = true;
1239 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1240 }
1241 else
1242 if ( pDis->mode == CPUMODE_64BIT
1243 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1244 && pRegFrame->rcx <= 0x20
1245 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1246 && !((uintptr_t)pvFault & 7)
1247 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1248 )
1249 {
1250 fValidStosd = true;
1251 }
1252
1253 if (fValidStosd)
1254 {
1255 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1256 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1257 pgmUnlock(pVM);
1258 return rc;
1259 }
1260 }
1261
1262 /* REP prefix, don't bother. */
1263 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1264 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1265 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1266 }
1267
1268 /*
1269 * Not worth it, so flush it.
1270 *
1271 * If we considered it to be reused, don't go back to ring-3
1272 * to emulate failed instructions since we usually cannot
1273 * interpret then. This may be a bit risky, in which case
1274 * the reuse detection must be fixed.
1275 */
1276 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1277 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1278 rc = VINF_SUCCESS;
1279 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1280 pgmUnlock(pVM);
1281 return rc;
1282}
1283
1284# endif /* !IN_RING3 */
1285#endif /* PGMPOOL_WITH_MONITORING */
1286
1287#ifdef PGMPOOL_WITH_CACHE
1288
1289/**
1290 * Inserts a page into the GCPhys hash table.
1291 *
1292 * @param pPool The pool.
1293 * @param pPage The page.
1294 */
1295DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1296{
1297 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1298 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1299 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1300 pPage->iNext = pPool->aiHash[iHash];
1301 pPool->aiHash[iHash] = pPage->idx;
1302}
1303
1304
1305/**
1306 * Removes a page from the GCPhys hash table.
1307 *
1308 * @param pPool The pool.
1309 * @param pPage The page.
1310 */
1311DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1312{
1313 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1314 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1315 if (pPool->aiHash[iHash] == pPage->idx)
1316 pPool->aiHash[iHash] = pPage->iNext;
1317 else
1318 {
1319 uint16_t iPrev = pPool->aiHash[iHash];
1320 for (;;)
1321 {
1322 const int16_t i = pPool->aPages[iPrev].iNext;
1323 if (i == pPage->idx)
1324 {
1325 pPool->aPages[iPrev].iNext = pPage->iNext;
1326 break;
1327 }
1328 if (i == NIL_PGMPOOL_IDX)
1329 {
1330 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1331 break;
1332 }
1333 iPrev = i;
1334 }
1335 }
1336 pPage->iNext = NIL_PGMPOOL_IDX;
1337}
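/*
 * The GCPhys hash is a simple array of list heads indexed by
 * PGMPOOL_HASH(GCPhys), with the pages of a bucket chained through iNext.
 * A minimal lookup walk (sketch of what pgmPoolCacheAlloc does below):
 *
 *     unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
 *     while (i != NIL_PGMPOOL_IDX)
 *     {
 *         PPGMPOOLPAGE pPage = &pPool->aPages[i];
 *         if (pPage->GCPhys == GCPhys)
 *             break;              // found a shadow page for this GCPhys
 *         i = pPage->iNext;
 *     }
 */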
1338
1339
1340/**
1341 * Frees up one cache page.
1342 *
1343 * @returns VBox status code.
1344 * @retval VINF_SUCCESS on success.
1345 * @param pPool The pool.
1346 * @param iUser The user index.
1347 */
1348static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1349{
1350#ifndef IN_RC
1351 const PVM pVM = pPool->CTX_SUFF(pVM);
1352#endif
1353 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
1354 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1355
1356 /*
1357 * Select one page from the tail of the age list.
1358 */
1359 PPGMPOOLPAGE pPage;
1360 for (unsigned iLoop = 0; ; iLoop++)
1361 {
1362 uint16_t iToFree = pPool->iAgeTail;
1363 if (iToFree == iUser)
1364 iToFree = pPool->aPages[iToFree].iAgePrev;
1365/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1366 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1367 {
1368 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1369 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1370 {
1371 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1372 continue;
1373 iToFree = i;
1374 break;
1375 }
1376 }
1377*/
1378 Assert(iToFree != iUser);
1379 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1380 pPage = &pPool->aPages[iToFree];
1381
1382 /*
1383 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1384 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1385 */
1386 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1387 break;
1388 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1389 pgmPoolCacheUsed(pPool, pPage);
1390 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1391 }
1392
1393 /*
1394 * Found a usable page, flush it and return.
1395 */
1396 int rc = pgmPoolFlushPage(pPool, pPage);
1397 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1398 if (rc == VINF_SUCCESS)
1399 PGM_INVL_ALL_VCPU_TLBS(pVM);
1400 return rc;
1401}
1402
1403
1404/**
1405 * Checks if a kind mismatch is really a page being reused
1406 * or if it's just normal remappings.
1407 *
1408 * @returns true if reused and the cached page (enmKind1) should be flushed
1409 * @returns false if not reused.
1410 * @param enmKind1 The kind of the cached page.
1411 * @param enmKind2 The kind of the requested page.
1412 */
1413static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1414{
1415 switch (enmKind1)
1416 {
1417 /*
1418 * Never reuse them. There is no remapping in non-paging mode.
1419 */
1420 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1421 case PGMPOOLKIND_32BIT_PD_PHYS:
1422 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1423 case PGMPOOLKIND_PAE_PD_PHYS:
1424 case PGMPOOLKIND_PAE_PDPT_PHYS:
1425 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1426 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1427 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1428 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1429 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1430 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1431 return false;
1432
1433 /*
1434 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1435 */
1436 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1437 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1438 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1439 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1440 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1441 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1442 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1443 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1444 case PGMPOOLKIND_32BIT_PD:
1445 case PGMPOOLKIND_PAE_PDPT:
1446 switch (enmKind2)
1447 {
1448 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1449 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1450 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1451 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1452 case PGMPOOLKIND_64BIT_PML4:
1453 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1454 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1455 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1456 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1457 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1458 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1459 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1460 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1461 return true;
1462 default:
1463 return false;
1464 }
1465
1466 /*
1467 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1468 */
1469 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1470 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1471 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1472 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1473 case PGMPOOLKIND_64BIT_PML4:
1474 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1475 switch (enmKind2)
1476 {
1477 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1478 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1479 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1480 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1481 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1482 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1483 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1484 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1485 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1486 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1487 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1488 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1489 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1490 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1491 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1492 return true;
1493 default:
1494 return false;
1495 }
1496
1497 /*
1498 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1499 */
1500 case PGMPOOLKIND_ROOT_NESTED:
1501 return false;
1502
1503 default:
1504 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1505 }
1506}
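/*
 * Two illustrative calls (derived from the cases above):
 *
 *     pgmPoolCacheReusedByKind(PGMPOOLKIND_32BIT_PD,
 *                              PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT)
 *        -> false: the guest merely remapped the same physical page as a
 *           page table, so the cached shadow PD may be kept.
 *
 *     pgmPoolCacheReusedByKind(PGMPOOLKIND_32BIT_PD,
 *                              PGMPOOLKIND_PAE_PD_FOR_PAE_PD)
 *        -> true: the page is now used by PAE paging, so the cached 32-bit
 *           shadow is stale and must be flushed.
 */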
1507
1508
1509/**
1510 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1511 *
1512 * @returns VBox status code.
1513 * @retval VINF_PGM_CACHED_PAGE on success.
1514 * @retval VERR_FILE_NOT_FOUND if not found.
1515 * @param pPool The pool.
1516 * @param GCPhys The GC physical address of the page we're gonna shadow.
1517 * @param enmKind The kind of mapping.
1518 * @param enmAccess Access type for the mapping (only relevant for big pages)
1519 * @param iUser The shadow page pool index of the user table.
1520 * @param iUserTable The index into the user table (shadowed).
1521 * @param ppPage Where to store the pointer to the page.
1522 */
1523static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1524{
1525#ifndef IN_RC
1526 const PVM pVM = pPool->CTX_SUFF(pVM);
1527#endif
1528 /*
1529 * Look up the GCPhys in the hash.
1530 */
1531 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1532 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
1533 if (i != NIL_PGMPOOL_IDX)
1534 {
1535 do
1536 {
1537 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1538 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1539 if (pPage->GCPhys == GCPhys)
1540 {
1541 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
1542 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
1543 {
1544 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
1545 * doesn't flush it in case there are no more free use records.
1546 */
1547 pgmPoolCacheUsed(pPool, pPage);
1548
1549 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1550 if (RT_SUCCESS(rc))
1551 {
1552 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
1553 *ppPage = pPage;
1554 if (pPage->cModifications)
1555 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
1556 STAM_COUNTER_INC(&pPool->StatCacheHits);
1557 return VINF_PGM_CACHED_PAGE;
1558 }
1559 return rc;
1560 }
1561
1562 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
1563 {
1564 /*
1565 * The kind is different. In some cases we should now flush the page
1566 * as it has been reused, but in most cases this is normal remapping
1567 * of PDs as PT or big pages using the GCPhys field in a slightly
1568 * different way than the other kinds.
1569 */
1570 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1571 {
1572 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1573 pgmPoolFlushPage(pPool, pPage);
1574 PGM_INVL_VCPU_TLBS(VMMGetCpu(pVM)); /* see PT handler. */
1575 break;
1576 }
1577 }
1578 }
1579
1580 /* next */
1581 i = pPage->iNext;
1582 } while (i != NIL_PGMPOOL_IDX);
1583 }
1584
1585 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
1586 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1587 return VERR_FILE_NOT_FOUND;
1588}
1589
1590
1591/**
1592 * Inserts a page into the cache.
1593 *
1594 * @param pPool The pool.
1595 * @param pPage The cached page.
1596 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1597 */
1598static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1599{
1600 /*
1601 * Insert into the GCPhys hash if the page is fit for that.
1602 */
1603 Assert(!pPage->fCached);
1604 if (fCanBeCached)
1605 {
1606 pPage->fCached = true;
1607 pgmPoolHashInsert(pPool, pPage);
1608 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1609 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1610 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1611 }
1612 else
1613 {
1614 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1615 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1616 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1617 }
1618
1619 /*
1620 * Insert at the head of the age list.
1621 */
1622 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1623 pPage->iAgeNext = pPool->iAgeHead;
1624 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1625 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1626 else
1627 pPool->iAgeTail = pPage->idx;
1628 pPool->iAgeHead = pPage->idx;
1629}
1630
1631
1632/**
1633 * Flushes a cached page.
1634 *
1635 * @param pPool The pool.
1636 * @param pPage The cached page.
1637 */
1638static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1639{
1640 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1641
1642 /*
1643 * Remove the page from the hash.
1644 */
1645 if (pPage->fCached)
1646 {
1647 pPage->fCached = false;
1648 pgmPoolHashRemove(pPool, pPage);
1649 }
1650 else
1651 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1652
1653 /*
1654 * Remove it from the age list.
1655 */
1656 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1657 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1658 else
1659 pPool->iAgeTail = pPage->iAgePrev;
1660 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1661 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1662 else
1663 pPool->iAgeHead = pPage->iAgeNext;
1664 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1665 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1666}
1667
1668#endif /* PGMPOOL_WITH_CACHE */
1669#ifdef PGMPOOL_WITH_MONITORING
1670
1671/**
1672 * Looks for pages sharing the monitor.
1673 *
1674 * @returns Pointer to the head page.
1675 * @returns NULL if not found.
1676 * @param pPool The Pool
1677 * @param pNewPage The page which is going to be monitored.
1678 */
1679static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1680{
1681#ifdef PGMPOOL_WITH_CACHE
1682 /*
1683 * Look up the GCPhys in the hash.
1684 */
1685 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1686 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1687 if (i == NIL_PGMPOOL_IDX)
1688 return NULL;
1689 do
1690 {
1691 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1692 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1693 && pPage != pNewPage)
1694 {
1695 switch (pPage->enmKind)
1696 {
1697 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1698 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1699 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1700 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1701 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1702 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1703 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1704 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1705 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1706 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1707 case PGMPOOLKIND_64BIT_PML4:
1708 case PGMPOOLKIND_32BIT_PD:
1709 case PGMPOOLKIND_PAE_PDPT:
1710 {
1711 /* find the head */
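                    /* All pool pages shadowing this guest page are chained via iMonitoredPrev/iMonitoredNext; only the chain head has the physical access handler registered (see pgmPoolMonitorInsert and pgmPoolMonitorFlush). */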
1712 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1713 {
1714 Assert(pPage->iMonitoredPrev != pPage->idx);
1715 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1716 }
1717 return pPage;
1718 }
1719
1720 /* ignore, no monitoring. */
1721 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1722 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1723 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1724 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1725 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1726 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1727 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1728 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1729 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1730 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1731 case PGMPOOLKIND_ROOT_NESTED:
1732 case PGMPOOLKIND_PAE_PD_PHYS:
1733 case PGMPOOLKIND_PAE_PDPT_PHYS:
1734 case PGMPOOLKIND_32BIT_PD_PHYS:
1735 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1736 break;
1737 default:
1738 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1739 }
1740 }
1741
1742 /* next */
1743 i = pPage->iNext;
1744 } while (i != NIL_PGMPOOL_IDX);
1745#endif
1746 return NULL;
1747}
1748
1749
1750/**
1751 * Enables write monitoring of a guest page.
1752 *
1753 * @returns VBox status code.
1754 * @retval VINF_SUCCESS on success.
1755 * @param pPool The pool.
1756 * @param pPage The cached page.
1757 */
1758static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1759{
1760 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1761
1762 /*
1763 * Filter out the relevant kinds.
1764 */
1765 switch (pPage->enmKind)
1766 {
1767 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1768 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1769 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1770 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1771 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1772 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1773 case PGMPOOLKIND_64BIT_PML4:
1774 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1775 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1776 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1777 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1778 case PGMPOOLKIND_32BIT_PD:
1779 case PGMPOOLKIND_PAE_PDPT:
1780 break;
1781
1782 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1783 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1784 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1785 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1786 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1787 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1788 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1789 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1790 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1791 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1792 case PGMPOOLKIND_ROOT_NESTED:
1793 /* Nothing to monitor here. */
1794 return VINF_SUCCESS;
1795
1796 case PGMPOOLKIND_32BIT_PD_PHYS:
1797 case PGMPOOLKIND_PAE_PDPT_PHYS:
1798 case PGMPOOLKIND_PAE_PD_PHYS:
1799 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1800 /* Nothing to monitor here. */
1801 return VINF_SUCCESS;
1802#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1803 break;
1804#else
1805 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1806#endif
1807 default:
1808 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1809 }
1810
1811 /*
1812 * Install handler.
1813 */
1814 int rc;
1815 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1816 if (pPageHead)
1817 {
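        /* Another shadow page already monitors this guest page, so the physical handler is in place; just link pPage into the monitored chain right after the head. */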
1818 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1819 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1820 pPage->iMonitoredPrev = pPageHead->idx;
1821 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1822 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1823 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1824 pPageHead->iMonitoredNext = pPage->idx;
1825 rc = VINF_SUCCESS;
1826 }
1827 else
1828 {
1829 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1830 PVM pVM = pPool->CTX_SUFF(pVM);
1831 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1832 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1833 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1834 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1835 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1836 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1837 pPool->pszAccessHandler);
1838 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1839 * the heap size should suffice. */
1840 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
1841 Assert(!(VMMGetCpu(pVM)->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(VMMGetCpu(pVM), VMCPU_FF_PGM_SYNC_CR3));
1842 }
1843 pPage->fMonitored = true;
1844 return rc;
1845}
1846
1847
1848/**
1849 * Disables write monitoring of a guest page.
1850 *
1851 * @returns VBox status code.
1852 * @retval VINF_SUCCESS on success.
1853 * @param pPool The pool.
1854 * @param pPage The cached page.
1855 */
1856static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1857{
1858 /*
1859 * Filter out the relevant kinds.
1860 */
1861 switch (pPage->enmKind)
1862 {
1863 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1864 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1865 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1866 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1867 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1868 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1869 case PGMPOOLKIND_64BIT_PML4:
1870 case PGMPOOLKIND_32BIT_PD:
1871 case PGMPOOLKIND_PAE_PDPT:
1872 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1873 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1874 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1875 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1876 break;
1877
1878 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1879 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1880 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1881 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1882 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1883 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1884 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1885 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1886 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1887 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1888 case PGMPOOLKIND_ROOT_NESTED:
1889 case PGMPOOLKIND_PAE_PD_PHYS:
1890 case PGMPOOLKIND_PAE_PDPT_PHYS:
1891 case PGMPOOLKIND_32BIT_PD_PHYS:
1892 /* Nothing to monitor here. */
1893 return VINF_SUCCESS;
1894
1895#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1896 break;
1897#endif
1898 default:
1899 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1900 }
1901
1902 /*
1903 * Remove the page from the monitored list or uninstall it if last.
1904 */
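    /* Three cases: the page is the chain head with successors (hand the handler callbacks over to the new head), it is a non-head member (just unlink it), or it is the only monitored page (deregister the physical handler). */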
1905 const PVM pVM = pPool->CTX_SUFF(pVM);
1906 int rc;
1907 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1908 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1909 {
1910 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1911 {
1912 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1913 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1914 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1915 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1916 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1917 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
1918 pPool->pszAccessHandler);
1919 AssertFatalRCSuccess(rc);
1920 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1921 }
1922 else
1923 {
1924 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1925 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1926 {
1927 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1928 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1929 }
1930 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1931 rc = VINF_SUCCESS;
1932 }
1933 }
1934 else
1935 {
1936 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1937 AssertFatalRC(rc);
1938#ifdef VBOX_STRICT
1939 PVMCPU pVCpu = VMMGetCpu(pVM);
1940#endif
1941 AssertMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
1942 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
1943 }
1944 pPage->fMonitored = false;
1945
1946 /*
1947 * Remove it from the list of modified pages (if in it).
1948 */
1949 pgmPoolMonitorModifiedRemove(pPool, pPage);
1950
1951 return rc;
1952}
1953
1954
1955/**
1956 * Inserts the page into the list of modified pages.
1957 *
1958 * @param pPool The pool.
1959 * @param pPage The page.
1960 */
1961void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1962{
1963 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1964 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1965 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1966 && pPool->iModifiedHead != pPage->idx,
1967 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1968 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1969 pPool->iModifiedHead, pPool->cModifiedPages));
1970
1971 pPage->iModifiedNext = pPool->iModifiedHead;
1972 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1973 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1974 pPool->iModifiedHead = pPage->idx;
1975 pPool->cModifiedPages++;
1976#ifdef VBOX_WITH_STATISTICS
1977 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1978 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1979#endif
1980}
1981
1982
1983/**
1984 * Removes the page from the list of modified pages and resets the
1985 * modification counter.
1986 *
1987 * @param pPool The pool.
1988 * @param pPage The page which is believed to be in the list of modified pages.
1989 */
1990static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1991{
1992 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1993 if (pPool->iModifiedHead == pPage->idx)
1994 {
1995 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1996 pPool->iModifiedHead = pPage->iModifiedNext;
1997 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1998 {
1999 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2000 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2001 }
2002 pPool->cModifiedPages--;
2003 }
2004 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2005 {
2006 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2007 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2008 {
2009 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2010 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2011 }
2012 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2013 pPool->cModifiedPages--;
2014 }
2015 else
2016 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2017 pPage->cModifications = 0;
2018}
2019
2020
2021/**
2022 * Zaps the list of modified pages, resetting their modification counters in the process.
2023 *
2024 * @param pVM The VM handle.
2025 */
2026void pgmPoolMonitorModifiedClearAll(PVM pVM)
2027{
2028 pgmLock(pVM);
2029 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2030 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2031
2032 unsigned cPages = 0; NOREF(cPages);
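    /* cPages is only used by the assertions below; NOREF keeps non-strict builds quiet. */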
2033 uint16_t idx = pPool->iModifiedHead;
2034 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2035 while (idx != NIL_PGMPOOL_IDX)
2036 {
2037 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2038 idx = pPage->iModifiedNext;
2039 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2040 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2041 pPage->cModifications = 0;
2042 Assert(++cPages);
2043 }
2044 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2045 pPool->cModifiedPages = 0;
2046 pgmUnlock(pVM);
2047}
2048
2049
2050#ifdef IN_RING3
2051/**
2052 * Callback to clear all shadow pages and clear all modification counters.
2053 *
2054 * @returns VBox status code.
2055 * @param pVM The VM handle.
2056 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
2057 * @param pvUser Unused parameter.
2058 *
2059 * @remark Should only be used when monitoring is available, thus placed in
2060 * the PGMPOOL_WITH_MONITORING \#ifdef.
2061 */
2062DECLCALLBACK(int) pgmPoolClearAll(PVM pVM, PVMCPU pVCpu, void *pvUser)
2063{
2064 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2065 STAM_PROFILE_START(&pPool->StatClearAll, c);
2066 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2067 NOREF(pvUser); NOREF(pVCpu);
2068
2069 pgmLock(pVM);
2070
2071 /*
2072 * Iterate all the pages until we've encountered all that are in use.
2073 * This is a simple but not quite optimal solution.
2074 */
2075 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2076 unsigned cLeft = pPool->cUsedPages;
2077 unsigned iPage = pPool->cCurPages;
2078 while (--iPage >= PGMPOOL_IDX_FIRST)
2079 {
2080 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2081 if (pPage->GCPhys != NIL_RTGCPHYS)
2082 {
2083 switch (pPage->enmKind)
2084 {
2085 /*
2086 * We only care about shadow page tables.
2087 */
2088 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2089 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2090 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2091 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2092 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2093 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2094 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2095 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2096 {
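                    /* Only zero shadow page tables that actually contain present entries (when user tracking supplies the cPresent count). */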
2097#ifdef PGMPOOL_WITH_USER_TRACKING
2098 if (pPage->cPresent)
2099#endif
2100 {
2101 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2102 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2103 ASMMemZeroPage(pvShw);
2104 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2105#ifdef PGMPOOL_WITH_USER_TRACKING
2106 pPage->cPresent = 0;
2107 pPage->iFirstPresent = ~0;
2108#endif
2109 }
2110 }
2111 /* fall thru */
2112
2113 default:
2114 Assert(!pPage->cModifications || ++cModifiedPages);
2115 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2116 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2117 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2118 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2119 pPage->cModifications = 0;
2120 break;
2121
2122 }
2123 if (!--cLeft)
2124 break;
2125 }
2126 }
2127
2128 /* sweep the special pages too. */
2129 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2130 {
2131 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2132 if (pPage->GCPhys != NIL_RTGCPHYS)
2133 {
2134 Assert(!pPage->cModifications || ++cModifiedPages);
2135 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2136 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2137 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2138 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2139 pPage->cModifications = 0;
2140 }
2141 }
2142
2143#ifndef DEBUG_michael
2144 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2145#endif
2146 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2147 pPool->cModifiedPages = 0;
2148
2149#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2150 /*
2151 * Clear all the GCPhys links and rebuild the phys ext free list.
2152 */
2153 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2154 pRam;
2155 pRam = pRam->CTX_SUFF(pNext))
2156 {
2157 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2158 while (iPage-- > 0)
2159 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
2160 }
2161
2162 pPool->iPhysExtFreeHead = 0;
2163 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2164 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2165 for (unsigned i = 0; i < cMaxPhysExts; i++)
2166 {
2167 paPhysExts[i].iNext = i + 1;
2168 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2169 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2170 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2171 }
2172 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2173#endif
2174
2175 /* Clear the PGM_SYNC_CLEAR_PGM_POOL flag on all VCPUs to prevent redundant flushes. */
2176 for (unsigned idCpu = 0; idCpu < pVM->cCPUs; idCpu++)
2177 {
2178 PVMCPU pVCpu = &pVM->aCpus[idCpu];
2179
2180 pVCpu->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2181 }
2182
2183 pPool->cPresent = 0;
2184 pgmUnlock(pVM);
2185 PGM_INVL_ALL_VCPU_TLBS(pVM);
2186 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2187 return VINF_SUCCESS;
2188}
2189#endif /* IN_RING3 */
2190
2191
2192/**
2193 * Handle SyncCR3 pool tasks
2194 *
2195 * @returns VBox status code.
2196 * @retval VINF_SUCCESS on success.
2197 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2198 * @param pVCpu The VMCPU handle.
2199 * @remark Should only be used when monitoring is available, thus placed in
2200 * the PGMPOOL_WITH_MONITORING #ifdef.
2201 */
2202int pgmPoolSyncCR3(PVMCPU pVCpu)
2203{
2204 PVM pVM = pVCpu->CTX_SUFF(pVM);
2205 LogFlow(("pgmPoolSyncCR3\n"));
2206
2207 /*
2208 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2209 * Occasionally we will have to clear all the shadow page tables because we wanted
2210 * to monitor a page which was mapped by too many shadowed page tables. This operation is
2211 * sometimes referred to as a 'lightweight flush'.
2212 */
2213# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2214 if (ASMBitTestAndClear(&pVCpu->pgm.s.fSyncFlags, PGM_SYNC_CLEAR_PGM_POOL_BIT))
2215 {
2216 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmPoolClearAll, NULL);
2217 AssertRC(rc);
2218 }
2219# else /* !IN_RING3 */
2220 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2221 {
2222 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2223 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2224 return VINF_PGM_SYNC_CR3;
2225 }
2226# endif /* !IN_RING3 */
2227 else
2228 pgmPoolMonitorModifiedClearAll(pVM);
2229
2230 return VINF_SUCCESS;
2231}
2232
2233#endif /* PGMPOOL_WITH_MONITORING */
2234#ifdef PGMPOOL_WITH_USER_TRACKING
2235
2236/**
2237 * Frees up at least one user entry.
2238 *
2239 * @returns VBox status code.
2240 * @retval VINF_SUCCESS if successfully freed.
2241 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2242 * @param pPool The pool.
2243 * @param iUser The user index.
2244 */
2245static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2246{
2247 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2248#ifdef PGMPOOL_WITH_CACHE
2249 /*
2250 * Just free cached pages in a braindead fashion.
2251 */
2252 /** @todo walk the age list backwards and free the first with usage. */
2253 int rc = VINF_SUCCESS;
2254 do
2255 {
2256 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2257 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2258 rc = rc2;
2259 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2260 return rc;
2261#else
2262 /*
2263 * Lazy approach.
2264 */
2265 /* @todo This path no longer works (CR3 root pages will be flushed)!! */
2266 AssertCompileFailed();
2267 Assert(!CPUMIsGuestInLongMode(pVM));
2268 pgmPoolFlushAllInt(pPool);
2269 return VERR_PGM_POOL_FLUSHED;
2270#endif
2271}
2272
2273
2274/**
2275 * Inserts a page into the cache.
2276 *
2277 * This will create a user node for the page, insert it into the GCPhys
2278 * hash, and insert it into the age list.
2279 *
2280 * @returns VBox status code.
2281 * @retval VINF_SUCCESS if successfully added.
2282 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2283 * @param pPool The pool.
2284 * @param pPage The cached page.
2285 * @param GCPhys The GC physical address of the page we're gonna shadow.
2286 * @param iUser The user index.
2287 * @param iUserTable The user table index.
2288 */
2289DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2290{
2291 int rc = VINF_SUCCESS;
2292 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2293
2294 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2295
2296#ifdef VBOX_STRICT
2297 /*
2298 * Check that the entry doesn't already exist.
2299 */
2300 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2301 {
2302 uint16_t i = pPage->iUserHead;
2303 do
2304 {
2305 Assert(i < pPool->cMaxUsers);
2306 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2307 i = paUsers[i].iNext;
2308 } while (i != NIL_PGMPOOL_USER_INDEX);
2309 }
2310#endif
2311
2312 /*
2313 * Find a free user node.
2314 */
2315 uint16_t i = pPool->iUserFreeHead;
2316 if (i == NIL_PGMPOOL_USER_INDEX)
2317 {
2318 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2319 if (RT_FAILURE(rc))
2320 return rc;
2321 i = pPool->iUserFreeHead;
2322 }
2323
2324 /*
2325 * Unlink the user node from the free list,
2326 * initialize and insert it into the user list.
2327 */
2328 pPool->iUserFreeHead = paUsers[i].iNext;
2329 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2330 paUsers[i].iUser = iUser;
2331 paUsers[i].iUserTable = iUserTable;
2332 pPage->iUserHead = i;
2333
2334 /*
2335 * Insert into cache and enable monitoring of the guest page if enabled.
2336 *
2337 * Until we implement caching of all levels, including the CR3 one, we'll
2338 * have to make sure we don't try to monitor & cache any recursive reuse of
2339 * a monitored CR3 page. Because all Windows versions are doing this we'll
2340 * have to be able to do combined access monitoring, CR3 + PT and
2341 * PD + PT (guest PAE).
2342 *
2343 * Update:
2344 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2345 */
2346#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2347# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2348 const bool fCanBeMonitored = true;
2349# else
2350 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2351 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2352 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2353# endif
2354# ifdef PGMPOOL_WITH_CACHE
2355 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2356# endif
2357 if (fCanBeMonitored)
2358 {
2359# ifdef PGMPOOL_WITH_MONITORING
2360 rc = pgmPoolMonitorInsert(pPool, pPage);
2361 AssertRC(rc);
2362# endif
2363 }
2364#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2365 return rc;
2366}
2367
2368
2369# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2370/**
2371 * Adds a user reference to a page.
2372 *
2373 * This will move the page to the head of the age list.
2374 *
2375 * @returns VBox status code.
2376 * @retval VINF_SUCCESS if successfully added.
2377 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2378 * @param pPool The pool.
2379 * @param pPage The cached page.
2380 * @param iUser The user index.
2381 * @param iUserTable The user table.
2382 */
2383static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2384{
2385 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2386
2387 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2388
2389# ifdef VBOX_STRICT
2390 /*
2391 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2392 */
2393 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2394 {
2395 uint16_t i = pPage->iUserHead;
2396 do
2397 {
2398 Assert(i < pPool->cMaxUsers);
2399 AssertMsg(iUser == PGMPOOL_IDX_PD || iUser == PGMPOOL_IDX_PDPT || iUser == PGMPOOL_IDX_NESTED_ROOT || iUser == PGMPOOL_IDX_AMD64_CR3 ||
2400 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2401 i = paUsers[i].iNext;
2402 } while (i != NIL_PGMPOOL_USER_INDEX);
2403 }
2404# endif
2405
2406 /*
2407 * Allocate a user node.
2408 */
2409 uint16_t i = pPool->iUserFreeHead;
2410 if (i == NIL_PGMPOOL_USER_INDEX)
2411 {
2412 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2413 if (RT_FAILURE(rc))
2414 return rc;
2415 i = pPool->iUserFreeHead;
2416 }
2417 pPool->iUserFreeHead = paUsers[i].iNext;
2418
2419 /*
2420 * Initialize the user node and insert it.
2421 */
2422 paUsers[i].iNext = pPage->iUserHead;
2423 paUsers[i].iUser = iUser;
2424 paUsers[i].iUserTable = iUserTable;
2425 pPage->iUserHead = i;
2426
2427# ifdef PGMPOOL_WITH_CACHE
2428 /*
2429 * Tell the cache to update its replacement stats for this page.
2430 */
2431 pgmPoolCacheUsed(pPool, pPage);
2432# endif
2433 return VINF_SUCCESS;
2434}
2435# endif /* PGMPOOL_WITH_CACHE */
2436
2437
2438/**
2439 * Frees a user record associated with a page.
2440 *
2441 * This does not clear the entry in the user table, it simply returns the
2442 * user record to the chain of free records.
2443 *
2444 * @param pPool The pool.
2445 * @param pPage The shadow page.
2446 * @param iUser The shadow page pool index of the user table.
2447 * @param iUserTable The index into the user table (shadowed).
2448 */
2449static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2450{
2451 /*
2452 * Unlink and free the specified user entry.
2453 */
2454 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2455
2456 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2457 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2458 uint16_t i = pPage->iUserHead;
2459 if ( i != NIL_PGMPOOL_USER_INDEX
2460 && paUsers[i].iUser == iUser
2461 && paUsers[i].iUserTable == iUserTable)
2462 {
2463 pPage->iUserHead = paUsers[i].iNext;
2464
2465 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2466 paUsers[i].iNext = pPool->iUserFreeHead;
2467 pPool->iUserFreeHead = i;
2468 return;
2469 }
2470
2471 /* General: Linear search. */
2472 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2473 while (i != NIL_PGMPOOL_USER_INDEX)
2474 {
2475 if ( paUsers[i].iUser == iUser
2476 && paUsers[i].iUserTable == iUserTable)
2477 {
2478 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2479 paUsers[iPrev].iNext = paUsers[i].iNext;
2480 else
2481 pPage->iUserHead = paUsers[i].iNext;
2482
2483 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2484 paUsers[i].iNext = pPool->iUserFreeHead;
2485 pPool->iUserFreeHead = i;
2486 return;
2487 }
2488 iPrev = i;
2489 i = paUsers[i].iNext;
2490 }
2491
2492 /* Fatal: didn't find it */
2493 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2494 iUser, iUserTable, pPage->GCPhys));
2495}
2496
2497
2498/**
2499 * Gets the entry size of a shadow table.
2500 *
2501 * @param enmKind The kind of page.
2502 *
2503 * @returns The size of the entry in bytes. That is, 4 or 8.
2504 * @returns If the kind is not for a table, a fatal assertion is raised and
2505 * the function does not return.
2506 */
2507DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2508{
2509 switch (enmKind)
2510 {
2511 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2512 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2513 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2514 case PGMPOOLKIND_32BIT_PD:
2515 case PGMPOOLKIND_32BIT_PD_PHYS:
2516 return 4;
2517
2518 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2519 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2520 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2521 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2522 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2523 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2524 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2525 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2526 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2527 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2528 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2529 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2530 case PGMPOOLKIND_64BIT_PML4:
2531 case PGMPOOLKIND_PAE_PDPT:
2532 case PGMPOOLKIND_ROOT_NESTED:
2533 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2534 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2535 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2536 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2537 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2538 case PGMPOOLKIND_PAE_PD_PHYS:
2539 case PGMPOOLKIND_PAE_PDPT_PHYS:
2540 return 8;
2541
2542 default:
2543 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2544 }
2545}
2546
2547
2548/**
2549 * Gets the entry size of a guest table.
2550 *
2551 * @param enmKind The kind of page.
2552 *
2553 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2554 * @returns If the kind is not for a table, an assertion is raised and 0 is
2555 * returned.
2556 */
2557DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2558{
2559 switch (enmKind)
2560 {
2561 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2562 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2563 case PGMPOOLKIND_32BIT_PD:
2564 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2565 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2566 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2567 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2568 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2569 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2570 return 4;
2571
2572 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2573 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2574 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2575 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2576 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2577 case PGMPOOLKIND_64BIT_PML4:
2578 case PGMPOOLKIND_PAE_PDPT:
2579 return 8;
2580
2581 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2582 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2583 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2584 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2585 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2586 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2587 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2588 case PGMPOOLKIND_ROOT_NESTED:
2589 case PGMPOOLKIND_PAE_PD_PHYS:
2590 case PGMPOOLKIND_PAE_PDPT_PHYS:
2591 case PGMPOOLKIND_32BIT_PD_PHYS:
2592 /** @todo can we return 0? (nobody is calling this...) */
2593 AssertFailed();
2594 return 0;
2595
2596 default:
2597 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2598 }
2599}
2600
2601#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2602
2603/**
2604 * Scans one shadow page table for mappings of a physical page.
2605 *
2606 * @param pVM The VM handle.
2607 * @param pPhysPage The guest page in question.
2608 * @param iShw The shadow page table.
2609 * @param cRefs The number of references made in that PT.
2610 */
2611static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2612{
2613 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
2614 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2615
2616 /*
2617 * Assert sanity.
2618 */
2619 Assert(cRefs == 1);
2620 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2621 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2622
2623 /*
2624 * Then, clear the actual mappings to the page in the shadow PT.
2625 */
2626 switch (pPage->enmKind)
2627 {
2628 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2629 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2630 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2631 {
2632 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
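            /* Match value: the page's host physical address plus the present bit in 32-bit PTE format; any PTE whose (address | P) bits equal this maps pPhysPage. */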
2633 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2634 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2635 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2636 {
2637 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2638 pPT->a[i].u = 0;
2639 cRefs--;
2640 if (!cRefs)
2641 return;
2642 }
2643#ifdef LOG_ENABLED
2644 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2645 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2646 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2647 {
2648 Log(("i=%d cRefs=%d\n", i, cRefs--));
2649 }
2650#endif
2651 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2652 break;
2653 }
2654
2655 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2656 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2657 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2658 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2659 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2660 {
2661 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2662 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2663 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2664 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2665 {
2666 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2667 pPT->a[i].u = 0;
2668 cRefs--;
2669 if (!cRefs)
2670 return;
2671 }
2672#ifdef LOG_ENABLED
2673 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2674 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2675 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2676 {
2677 Log(("i=%d cRefs=%d\n", i, cRefs--));
2678 }
2679#endif
2680 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
2681 break;
2682 }
2683
2684 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2685 {
2686 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2687 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2688 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2689 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2690 {
2691 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2692 pPT->a[i].u = 0;
2693 cRefs--;
2694 if (!cRefs)
2695 return;
2696 }
2697#ifdef LOG_ENABLED
2698 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2699 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2700 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2701 {
2702 Log(("i=%d cRefs=%d\n", i, cRefs--));
2703 }
2704#endif
2705 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2706 break;
2707 }
2708
2709 default:
2710 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2711 }
2712}
2713
2714
2715/**
2716 * Scans one shadow page table for mappings of a physical page.
2717 *
2718 * @param pVM The VM handle.
2719 * @param pPhysPage The guest page in question.
2720 * @param iShw The shadow page table.
2721 * @param cRefs The number of references made in that PT.
2722 */
2723void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2724{
2725 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2726 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
2727 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2728 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2729 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
2730 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2731}
2732
2733
2734/**
2735 * Flushes a list of shadow page tables mapping the same physical page.
2736 *
2737 * @param pVM The VM handle.
2738 * @param pPhysPage The guest page in question.
2739 * @param iPhysExt The physical cross reference extent list to flush.
2740 */
2741void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2742{
2743 Assert(PGMIsLockOwner(pVM));
2744 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2745 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2746 LogFlow(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%R[pgmpage] iPhysExt=%d\n", pPhysPage, iPhysExt));
2747
2748 const uint16_t iPhysExtStart = iPhysExt;
2749 PPGMPOOLPHYSEXT pPhysExt;
2750 do
2751 {
2752 Assert(iPhysExt < pPool->cMaxPhysExts);
2753 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2754 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2755 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2756 {
2757 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2758 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2759 }
2760
2761 /* next */
2762 iPhysExt = pPhysExt->iNext;
2763 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2764
2765 /* insert the list into the free list and clear the ram range entry. */
2766 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2767 pPool->iPhysExtFreeHead = iPhysExtStart;
2768 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
2769
2770 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2771}
2772
2773#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2774
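/*
 * Illustrative caller sketch for pgmPoolTrackFlushGCPhys below, based on its
 * documented contract rather than copied from any actual caller:
 *
 *     bool fFlushTLBs = false;
 *     int rc = pgmPoolTrackFlushGCPhys(pVM, pPhysPage, &fFlushTLBs);
 *     // rc may be VINF_PGM_SYNC_CR3 if a CR3 sync + pool clear was scheduled instead.
 *     if (fFlushTLBs)
 *         PGM_INVL_ALL_VCPU_TLBS(pVM);
 */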
2775/**
2776 * Flushes all shadow page table mappings of the given guest page.
2777 *
2778 * This is typically called when the host page backing the guest one has been
2779 * replaced or when the page protection was changed due to an access handler.
2780 *
2781 * @returns VBox status code.
2782 * @retval VINF_SUCCESS if all references have been successfully cleared.
2783 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
2784 * pool cleaning. FF and sync flags are set.
2785 *
2786 * @param pVM The VM handle.
2787 * @param pPhysPage The guest page in question.
2788 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
2789 * flushed, it is NOT touched if this isn't necessary.
2790 * The caller MUST initialize this to @a false.
2791 */
2792int pgmPoolTrackFlushGCPhys(PVM pVM, PPGMPAGE pPhysPage, bool *pfFlushTLBs)
2793{
2794 PVMCPU pVCpu = VMMGetCpu(pVM);
2795 pgmLock(pVM);
2796 int rc = VINF_SUCCESS;
2797#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2798 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
2799 if (u16)
2800 {
2801 /*
2802 * The zero page is currently screwing up the tracking and we'll
2803 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2804 * is defined, zero pages won't normally be mapped. Some kind of solution
2805 * will be needed for this problem of course, but it will have to wait...
2806 */
2807 if (PGM_PAGE_IS_ZERO(pPhysPage))
2808 rc = VINF_PGM_GCPHYS_ALIASED;
2809 else
2810 {
2811# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2812 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
2813 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
2814 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
2815# endif
2816
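            /* The tracking word holds either a plain (cRefs, shadow PT index) pair, the head of a phys ext list (cRefs == PGMPOOL_TD_CREFS_PHYSEXT), or the overflowed marker; pick the cheapest flush that still covers all references. */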
2817 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
2818 pgmPoolTrackFlushGCPhysPT(pVM,
2819 pPhysPage,
2820 PGMPOOL_TD_GET_IDX(u16),
2821 PGMPOOL_TD_GET_CREFS(u16));
2822 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
2823 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, PGMPOOL_TD_GET_IDX(u16));
2824 else
2825 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
2826 *pfFlushTLBs = true;
2827
2828# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2829 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
2830# endif
2831 }
2832 }
2833
2834#elif defined(PGMPOOL_WITH_CACHE)
2835 if (PGM_PAGE_IS_ZERO(pPhysPage))
2836 rc = VINF_PGM_GCPHYS_ALIASED;
2837 else
2838 {
2839# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2840 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow may kill the pool otherwise. */
2841 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
2842# endif
2843 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
2844 if (rc == VINF_SUCCESS)
2845 *pfFlushTLBs = true;
2846 # ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2847 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
2848 # endif
2849 }
2850 
2851
2852#else
2853 rc = VINF_PGM_GCPHYS_ALIASED;
2854#endif
2855
2856 if (rc == VINF_PGM_GCPHYS_ALIASED)
2857 {
2858 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
2859 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2860 rc = VINF_PGM_SYNC_CR3;
2861 }
2862 pgmUnlock(pVM);
2863 return rc;
2864}
2865
2866
2867/**
2868 * Scans all shadow page tables for mappings of a physical page.
2869 *
2870 * This may be slow, but it's most likely more efficient than cleaning
2871 * out the entire page pool / cache.
2872 *
2873 * @returns VBox status code.
2874 * @retval VINF_SUCCESS if all references have been successfully cleared.
2875 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2876 * a page pool cleaning.
2877 *
2878 * @param pVM The VM handle.
2879 * @param pPhysPage The guest page in question.
2880 */
2881int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2882{
2883 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2884 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2885 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
2886 pPool->cUsedPages, pPool->cPresent, pPhysPage));
2887
2888#if 1
2889 /*
2890 * There is a limit to what makes sense.
2891 */
2892 if (pPool->cPresent > 1024)
2893 {
2894 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2895 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2896 return VINF_PGM_GCPHYS_ALIASED;
2897 }
2898#endif
2899
2900 /*
2901 * Iterate all the pages until we've encountered all that are in use.
2902 * This is a simple but not quite optimal solution.
2903 */
2904 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2905 const uint32_t u32 = u64;
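    /* Match values: the page's host physical address plus the present bit, in 64-bit and 32-bit PTE form. */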
2906 unsigned cLeft = pPool->cUsedPages;
2907 unsigned iPage = pPool->cCurPages;
2908 while (--iPage >= PGMPOOL_IDX_FIRST)
2909 {
2910 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2911 if (pPage->GCPhys != NIL_RTGCPHYS)
2912 {
2913 switch (pPage->enmKind)
2914 {
2915 /*
2916 * We only care about shadow page tables.
2917 */
2918 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2919 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2920 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2921 {
2922 unsigned cPresent = pPage->cPresent;
2923 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2924 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2925 if (pPT->a[i].n.u1Present)
2926 {
2927 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2928 {
2929 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2930 pPT->a[i].u = 0;
2931 }
2932 if (!--cPresent)
2933 break;
2934 }
2935 break;
2936 }
2937
2938 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2939 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2940 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2941 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2942 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2943 {
2944 unsigned cPresent = pPage->cPresent;
2945 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2946 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2947 if (pPT->a[i].n.u1Present)
2948 {
2949 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2950 {
2951 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2952 pPT->a[i].u = 0;
2953 }
2954 if (!--cPresent)
2955 break;
2956 }
2957 break;
2958 }
2959 }
2960 if (!--cLeft)
2961 break;
2962 }
2963 }
2964
2965 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
2966 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2967 return VINF_SUCCESS;
2968}
2969
2970
2971/**
2972 * Clears the user entry in a user table.
2973 *
2974 * This is used to remove all references to a page when flushing it.
2975 */
2976static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2977{
2978 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2979 Assert(pUser->iUser < pPool->cCurPages);
2980 uint32_t iUserTable = pUser->iUserTable;
2981
2982 /*
2983 * Map the user page.
2984 */
2985 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2986 union
2987 {
2988 uint64_t *pau64;
2989 uint32_t *pau32;
2990 } u;
2991 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
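    /* The user page is interpreted as 32-bit or 64-bit entries below, depending on pUserPage->enmKind. */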
2992
2993 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
2994
2995 /* Safety precaution in case we change the paging for other modes too in the future. */
2996 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
2997
2998#ifdef VBOX_STRICT
2999 /*
3000 * Some sanity checks.
3001 */
3002 switch (pUserPage->enmKind)
3003 {
3004 case PGMPOOLKIND_32BIT_PD:
3005 case PGMPOOLKIND_32BIT_PD_PHYS:
3006 Assert(iUserTable < X86_PG_ENTRIES);
3007 break;
3008 case PGMPOOLKIND_PAE_PDPT:
3009 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3010 case PGMPOOLKIND_PAE_PDPT_PHYS:
3011 Assert(iUserTable < 4);
3012 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3013 break;
3014 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3015 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3016 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3017 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3018 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3019 case PGMPOOLKIND_PAE_PD_PHYS:
3020 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3021 break;
3022 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3023 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3024 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3025 break;
3026 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3027 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3028 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3029 break;
3030 case PGMPOOLKIND_64BIT_PML4:
3031 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3032 /* GCPhys >> PAGE_SHIFT is the index here */
3033 break;
3034 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3035 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3036 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3037 break;
3038
3039 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3040 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3041 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3042 break;
3043
3044 case PGMPOOLKIND_ROOT_NESTED:
3045 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3046 break;
3047
3048 default:
3049 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3050 break;
3051 }
3052#endif /* VBOX_STRICT */
3053
3054 /*
3055 * Clear the entry in the user page.
3056 */
3057 switch (pUserPage->enmKind)
3058 {
3059 /* 32-bit entries */
3060 case PGMPOOLKIND_32BIT_PD:
3061 case PGMPOOLKIND_32BIT_PD_PHYS:
3062 u.pau32[iUserTable] = 0;
3063 break;
3064
3065 /* 64-bit entries */
3066 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3067 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3068 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3069 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3070 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3071#if defined(IN_RC)
3072 /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during cr3 load, so any
3073 * non-present PDPT will continue to cause page faults.
3074 */
3075 ASMReloadCR3();
3076#endif
3077 /* no break */
3078 case PGMPOOLKIND_PAE_PD_PHYS:
3079 case PGMPOOLKIND_PAE_PDPT_PHYS:
3080 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3081 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3082 case PGMPOOLKIND_64BIT_PML4:
3083 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3084 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3085 case PGMPOOLKIND_PAE_PDPT:
3086 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3087 case PGMPOOLKIND_ROOT_NESTED:
3088 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3089 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3090 u.pau64[iUserTable] = 0;
3091 break;
3092
3093 default:
3094 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3095 }
3096}
3097
3098
3099/**
3100 * Clears all users of a page.
3101 */
3102static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3103{
3104 /*
3105 * Free all the user records.
3106 */
3107 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3108
3109 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3110 uint16_t i = pPage->iUserHead;
3111 while (i != NIL_PGMPOOL_USER_INDEX)
3112 {
3113 /* Clear the entry in the user table. */
3114 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3115
3116 /* Free it. */
3117 const uint16_t iNext = paUsers[i].iNext;
3118 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3119 paUsers[i].iNext = pPool->iUserFreeHead;
3120 pPool->iUserFreeHead = i;
3121
3122 /* Next. */
3123 i = iNext;
3124 }
3125 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3126}
3127
3128#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3129
3130/**
3131 * Allocates a new physical cross reference extent.
3132 *
3133 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3134 * @param pVM The VM handle.
3135 * @param piPhysExt Where to store the phys ext index.
3136 */
3137PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3138{
3139 Assert(PGMIsLockOwner(pVM));
3140 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3141 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3142 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3143 {
3144 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3145 return NULL;
3146 }
3147 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3148 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3149 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3150 *piPhysExt = iPhysExt;
3151 return pPhysExt;
3152}
3153
3154
3155/**
3156 * Frees a physical cross reference extent.
3157 *
3158 * @param pVM The VM handle.
3159 * @param iPhysExt The extent to free.
3160 */
3161void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3162{
3163 Assert(PGMIsLockOwner(pVM));
3164 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3165 Assert(iPhysExt < pPool->cMaxPhysExts);
3166 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3167 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3168 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3169 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3170 pPool->iPhysExtFreeHead = iPhysExt;
3171}
3172
3173
3174/**
3175 * Frees a list of physical cross reference extents.
3176 *
3177 * @param pVM The VM handle.
3178 * @param iPhysExt The head of the extent list to free.
3179 */
3180void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3181{
3182 Assert(PGMIsLockOwner(pVM));
3183 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3184
3185 const uint16_t iPhysExtStart = iPhysExt;
3186 PPGMPOOLPHYSEXT pPhysExt;
3187 do
3188 {
3189 Assert(iPhysExt < pPool->cMaxPhysExts);
3190 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3191 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3192 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3193
3194 /* next */
3195 iPhysExt = pPhysExt->iNext;
3196 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3197
3198 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3199 pPool->iPhysExtFreeHead = iPhysExtStart;
3200}
3201
3202
3203/**
3204 * Insert a reference into a list of physical cross reference extents.
3205 *
3206 * @returns The new tracking data for PGMPAGE.
3207 *
3208 * @param pVM The VM handle.
3209 * @param iPhysExt The physical extent index of the list head.
3210 * @param iShwPT The shadow page table index.
3211 *
3212 */
3213static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3214{
3215 Assert(PGMIsLockOwner(pVM));
3216 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3217 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3218
3219 /* special common case. */
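    /* (pgmPoolTrackPhysExtAddref fills aidx[0] and aidx[1] when it converts to an extent list, so slot 2 of the head extent is typically the first free one.) */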
3220 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3221 {
3222 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3223 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3224 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d}\n", iPhysExt, iShwPT));
3225 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3226 }
3227
3228 /* general treatment. */
3229 const uint16_t iPhysExtStart = iPhysExt;
3230 unsigned cMax = 15;
3231 for (;;)
3232 {
3233 Assert(iPhysExt < pPool->cMaxPhysExts);
3234 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3235 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3236 {
3237 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3238 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3239 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3240 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3241 }
3242 if (!--cMax)
3243 {
3244 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3245 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3246 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3247 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3248 }
3249 }
3250
3251 /* add another extent to the list. */
3252 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3253 if (!pNew)
3254 {
3255 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3256 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3257 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3258 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3259 }
3260 pNew->iNext = iPhysExtStart;
3261 pNew->aidx[0] = iShwPT;
3262 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3263 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3264}
3265
3266
3267/**
3268 * Add a reference to guest physical page where extents are in use.
3269 *
3270 * @returns The new tracking data for PGMPAGE.
3271 *
3272 * @param pVM The VM handle.
3273 * @param u16 The ram range flags (top 16-bits).
3274 * @param iShwPT The shadow page table index.
3275 */
3276uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3277{
3278 pgmLock(pVM);
3279 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3280 {
3281 /*
3282 * Convert to extent list.
3283 */
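        /* u16 currently tracks a single reference (cRefs == 1 plus a shadow PT index); replace it with a phys ext list holding both the old index and iShwPT. */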
3284 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3285 uint16_t iPhysExt;
3286 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3287 if (pPhysExt)
3288 {
3289 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3290 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3291 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3292 pPhysExt->aidx[1] = iShwPT;
3293 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3294 }
3295 else
3296 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3297 }
3298 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3299 {
3300 /*
3301 * Insert into the extent list.
3302 */
3303 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3304 }
3305 else
3306 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3307 pgmUnlock(pVM);
3308 return u16;
3309}
3310
3311
3312/**
3313 * Clear references to guest physical memory.
3314 *
3315 * @param pPool The pool.
3316 * @param pPage The page.
3317 * @param pPhysPage Pointer to the aPages entry in the ram range.
3318 */
3319void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3320{
3321 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3322 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3323
3324 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3325 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3326 {
3327 PVM pVM = pPool->CTX_SUFF(pVM);
3328 pgmLock(pVM);
3329
3330 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3331 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3332 do
3333 {
3334 Assert(iPhysExt < pPool->cMaxPhysExts);
3335
3336 /*
3337 * Look for the shadow page and check if it's all freed.
3338 */
3339 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3340 {
3341 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3342 {
3343 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3344
3345 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3346 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3347 {
3348 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3349 pgmUnlock(pVM);
3350 return;
3351 }
3352
3353 /* we can free the node. */
3354 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3355 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3356 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3357 {
3358 /* lonely node */
3359 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3360 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3361 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3362 }
3363 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3364 {
3365 /* head */
3366 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3367 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3368 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3369 }
3370 else
3371 {
3372 /* in list */
3373 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3374 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3375 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3376 }
3377 iPhysExt = iPhysExtNext;
3378 pgmUnlock(pVM);
3379 return;
3380 }
3381 }
3382
3383 /* next */
3384 iPhysExtPrev = iPhysExt;
3385 iPhysExt = paPhysExts[iPhysExt].iNext;
3386 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3387
3388 pgmUnlock(pVM);
3389 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3390 }
3391 else /* nothing to do */
3392 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3393}
3394
3395
3396/**
3397 * Clear references to guest physical memory.
3398 *
3399 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3400 * is assumed to be correct, so the linear search can be skipped and we can assert
3401 * at an earlier point.
3402 *
3403 * @param pPool The pool.
3404 * @param pPage The page.
3405 * @param HCPhys The host physical address corresponding to the guest page.
3406 * @param GCPhys The guest physical address corresponding to HCPhys.
3407 */
3408static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3409{
3410 /*
3411 * Walk range list.
3412 */
3413 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3414 while (pRam)
3415 {
3416 RTGCPHYS off = GCPhys - pRam->GCPhys;
3417 if (off < pRam->cb)
3418 {
3419 /* does it match? */
3420 const unsigned iPage = off >> PAGE_SHIFT;
3421 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3422#ifdef LOG_ENABLED
3423            RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3424            Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3425#endif
3426 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3427 {
3428 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3429 return;
3430 }
3431 break;
3432 }
3433 pRam = pRam->CTX_SUFF(pNext);
3434 }
3435 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3436}
3437
3438
3439/**
3440 * Clear references to guest physical memory.
3441 *
3442 * @param pPool The pool.
3443 * @param pPage The page.
3444 * @param HCPhys The host physical address corresponding to the guest page.
3445 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3446 */
3447static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3448{
3449 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
3450
3451 /*
3452 * Walk range list.
3453 */
3454 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3455 while (pRam)
3456 {
3457 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3458 if (off < pRam->cb)
3459 {
3460 /* does it match? */
3461 const unsigned iPage = off >> PAGE_SHIFT;
3462 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3463 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3464 {
3465 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3466 return;
3467 }
3468 break;
3469 }
3470 pRam = pRam->CTX_SUFF(pNext);
3471 }
3472
3473 /*
3474 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3475 */
3476 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3477 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3478 while (pRam)
3479 {
3480 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3481 while (iPage-- > 0)
3482 {
3483 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3484 {
3485 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3486 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3487 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3488 return;
3489 }
3490 }
3491 pRam = pRam->CTX_SUFF(pNext);
3492 }
3493
3494 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3495}
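/*
 * Note: the pgmPoolTrackDerefPT* helpers below pass the guest PTE value as the
 * GCPhysHint; the PAE/PAE variant, for instance, makes exactly this call:
 *
 *      pgmPoolTracDerefGCPhysHint(pPool, pPage,
 *                                 pShwPT->a[i].u & X86_PTE_PAE_PG_MASK,    // HCPhys from the shadow PTE
 *                                 pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);   // GCPhys hint from the guest PTE
 *
 * Only when the guest has since remapped the page (stale hint) does the
 * expensive linear fallback above run; StatTrackLinearRamSearches counts those.
 */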
3496
3497
3498/**
3499 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3500 *
3501 * @param pPool The pool.
3502 * @param pPage The page.
3503 * @param pShwPT The shadow page table (mapping of the page).
3504 * @param pGstPT The guest page table.
3505 */
3506DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3507{
3508 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3509 if (pShwPT->a[i].n.u1Present)
3510 {
3511 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3512 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3513 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3514 if (!--pPage->cPresent)
3515 break;
3516 }
3517}
3518
3519
3520/**
3521 * Clear references to guest physical memory in a PAE / 32-bit page table.
3522 *
3523 * @param pPool The pool.
3524 * @param pPage The page.
3525 * @param pShwPT The shadow page table (mapping of the page).
3526 * @param pGstPT The guest page table (just a half one).
3527 */
3528DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3529{
3530 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3531 if (pShwPT->a[i].n.u1Present)
3532 {
3533 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3534 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3535 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3536 }
3537}
3538
3539
3540/**
3541 * Clear references to guest physical memory in a PAE / PAE page table.
3542 *
3543 * @param pPool The pool.
3544 * @param pPage The page.
3545 * @param pShwPT The shadow page table (mapping of the page).
3546 * @param pGstPT The guest page table.
3547 */
3548DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3549{
3550 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3551 if (pShwPT->a[i].n.u1Present)
3552 {
3553            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3554 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3555 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3556 }
3557}
3558
3559
3560/**
3561 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3562 *
3563 * @param pPool The pool.
3564 * @param pPage The page.
3565 * @param pShwPT The shadow page table (mapping of the page).
3566 */
3567DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3568{
3569 RTGCPHYS GCPhys = pPage->GCPhys;
3570 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3571 if (pShwPT->a[i].n.u1Present)
3572 {
3573 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3574 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3575 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3576 }
3577}
3578
3579
3580/**
3581 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3582 *
3583 * @param pPool The pool.
3584 * @param pPage The page.
3585 * @param pShwPT The shadow page table (mapping of the page).
3586 */
3587DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3588{
3589 RTGCPHYS GCPhys = pPage->GCPhys;
3590 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3591 if (pShwPT->a[i].n.u1Present)
3592 {
3593 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3594 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3595 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3596 }
3597}
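/*
 * Note: the 2/4MB variants above have no guest page table to take a hint from,
 * so the exact guest address is reconstructed as pPage->GCPhys + i * PAGE_SIZE
 * and the non-hint pgmPoolTracDerefGCPhys() is used instead.
 */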
3598
3599#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3600
3601
3602/**
3603 * Clear references to shadowed pages in a 32-bit page directory.
3604 *
3605 * @param pPool The pool.
3606 * @param pPage The page.
3607 * @param pShwPD The shadow page directory (mapping of the page).
3608 */
3609DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
3610{
3611 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3612 {
3613 if ( pShwPD->a[i].n.u1Present
3614 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3615 )
3616 {
3617 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
3618 if (pSubPage)
3619 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3620 else
3621 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
3622 }
3623 }
3624}
3625
3626/**
3627 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
3628 *
3629 * @param pPool The pool.
3630 * @param pPage The page.
3631 * @param pShwPD The shadow page directory (mapping of the page).
3632 */
3633DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3634{
3635 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3636 {
3637 if ( pShwPD->a[i].n.u1Present
3638 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3639 )
3640 {
3641 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3642 if (pSubPage)
3643 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3644 else
3645 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3646 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3647 }
3648 }
3649}
3650
3651/**
3652 * Clear references to shadowed pages in a PAE page directory pointer table.
3653 *
3654 * @param pPool The pool.
3655 * @param pPage The page.
3656 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3657 */
3658DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3659{
3660 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
3661 {
3662 if ( pShwPDPT->a[i].n.u1Present
3663 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
3664 )
3665 {
3666 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3667 if (pSubPage)
3668 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3669 else
3670 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3671 }
3672 }
3673}
3674
3675
3676/**
3677 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3678 *
3679 * @param pPool The pool.
3680 * @param pPage The page.
3681 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3682 */
3683DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3684{
3685 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3686 {
3687 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
3688 if (pShwPDPT->a[i].n.u1Present)
3689 {
3690 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3691 if (pSubPage)
3692 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3693 else
3694 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3695 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3696 }
3697 }
3698}
3699
3700
3701/**
3702 * Clear references to shadowed pages in a 64-bit level 4 page table.
3703 *
3704 * @param pPool The pool.
3705 * @param pPage The page.
3706 * @param pShwPML4 The shadow PML4 table (mapping of the page).
3707 */
3708DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3709{
3710 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3711 {
3712 if (pShwPML4->a[i].n.u1Present)
3713 {
3714 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3715 if (pSubPage)
3716 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3717 else
3718 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3719 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3720 }
3721 }
3722}
3723
3724
3725/**
3726 * Clear references to shadowed pages in an EPT page table.
3727 *
3728 * @param pPool The pool.
3729 * @param pPage The page.
3730 * @param pShwPT The shadow EPT page table (mapping of the page).
3731 */
3732DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3733{
3734 RTGCPHYS GCPhys = pPage->GCPhys;
3735 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3736 if (pShwPT->a[i].n.u1Present)
3737 {
3738 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
3739                  i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3740 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3741 }
3742}
3743
3744
3745/**
3746 * Clear references to shadowed pages in an EPT page directory.
3747 *
3748 * @param pPool The pool.
3749 * @param pPage The page.
3750 * @param pShwPD The shadow page directory (mapping of the page).
3751 */
3752DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3753{
3754 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3755 {
3756 if (pShwPD->a[i].n.u1Present)
3757 {
3758 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3759 if (pSubPage)
3760 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3761 else
3762 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3763 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3764 }
3765 }
3766}
3767
3768
3769/**
3770 * Clear references to shadowed pages in an EPT page directory pointer table.
3771 *
3772 * @param pPool The pool.
3773 * @param pPage The page.
3774 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3775 */
3776DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3777{
3778 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3779 {
3780 if (pShwPDPT->a[i].n.u1Present)
3781 {
3782 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3783 if (pSubPage)
3784 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3785 else
3786 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3787 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3788 }
3789 }
3790}
3791
3792
3793/**
3794 * Clears all references made by this page.
3795 *
3796 * This includes other shadow pages and GC physical addresses.
3797 *
3798 * @param pPool The pool.
3799 * @param pPage The page.
3800 */
3801static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3802{
3803 /*
3804 * Map the shadow page and take action according to the page kind.
3805 */
3806 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
3807 switch (pPage->enmKind)
3808 {
3809#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3810 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3811 {
3812 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3813 void *pvGst;
3814 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3815 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3816 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3817 break;
3818 }
3819
3820 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3821 {
3822 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3823 void *pvGst;
3824 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3825 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3826 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3827 break;
3828 }
3829
3830 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3831 {
3832 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3833 void *pvGst;
3834 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3835 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3836 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3837 break;
3838 }
3839
3840 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3841 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3842 {
3843 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3844 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3845 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3846 break;
3847 }
3848
3849 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3850 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3851 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3852 {
3853 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3854 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3855 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3856 break;
3857 }
3858
3859#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3860 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3861 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3862 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3863 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3864 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3865 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3866 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3867 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3868 break;
3869#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3870
3871 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3872 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3873 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3874 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3875 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3876 case PGMPOOLKIND_PAE_PD_PHYS:
3877 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3878 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3879 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3880 break;
3881
3882 case PGMPOOLKIND_32BIT_PD_PHYS:
3883 case PGMPOOLKIND_32BIT_PD:
3884 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
3885 break;
3886
3887 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3888 case PGMPOOLKIND_PAE_PDPT:
3889 case PGMPOOLKIND_PAE_PDPT_PHYS:
3890 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
3891 break;
3892
3893 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3894 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3895 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3896 break;
3897
3898 case PGMPOOLKIND_64BIT_PML4:
3899 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3900 break;
3901
3902 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3903 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
3904 break;
3905
3906 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3907 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
3908 break;
3909
3910 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3911 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
3912 break;
3913
3914 default:
3915 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3916 }
3917
3918    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3919 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3920 ASMMemZeroPage(pvShw);
3921 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3922 pPage->fZeroed = true;
3923 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
3924}
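/*
 * Note: pgmPoolTrackDeref() is the central "drop everything this shadow page
 * references" dispatch; pgmPoolFlushPage() below calls it right after clearing
 * the user records.  For the guest-backed PT kinds the guest table is mapped
 * (PGM_GCPHYS_2_PTR / _EX) purely so the helpers can use the guest PTEs as
 * GCPhys hints.
 */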
3925#endif /* PGMPOOL_WITH_USER_TRACKING */
3926
3927/**
3928 * Flushes a pool page.
3929 *
3930 * This moves the page to the free list after removing all user references to it.
3931 *
3932 * @returns VBox status code.
3933 * @retval VINF_SUCCESS on success.
3934 * @param pPool The pool.
3935 * @param pPage The shadow page to flush.
3936 */
3937int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3938{
3939 PVM pVM = pPool->CTX_SUFF(pVM);
3940
3941 int rc = VINF_SUCCESS;
3942 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3943 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
3944 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
3945
3946 /*
3947 * Quietly reject any attempts at flushing any of the special root pages.
3948 */
3949 if (pPage->idx < PGMPOOL_IDX_FIRST)
3950 {
3951 AssertFailed(); /* can no longer happen */
3952 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
3953 return VINF_SUCCESS;
3954 }
3955
3956 pgmLock(pVM);
3957
3958 /*
3959 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
3960 */
3961 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
3962 {
3963 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
3964 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
3965 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
3966 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
3967 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
3968 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
3969 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
3970 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
3971 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
3972 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
3973 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
3974 pgmUnlock(pVM);
3975 return VINF_SUCCESS;
3976 }
3977
3978#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3979 /* Start a subset so we won't run out of mapping space. */
3980 PVMCPU pVCpu = VMMGetCpu(pVM);
3981 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3982#endif
3983
3984 /*
3985     * Mark the page as being in need of an ASMMemZeroPage().
3986 */
3987 pPage->fZeroed = false;
3988
3989#ifdef PGMPOOL_WITH_USER_TRACKING
3990 /*
3991 * Clear the page.
3992 */
3993 pgmPoolTrackClearPageUsers(pPool, pPage);
3994 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3995 pgmPoolTrackDeref(pPool, pPage);
3996 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3997#endif
3998
3999#ifdef PGMPOOL_WITH_CACHE
4000 /*
4001 * Flush it from the cache.
4002 */
4003 pgmPoolCacheFlushPage(pPool, pPage);
4004#endif /* PGMPOOL_WITH_CACHE */
4005
4006#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4007 /* Heavy stuff done. */
4008 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4009#endif
4010
4011#ifdef PGMPOOL_WITH_MONITORING
4012 /*
4013     * Deregister the monitoring.
4014 */
4015 if (pPage->fMonitored)
4016 rc = pgmPoolMonitorFlush(pPool, pPage);
4017#endif
4018
4019 /*
4020 * Free the page.
4021 */
4022 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4023 pPage->iNext = pPool->iFreeHead;
4024 pPool->iFreeHead = pPage->idx;
4025 pPage->enmKind = PGMPOOLKIND_FREE;
4026 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4027 pPage->GCPhys = NIL_RTGCPHYS;
4028 pPage->fReusedFlushPending = false;
4029
4030 pPool->cUsedPages--;
4031 pgmUnlock(pVM);
4032 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4033 return rc;
4034}
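/*
 * Flush sequence recap (all steps implemented above):
 *      1. pgmPoolTrackClearPageUsers()  - zap the user table entries pointing at this page
 *      2. pgmPoolTrackDeref()           - drop the references this page holds on guest pages / sub-pages
 *      3. pgmPoolCacheFlushPage()       - remove it from the GCPhys hash and the age list
 *      4. pgmPoolMonitorFlush()         - deregister the write access handler, if monitored
 *      5. link the page onto iFreeHead and mark it PGMPOOLKIND_FREE
 */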
4035
4036
4037/**
4038 * Frees a usage of a pool page.
4039 *
4040 * The caller is responsible for updating the user table so that it no longer
4041 * references the shadow page.
4042 *
4043 * @param pPool The pool.
4044 * @param pPage The shadow page.
4045 * @param iUser The shadow page pool index of the user table.
4046 * @param iUserTable The index into the user table (shadowed).
4047 */
4048void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4049{
4050 PVM pVM = pPool->CTX_SUFF(pVM);
4051
4052 STAM_PROFILE_START(&pPool->StatFree, a);
4053 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4054 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4055 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4056 pgmLock(pVM);
4057#ifdef PGMPOOL_WITH_USER_TRACKING
4058 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4059#endif
4060#ifdef PGMPOOL_WITH_CACHE
4061 if (!pPage->fCached)
4062#endif
4063 pgmPoolFlushPage(pPool, pPage);
4064 pgmUnlock(pVM);
4065 STAM_PROFILE_STOP(&pPool->StatFree, a);
4066}
4067
4068
4069/**
4070 * Makes one or more free pages available.
4071 *
4072 * @returns VBox status code.
4073 * @retval VINF_SUCCESS on success.
4074 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4075 *
4076 * @param pPool The pool.
4077 * @param enmKind Page table kind.
4078 * @param iUser The user of the page.
4079 */
4080static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4081{
4082 PVM pVM = pPool->CTX_SUFF(pVM);
4083
4084 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4085
4086 /*
4087     * If the pool isn't fully grown yet, expand it.
4088 */
4089 if ( pPool->cCurPages < pPool->cMaxPages
4090#if defined(IN_RC)
4091 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4092 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4093 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4094#endif
4095 )
4096 {
4097 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4098#ifdef IN_RING3
4099 int rc = PGMR3PoolGrow(pVM);
4100#else
4101 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4102#endif
4103 if (RT_FAILURE(rc))
4104 return rc;
4105 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4106 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4107 return VINF_SUCCESS;
4108 }
4109
4110#ifdef PGMPOOL_WITH_CACHE
4111 /*
4112 * Free one cached page.
4113 */
4114 return pgmPoolCacheFreeOne(pPool, iUser);
4115#else
4116 /*
4117 * Flush the pool.
4118 *
4119 * If we have tracking enabled, it should be possible to come up with
4120 * a cheap replacement strategy...
4121 */
4122 /* @todo This path no longer works (CR3 root pages will be flushed)!! */
4123 AssertCompileFailed();
4124 Assert(!CPUMIsGuestInLongMode(pVM));
4125 pgmPoolFlushAllInt(pPool);
4126 return VERR_PGM_POOL_FLUSHED;
4127#endif
4128}
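/*
 * Strategy note: the pool grows first (directly in ring-3, via a
 * VMMCALLRING3_PGM_POOL_GROW request from ring-0/RC); only when it is already
 * at cMaxPages is a cached page evicted with pgmPoolCacheFreeOne().  The IN_RC
 * exception above exists because MapCR3 cannot tolerate a jump to ring 3 while
 * allocating PAE PD pages.
 */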
4129
4130/**
4131 * Allocates a page from the pool.
4132 *
4133 * This page may actually be a cached page and not in need of any processing
4134 * on the caller's part.
4135 *
4136 * @returns VBox status code.
4137 * @retval VINF_SUCCESS if a NEW page was allocated.
4138 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4139 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4140 * @param pVM The VM handle.
4141 * @param GCPhys The GC physical address of the page we're going to shadow.
4142 * For 4MB and 2MB PD entries, it's the first address the
4143 * shadow PT is covering.
4144 * @param enmKind The kind of mapping.
4145 * @param enmAccess Access type for the mapping (only relevant for big pages)
4146 * @param iUser The shadow page pool index of the user table.
4147 * @param iUserTable The index into the user table (shadowed).
4148 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4149 * @param fLockPage Lock the page.
4150 */
4151int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4152{
4153 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4154 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4155 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4156 *ppPage = NULL;
4157 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4158 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4159 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4160
4161 pgmLock(pVM);
4162
4163#ifdef PGMPOOL_WITH_CACHE
4164 if (pPool->fCacheEnabled)
4165 {
4166 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4167 if (RT_SUCCESS(rc2))
4168 {
4169 if (fLockPage)
4170 pgmPoolLockPage(pPool, *ppPage);
4171 pgmUnlock(pVM);
4172 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4173 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4174 return rc2;
4175 }
4176 }
4177#endif
4178
4179 /*
4180 * Allocate a new one.
4181 */
4182 int rc = VINF_SUCCESS;
4183 uint16_t iNew = pPool->iFreeHead;
4184 if (iNew == NIL_PGMPOOL_IDX)
4185 {
4186 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4187 if (RT_FAILURE(rc))
4188 {
4189 pgmUnlock(pVM);
4190 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4191 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4192 return rc;
4193 }
4194 iNew = pPool->iFreeHead;
4195 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4196 }
4197
4198 /* unlink the free head */
4199 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4200 pPool->iFreeHead = pPage->iNext;
4201 pPage->iNext = NIL_PGMPOOL_IDX;
4202
4203 /*
4204 * Initialize it.
4205 */
4206 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4207 pPage->enmKind = enmKind;
4208 pPage->enmAccess = enmAccess;
4209 pPage->GCPhys = GCPhys;
4210 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4211 pPage->fMonitored = false;
4212 pPage->fCached = false;
4213 pPage->fReusedFlushPending = false;
4214#ifdef PGMPOOL_WITH_MONITORING
4215 pPage->cModifications = 0;
4216 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4217 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4218#else
4219 pPage->fCR3Mix = false;
4220#endif
4221#ifdef PGMPOOL_WITH_USER_TRACKING
4222 pPage->cPresent = 0;
4223 pPage->iFirstPresent = ~0;
4224 pPage->pvLastAccessHandlerFault = 0;
4225 pPage->cLastAccessHandlerCount = 0;
4226 pPage->pvLastAccessHandlerRip = 0;
4227
4228 /*
4229 * Insert into the tracking and cache. If this fails, free the page.
4230 */
4231 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4232 if (RT_FAILURE(rc3))
4233 {
4234 pPool->cUsedPages--;
4235 pPage->enmKind = PGMPOOLKIND_FREE;
4236 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4237 pPage->GCPhys = NIL_RTGCPHYS;
4238 pPage->iNext = pPool->iFreeHead;
4239 pPool->iFreeHead = pPage->idx;
4240 pgmUnlock(pVM);
4241 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4242 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4243 return rc3;
4244 }
4245#endif /* PGMPOOL_WITH_USER_TRACKING */
4246
4247 /*
4248 * Commit the allocation, clear the page and return.
4249 */
4250#ifdef VBOX_WITH_STATISTICS
4251 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4252 pPool->cUsedPagesHigh = pPool->cUsedPages;
4253#endif
4254
4255 if (!pPage->fZeroed)
4256 {
4257 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4258 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4259 ASMMemZeroPage(pv);
4260 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4261 }
4262
4263 *ppPage = pPage;
4264 if (fLockPage)
4265 pgmPoolLockPage(pPool, pPage);
4266 pgmUnlock(pVM);
4267 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4268 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4269 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4270 return rc;
4271}
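/*
 * Hypothetical usage sketch (illustration only; the real call sites live in the
 * mode-specific shadow paging code and pass their own kind, iUser and
 * iUserTable; GCPhysPT, pShwPde and iPde below are made-up names):
 *
 * @code
 *      PPGMPOOLPAGE pShwPage;
 *      int rc = pgmPoolAllocEx(pVM, GCPhysPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                              PGMPOOLACCESS_DONTCARE, pShwPde->idx, iPde,
 *                              &pShwPage, false);
 *      if (rc == VINF_PGM_CACHED_PAGE)
 *          ;       // an existing shadow PT was found in the cache, reuse it as-is
 *      else if (RT_SUCCESS(rc))
 *          ;       // brand new, zeroed page; the caller fills it in
 * @endcode
 */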
4272
4273
4274/**
4275 * Frees a usage of a pool page.
4276 *
4277 * @param pVM The VM handle.
4278 * @param HCPhys The HC physical address of the shadow page.
4279 * @param iUser The shadow page pool index of the user table.
4280 * @param iUserTable The index into the user table (shadowed).
4281 */
4282void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4283{
4284 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4285 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4286 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4287}
4288
4289/**
4290 * Internal worker for finding an 'in-use' shadow page given its physical address.
4291 *
4292 * @returns Pointer to the shadow page structure.
4293 * @param pPool The pool.
4294 * @param HCPhys The HC physical address of the shadow page.
4295 */
4296PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4297{
4298 PVM pVM = pPool->CTX_SUFF(pVM);
4299
4300 Assert(PGMIsLockOwner(pVM));
4301
4302 /*
4303 * Look up the page.
4304 */
4305 pgmLock(pVM);
4306 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4307 pgmUnlock(pVM);
4308
4309 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4310 return pPage;
4311}
4312
4313
4314#ifdef IN_RING3
4315/**
4316 * Resets the shadow page pool, flushing the entire cache.
4317 *
4318 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4319 * and will execute this CR3 flush.
4320 *
4321 * @param pVM The VM handle.
4322 */
4323void pgmR3PoolReset(PVM pVM)
4324{
4325 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4326
4327 Assert(PGMIsLockOwner(pVM));
4328 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
4329 LogFlow(("pgmPoolFlushAllInt:\n"));
4330
4331 /*
4332 * If there are no pages in the pool, there is nothing to do.
4333 */
4334 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4335 {
4336 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4337 return;
4338 }
4339
4340 /*
4341 * Exit the shadow mode since we're going to clear everything,
4342 * including the root page.
4343 */
4344    for (unsigned i = 0; i < pVM->cCPUs; i++)
4345 {
4346 PVMCPU pVCpu = &pVM->aCpus[i];
4347 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
4348 }
4349
4350 /*
4351 * Nuke the free list and reinsert all pages into it.
4352 */
4353 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4354 {
4355 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4356
4357 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
4358#ifdef PGMPOOL_WITH_MONITORING
4359 if (pPage->fMonitored)
4360 pgmPoolMonitorFlush(pPool, pPage);
4361 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4362 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4363 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4364 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4365 pPage->cModifications = 0;
4366#endif
4367 pPage->GCPhys = NIL_RTGCPHYS;
4368 pPage->enmKind = PGMPOOLKIND_FREE;
4369 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4370 Assert(pPage->idx == i);
4371 pPage->iNext = i + 1;
4372 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4373 pPage->fSeenNonGlobal = false;
4374 pPage->fMonitored = false;
4375 pPage->fCached = false;
4376 pPage->fReusedFlushPending = false;
4377#ifdef PGMPOOL_WITH_USER_TRACKING
4378 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4379#else
4380 pPage->fCR3Mix = false;
4381#endif
4382#ifdef PGMPOOL_WITH_CACHE
4383 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4384 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4385#endif
4386 pPage->cLocked = 0;
4387 }
4388 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4389 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4390 pPool->cUsedPages = 0;
4391
4392#ifdef PGMPOOL_WITH_USER_TRACKING
4393 /*
4394 * Zap and reinitialize the user records.
4395 */
4396 pPool->cPresent = 0;
4397 pPool->iUserFreeHead = 0;
4398 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4399 const unsigned cMaxUsers = pPool->cMaxUsers;
4400 for (unsigned i = 0; i < cMaxUsers; i++)
4401 {
4402 paUsers[i].iNext = i + 1;
4403 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4404 paUsers[i].iUserTable = 0xfffffffe;
4405 }
4406 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4407#endif
4408
4409#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4410 /*
4411 * Clear all the GCPhys links and rebuild the phys ext free list.
4412 */
4413 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
4414 pRam;
4415 pRam = pRam->CTX_SUFF(pNext))
4416 {
4417 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4418 while (iPage-- > 0)
4419 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
4420 }
4421
4422 pPool->iPhysExtFreeHead = 0;
4423 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4424 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4425 for (unsigned i = 0; i < cMaxPhysExts; i++)
4426 {
4427 paPhysExts[i].iNext = i + 1;
4428 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4429 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4430 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4431 }
4432 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4433#endif
4434
4435#ifdef PGMPOOL_WITH_MONITORING
4436 /*
4437 * Just zap the modified list.
4438 */
4439 pPool->cModifiedPages = 0;
4440 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4441#endif
4442
4443#ifdef PGMPOOL_WITH_CACHE
4444 /*
4445 * Clear the GCPhys hash and the age list.
4446 */
4447 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4448 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4449 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4450 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4451#endif
4452
4453 /*
4454 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4455 */
4456 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4457 {
4458 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4459 pPage->iNext = NIL_PGMPOOL_IDX;
4460#ifdef PGMPOOL_WITH_MONITORING
4461 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4462 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4463 pPage->cModifications = 0;
4464 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4465 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4466 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4467 if (pPage->fMonitored)
4468 {
4469 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4470 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4471 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4472 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4473 pPool->pszAccessHandler);
4474 AssertFatalRCSuccess(rc);
4475# ifdef PGMPOOL_WITH_CACHE
4476 pgmPoolHashInsert(pPool, pPage);
4477# endif
4478 }
4479#endif
4480#ifdef PGMPOOL_WITH_USER_TRACKING
4481 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4482#endif
4483#ifdef PGMPOOL_WITH_CACHE
4484 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4485 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4486#endif
4487 }
4488
4489    for (unsigned i = 0; i < pVM->cCPUs; i++)
4490 {
4491 PVMCPU pVCpu = &pVM->aCpus[i];
4492 /*
4493 * Re-enter the shadowing mode and assert Sync CR3 FF.
4494 */
4495 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
4496 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
4497 }
4498
4499 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4500}
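/*
 * Reset recap (ring-3 only): exit shadow mode on every VCPU, rebuild the free
 * list from scratch, wipe the user, phys-ext, modified and cache structures,
 * re-hook the access handlers of the still monitored special root pages, then
 * re-enter shadow mode and force a full SyncCR3 on every VCPU.
 */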
4501#endif /* IN_RING3 */
4502
4503#ifdef LOG_ENABLED
4504static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
4505{
4506 switch(enmKind)
4507 {
4508 case PGMPOOLKIND_INVALID:
4509 return "PGMPOOLKIND_INVALID";
4510 case PGMPOOLKIND_FREE:
4511 return "PGMPOOLKIND_FREE";
4512 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4513 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4514 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4515 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4516 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4517 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4518 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4519 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4520 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4521 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4522 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4523 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
4524 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4525 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
4526 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4527 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
4528 case PGMPOOLKIND_32BIT_PD:
4529 return "PGMPOOLKIND_32BIT_PD";
4530 case PGMPOOLKIND_32BIT_PD_PHYS:
4531 return "PGMPOOLKIND_32BIT_PD_PHYS";
4532 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4533 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
4534 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4535 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
4536 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4537 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
4538 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4539 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
4540 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4541 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
4542 case PGMPOOLKIND_PAE_PD_PHYS:
4543 return "PGMPOOLKIND_PAE_PD_PHYS";
4544 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4545 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
4546 case PGMPOOLKIND_PAE_PDPT:
4547 return "PGMPOOLKIND_PAE_PDPT";
4548 case PGMPOOLKIND_PAE_PDPT_PHYS:
4549 return "PGMPOOLKIND_PAE_PDPT_PHYS";
4550 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4551 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
4552 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4553 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
4554 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4555 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
4556 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4557 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
4558 case PGMPOOLKIND_64BIT_PML4:
4559 return "PGMPOOLKIND_64BIT_PML4";
4560 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4561 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
4562 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4563 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
4564 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4565 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
4566 case PGMPOOLKIND_ROOT_NESTED:
4567 return "PGMPOOLKIND_ROOT_NESTED";
4568 }
4569 return "Unknown kind!";
4570}
4571#endif /* LOG_ENABLED */