VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 20763

Last change on this file since 20763 was 20763, checked in by vboxsync, 16 years ago

Paranoia

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 162.5 KB
1/* $Id: PGMAllPool.cpp 20763 2009-06-22 11:10:35Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42#include <iprt/string.h>
43
44
45/*******************************************************************************
46* Internal Functions *
47*******************************************************************************/
48RT_C_DECLS_BEGIN
49static void pgmPoolFlushAllInt(PPGMPOOL pPool);
50#ifdef PGMPOOL_WITH_USER_TRACKING
51DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
52DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
53static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
54#endif
55#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
56static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
57#endif
58#ifdef PGMPOOL_WITH_CACHE
59static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
60#endif
61#ifdef PGMPOOL_WITH_MONITORING
62static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
63#endif
64#ifndef IN_RING3
65DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
66#endif
67#ifdef LOG_ENABLED
68static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
69#endif
70
71void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs);
72void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt);
73int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
74PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
75void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
76void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
77
78RT_C_DECLS_END
79
80
81/**
82 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
83 *
84 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
85 * @param enmKind The page kind.
86 */
87DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
88{
89 switch (enmKind)
90 {
91 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
92 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
93 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
94 return true;
95 default:
96 return false;
97 }
98}
99
100/** @def PGMPOOL_PAGE_2_LOCKED_PTR
101 * Maps a pool page into the current context and locks it (RC only).
102 *
103 * @returns VBox status code.
104 * @param pVM The VM handle.
105 * @param pPage The pool page.
106 *
107 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
108 * small page window employed by that function. Be careful.
109 * @remark There is no need to assert on the result.
110 */
111#if defined(IN_RC)
112DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
113{
114 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
115
116 /* Make sure the dynamic mapping will not be reused. */
117 if (pv)
118 PGMDynLockHCPage(pVM, (uint8_t *)pv);
119
120 return pv;
121}
122#else
123# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
124#endif
125
126/** @def PGMPOOL_UNLOCK_PTR
127 * Unlocks a previously locked dynamic cache mapping (RC only).
128 *
129 * @returns VBox status code.
130 * @param pVM The VM handle.
131 * @param pPage The pool page.
132 *
133 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
134 * small page window employed by that function. Be careful.
135 * @remark There is no need to assert on the result.
136 */
137#if defined(IN_RC)
138DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
139{
140 if (pvPage)
141 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
142}
143#else
144# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
145#endif
146
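The two macros above are used as a pair throughout this file: map the pool page (locking the RC dynamic mapping), modify the shadow entries, then unlock. A minimal sketch of that pattern, assuming pVM, pPool and pPage are valid and using an invented entry index:

    /* Sketch of the map/lock - modify - unlock pattern used throughout this
     * file; the PX86PT cast and the entry index 0 are illustrative only. */
    PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
    if (pPT)
    {
        ASMAtomicWriteSize(&pPT->a[0].u, 0);  /* clear one shadow PTE */
        PGMPOOL_UNLOCK_PTR(pVM, pPT);         /* drop the RC dynamic mapping lock */
    }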
147
148#ifdef PGMPOOL_WITH_MONITORING
149/**
150 * Determines the size of a write instruction.
151 * @returns number of bytes written.
152 * @param pDis The disassembler state.
153 */
154static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
155{
156 /*
157 * This is very crude and possibly wrong for some opcodes,
158 * but since it's not really supposed to be called we can
159 * probably live with that.
160 */
161 return DISGetParamSize(pDis, &pDis->param1);
162}
163
164
165/**
166 * Flushes a chain of pages sharing the same access monitor.
167 *
168 * @returns VBox status code suitable for scheduling.
169 * @param pPool The pool.
170 * @param pPage A page in the chain.
171 */
172int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
173{
174 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
175
176 /*
177 * Find the list head.
178 */
179 uint16_t idx = pPage->idx;
180 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
181 {
182 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
183 {
184 idx = pPage->iMonitoredPrev;
185 Assert(idx != pPage->idx);
186 pPage = &pPool->aPages[idx];
187 }
188 }
189
190 /*
191 * Iterate the list flushing each shadow page.
192 */
193 int rc = VINF_SUCCESS;
194 for (;;)
195 {
196 idx = pPage->iMonitoredNext;
197 Assert(idx != pPage->idx);
198 if (pPage->idx >= PGMPOOL_IDX_FIRST)
199 {
200 int rc2 = pgmPoolFlushPage(pPool, pPage);
201 AssertRC(rc2);
202 }
203 /* next */
204 if (idx == NIL_PGMPOOL_IDX)
205 break;
206 pPage = &pPool->aPages[idx];
207 }
208 return rc;
209}
210
211
212/**
213 * Wrapper for getting the current context pointer to the entry being modified.
214 *
215 * @returns VBox status code suitable for scheduling.
216 * @param pVM VM Handle.
217 * @param pvDst Destination address
218 * @param pvSrc Source guest virtual address.
219 * @param GCPhysSrc The source guest physical address.
220 * @param cb Size of data to read
221 */
222DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
223{
224#if defined(IN_RING3)
225 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
226 return VINF_SUCCESS;
227#else
228 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
229 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
230#endif
231}
232
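The masking with ~(cb - 1) above rounds the source address down to a cb-aligned boundary, so the whole guest entry is read even when the faulting write itself was unaligned. A small worked example, with made-up numbers, for a 4-byte 32-bit PTE:

    uintptr_t uAddr = 0x1006;                        /* unaligned write inside a PTE */
    size_t    cb    = sizeof(uint32_t);              /* 4-byte 32-bit PTE            */
    uintptr_t uBase = uAddr & ~(uintptr_t)(cb - 1);  /* 0x1004: start of the entry   */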
233/**
234 * Process shadow entries before they are changed by the guest.
235 *
236 * For PT entries we will clear them. For PD entries, we'll simply check
237 * for mapping conflicts and set the SyncCR3 FF if found.
238 *
239 * @param pVCpu VMCPU handle
240 * @param pPool The pool.
241 * @param pPage The head page.
242 * @param GCPhysFault The guest physical fault address.
243 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
244 * In R3 this is the host context 'fault' address.
245 * @param pDis The disassembler state for figuring out the write size.
246 * This need not be specified if the caller knows we won't do cross entry accesses.
247 */
248void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, PDISCPUSTATE pDis)
249{
250 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
251 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
252 const unsigned cbWrite = pDis ? pgmPoolDisasWriteSize(pDis) : 0;
253 PVM pVM = pPool->CTX_SUFF(pVM);
254
255 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%s cbWrite=%d\n", (RTGCPTR)pvAddress, GCPhysFault, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
256 for (;;)
257 {
258 union
259 {
260 void *pv;
261 PX86PT pPT;
262 PX86PTPAE pPTPae;
263 PX86PD pPD;
264 PX86PDPAE pPDPae;
265 PX86PDPT pPDPT;
266 PX86PML4 pPML4;
267 } uShw;
268
269 uShw.pv = NULL;
270 switch (pPage->enmKind)
271 {
272 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
273 {
274 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
275 const unsigned iShw = off / sizeof(X86PTE);
276 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
277 if (uShw.pPT->a[iShw].n.u1Present)
278 {
279# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
280 X86PTE GstPte;
281
282 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
283 AssertRC(rc);
284 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
285 pgmPoolTracDerefGCPhysHint(pPool, pPage,
286 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
287 GstPte.u & X86_PTE_PG_MASK);
288# endif
289 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
290 }
291 break;
292 }
293
294 /* page/2 sized */
295 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
296 {
297 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
298 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
299 {
300 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
301 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
302 if (uShw.pPTPae->a[iShw].n.u1Present)
303 {
304# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
305 X86PTE GstPte;
306 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
307 AssertRC(rc);
308
309 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
310 pgmPoolTracDerefGCPhysHint(pPool, pPage,
311 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
312 GstPte.u & X86_PTE_PG_MASK);
313# endif
314 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
315 }
316 }
317 break;
318 }
319
320 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
321 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
322 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
323 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
324 {
325 unsigned iGst = off / sizeof(X86PDE);
326 unsigned iShwPdpt = iGst / 256;
327 unsigned iShw = (iGst % 256) * 2;
328 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
329
330 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
331 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
332 {
333 for (unsigned i = 0; i < 2; i++)
334 {
335# ifndef IN_RING0
336 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
337 {
338 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
339 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
340 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
341 break;
342 }
343 else
344# endif /* !IN_RING0 */
345 if (uShw.pPDPae->a[iShw+i].n.u1Present)
346 {
347 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
348 pgmPoolFree(pVM,
349 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
350 pPage->idx,
351 iShw + i);
352 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
353 }
354
355 /* paranoia / a bit assumptive. */
356 if ( pDis
357 && (off & 3)
358 && (off & 3) + cbWrite > 4)
359 {
360 const unsigned iShw2 = iShw + 2 + i;
361 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
362 {
363# ifndef IN_RING0
364 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
365 {
366 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
367 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
368 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
369 break;
370 }
371 else
372# endif /* !IN_RING0 */
373 if (uShw.pPDPae->a[iShw2].n.u1Present)
374 {
375 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
376 pgmPoolFree(pVM,
377 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
378 pPage->idx,
379 iShw2);
380 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
381 }
382 }
383 }
384 }
385 }
386 break;
387 }
388
389 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
390 {
391 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
392 const unsigned iShw = off / sizeof(X86PTEPAE);
393 if (uShw.pPTPae->a[iShw].n.u1Present)
394 {
395# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
396 X86PTEPAE GstPte;
397 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
398 AssertRC(rc);
399
400 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
401 pgmPoolTracDerefGCPhysHint(pPool, pPage,
402 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
403 GstPte.u & X86_PTE_PAE_PG_MASK);
404# endif
405 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
406 }
407
408 /* paranoia / a bit assumptive. */
409 if ( pDis
410 && (off & 7)
411 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
412 {
413 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
414 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
415
416 if (uShw.pPTPae->a[iShw2].n.u1Present)
417 {
418# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
419 X86PTEPAE GstPte;
420# ifdef IN_RING3
421 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
422# else
423 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
424# endif
425 AssertRC(rc);
426 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
427 pgmPoolTracDerefGCPhysHint(pPool, pPage,
428 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
429 GstPte.u & X86_PTE_PAE_PG_MASK);
430# endif
431 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u ,0);
432 }
433 }
434 break;
435 }
436
437 case PGMPOOLKIND_32BIT_PD:
438 {
439 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
440 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
441
442 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
443# ifndef IN_RING0
444 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
445 {
446 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
447 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
448 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
449 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
450 break;
451 }
452# endif /* !IN_RING0 */
453# ifndef IN_RING0
454 else
455# endif /* !IN_RING0 */
456 {
457 if (uShw.pPD->a[iShw].n.u1Present)
458 {
459 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
460 pgmPoolFree(pVM,
461 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
462 pPage->idx,
463 iShw);
464 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
465 }
466 }
467 /* paranoia / a bit assumptive. */
468 if ( pDis
469 && (off & 3)
470 && (off & 3) + cbWrite > sizeof(X86PTE))
471 {
472 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
473 if ( iShw2 != iShw
474 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
475 {
476# ifndef IN_RING0
477 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
478 {
479 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
480 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
481 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
482 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
483 break;
484 }
485# endif /* !IN_RING0 */
486# ifndef IN_RING0
487 else
488# endif /* !IN_RING0 */
489 {
490 if (uShw.pPD->a[iShw2].n.u1Present)
491 {
492 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
493 pgmPoolFree(pVM,
494 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
495 pPage->idx,
496 iShw2);
497 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
498 }
499 }
500 }
501 }
502#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
503 if ( uShw.pPD->a[iShw].n.u1Present
504 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
505 {
506 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
507# ifdef IN_RC /* TLB load - we're pushing things a bit... */
508 ASMProbeReadByte(pvAddress);
509# endif
510 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
511 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
512 }
513#endif
514 break;
515 }
516
517 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
518 {
519 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
520 const unsigned iShw = off / sizeof(X86PDEPAE);
521#ifndef IN_RING0
522 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
523 {
524 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
525 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
526 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
527 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
528 break;
529 }
530#endif /* !IN_RING0 */
531 /*
532 * Causes trouble when the guest uses a PDE to refer to the whole page table level
533 * structure. (Invalidate here; faults later on when it tries to change the page
534 * table entries -> recheck; probably only applies to the RC case.)
535 */
536# ifndef IN_RING0
537 else
538# endif /* !IN_RING0 */
539 {
540 if (uShw.pPDPae->a[iShw].n.u1Present)
541 {
542 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
543 pgmPoolFree(pVM,
544 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
545 pPage->idx,
546 iShw);
547 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
548 }
549 }
550 /* paranoia / a bit assumptive. */
551 if ( pDis
552 && (off & 7)
553 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
554 {
555 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
556 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
557
558#ifndef IN_RING0
559 if ( iShw2 != iShw
560 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
561 {
562 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
563 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
564 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
565 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
566 break;
567 }
568#endif /* !IN_RING0 */
569# ifndef IN_RING0
570 else
571# endif /* !IN_RING0 */
572 if (uShw.pPDPae->a[iShw2].n.u1Present)
573 {
574 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
575 pgmPoolFree(pVM,
576 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
577 pPage->idx,
578 iShw2);
579 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
580 }
581 }
582 break;
583 }
584
585 case PGMPOOLKIND_PAE_PDPT:
586 {
587 /*
588 * Hopefully this doesn't happen very often:
589 * - touching unused parts of the page
590 * - messing with the bits of pd pointers without changing the physical address
591 */
592 /* PDPT roots are not page aligned; 32 byte only! */
593 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
594
595 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
596 const unsigned iShw = offPdpt / sizeof(X86PDPE);
597 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
598 {
599# ifndef IN_RING0
600 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
601 {
602 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
603 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
604 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
605 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
606 break;
607 }
608# endif /* !IN_RING0 */
609# ifndef IN_RING0
610 else
611# endif /* !IN_RING0 */
612 if (uShw.pPDPT->a[iShw].n.u1Present)
613 {
614 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
615 pgmPoolFree(pVM,
616 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
617 pPage->idx,
618 iShw);
619 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
620 }
621
622 /* paranoia / a bit assumptive. */
623 if ( pDis
624 && (offPdpt & 7)
625 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
626 {
627 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
628 if ( iShw2 != iShw
629 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
630 {
631# ifndef IN_RING0
632 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
633 {
634 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
635 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
636 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
637 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
638 break;
639 }
640# endif /* !IN_RING0 */
641# ifndef IN_RING0
642 else
643# endif /* !IN_RING0 */
644 if (uShw.pPDPT->a[iShw2].n.u1Present)
645 {
646 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
647 pgmPoolFree(pVM,
648 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
649 pPage->idx,
650 iShw2);
651 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
652 }
653 }
654 }
655 }
656 break;
657 }
658
659#ifndef IN_RC
660 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
661 {
662 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
663 const unsigned iShw = off / sizeof(X86PDEPAE);
664 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
665 if (uShw.pPDPae->a[iShw].n.u1Present)
666 {
667 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
668 pgmPoolFree(pVM,
669 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
670 pPage->idx,
671 iShw);
672 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
673 }
674 /* paranoia / a bit assumptive. */
675 if ( pDis
676 && (off & 7)
677 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
678 {
679 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
680 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
681
682 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
683 if (uShw.pPDPae->a[iShw2].n.u1Present)
684 {
685 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
686 pgmPoolFree(pVM,
687 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
688 pPage->idx,
689 iShw2);
690 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
691 }
692 }
693 break;
694 }
695
696 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
697 {
698 /*
699 * Hopefully this doesn't happen very often:
700 * - messing with the bits of pd pointers without changing the physical address
701 */
702 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
703 {
704 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
705 const unsigned iShw = off / sizeof(X86PDPE);
706 if (uShw.pPDPT->a[iShw].n.u1Present)
707 {
708 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
709 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
710 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
711 }
712 /* paranoia / a bit assumptive. */
713 if ( pDis
714 && (off & 7)
715 && (off & 7) + cbWrite > sizeof(X86PDPE))
716 {
717 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
718 if (uShw.pPDPT->a[iShw2].n.u1Present)
719 {
720 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
721 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
722 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
723 }
724 }
725 }
726 break;
727 }
728
729 case PGMPOOLKIND_64BIT_PML4:
730 {
731 /*
732 * Hopefully this doesn't happen very often:
733 * - messing with the bits of pd pointers without changing the physical address
734 */
735 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
736 {
737 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
738 const unsigned iShw = off / sizeof(X86PDPE);
739 if (uShw.pPML4->a[iShw].n.u1Present)
740 {
741 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
742 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
743 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
744 }
745 /* paranoia / a bit assumptive. */
746 if ( pDis
747 && (off & 7)
748 && (off & 7) + cbWrite > sizeof(X86PDPE))
749 {
750 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
751 if (uShw.pPML4->a[iShw2].n.u1Present)
752 {
753 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
754 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
755 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
756 }
757 }
758 }
759 break;
760 }
761#endif /* !IN_RC */
762
763 default:
764 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
765 }
766 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
767
768 /* next */
769 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
770 return;
771 pPage = &pPool->aPages[pPage->iMonitoredNext];
772 }
773}
774
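Every case in the switch above derives the shadow entry index from the page offset of the fault by dividing by the entry size, and the "paranoia" blocks re-check the following entry when an unaligned write can spill across an entry boundary. A sketch of that arithmetic for the PAE page table case, with an invented fault offset and write size:

    /* Sketch of the index math from the PAE PT case above; values are invented. */
    unsigned off     = 0x7fd;                               /* offset of the write  */
    unsigned cbWrite = 4;                                   /* assumed write size   */
    unsigned iShw    = off / sizeof(X86PTEPAE);             /* first entry touched  */
    if ((off & 7) && (off & 7) + cbWrite > sizeof(X86PTEPAE))
    {
        unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE); /* spill-over     */
        /* the real code above clears/frees both a[iShw] and a[iShw2]. */
    }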
775# ifndef IN_RING3
776/**
777 * Checks if an access could be a fork operation in progress.
778 *
779 * Meaning that the guest is setting up the parent process for copy-on-write.
780 *
781 * @returns true if it's likely that we're forking, otherwise false.
782 * @param pPool The pool.
783 * @param pDis The disassembled instruction.
784 * @param offFault The access offset.
785 */
786DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
787{
788 /*
789 * i386 linux is using btr to clear X86_PTE_RW.
790 * The functions involved are (2.6.16 source inspection):
791 * clear_bit
792 * ptep_set_wrprotect
793 * copy_one_pte
794 * copy_pte_range
795 * copy_pmd_range
796 * copy_pud_range
797 * copy_page_range
798 * dup_mmap
799 * dup_mm
800 * copy_mm
801 * copy_process
802 * do_fork
803 */
804 if ( pDis->pCurInstr->opcode == OP_BTR
805 && !(offFault & 4)
806 /** @todo Validate that the bit index is X86_PTE_RW. */
807 )
808 {
809 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
810 return true;
811 }
812 return false;
813}
814
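The call chain in the comment above ends in a btr on the PTE: i386 Linux write-protects the parent's PTEs for copy-on-write by clearing X86_PTE_RW (bit 1). A sketch of that guest-side pattern, purely illustrative and not the actual kernel source:

    /* Guest-side pattern the heuristic keys off: clear the R/W bit of a PTE
     * with btr. Illustrative sketch, not the real Linux clear_bit(). */
    static void ptep_clear_rw(volatile unsigned long *ptep)
    {
        __asm__ __volatile__("lock; btrl %1, %0"
                             : "+m" (*ptep)
                             : "Ir" (1));   /* bit 1 == X86_PTE_RW */
    }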
815
816/**
817 * Determine whether the page is likely to have been reused.
818 *
819 * @returns true if we consider the page as being reused for a different purpose.
820 * @returns false if we consider it to still be a paging page.
821 * @param pVM VM Handle.
822 * @param pRegFrame Trap register frame.
823 * @param pDis The disassembly info for the faulting instruction.
824 * @param pvFault The fault address.
825 *
826 * @remark The REP prefix check is left to the caller because of STOSD/W.
827 */
828DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
829{
830#ifndef IN_RC
831 /** @todo could make this general, faulting close to rsp should be safe reuse heuristic. */
832 if ( HWACCMHasPendingIrq(pVM)
833 && (pRegFrame->rsp - pvFault) < 32)
834 {
835 /* Fault caused by stack writes while trying to inject an interrupt event. */
836 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
837 return true;
838 }
839#else
840 NOREF(pVM); NOREF(pvFault);
841#endif
842
843 switch (pDis->pCurInstr->opcode)
844 {
845 /* call implies the actual push of the return address faulted */
846 case OP_CALL:
847 Log4(("pgmPoolMonitorIsReused: CALL\n"));
848 return true;
849 case OP_PUSH:
850 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
851 return true;
852 case OP_PUSHF:
853 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
854 return true;
855 case OP_PUSHA:
856 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
857 return true;
858 case OP_FXSAVE:
859 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
860 return true;
861 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
862 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
863 return true;
864 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
865 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
866 return true;
867 case OP_MOVSWD:
868 case OP_STOSWD:
869 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
870 && pRegFrame->rcx >= 0x40
871 )
872 {
873 Assert(pDis->mode == CPUMODE_64BIT);
874
875 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
876 return true;
877 }
878 return false;
879 }
880 if ( (pDis->param1.flags & USE_REG_GEN32)
881 && (pDis->param1.base.reg_gen == USE_REG_ESP))
882 {
883 Log4(("pgmPoolMonitorIsReused: ESP\n"));
884 return true;
885 }
886
887 return false;
888}
889
890
891/**
892 * Flushes the page being accessed.
893 *
894 * @returns VBox status code suitable for scheduling.
895 * @param pVM The VM handle.
896 * @param pVCpu The VMCPU handle.
897 * @param pPool The pool.
898 * @param pPage The pool page (head).
899 * @param pDis The disassembly of the write instruction.
900 * @param pRegFrame The trap register frame.
901 * @param GCPhysFault The fault address as guest physical address.
902 * @param pvFault The fault address.
903 */
904static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
905 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
906{
907 /*
908 * First, do the flushing.
909 */
910 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
911
912 /*
913 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
914 */
915 uint32_t cbWritten;
916 int rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten);
917 if (RT_SUCCESS(rc2))
918 pRegFrame->rip += pDis->opsize;
919 else if (rc2 == VERR_EM_INTERPRETER)
920 {
921#ifdef IN_RC
922 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
923 {
924 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
925 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
926 rc = VINF_SUCCESS;
927 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
928 }
929 else
930#endif
931 {
932 rc = VINF_EM_RAW_EMULATE_INSTR;
933 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
934 }
935 }
936 else
937 rc = rc2;
938
939 /* See use in pgmPoolAccessHandlerSimple(). */
940 PGM_INVL_VCPU_TLBS(pVCpu);
941
942 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
943 return rc;
944
945}
946
947
948/**
949 * Handles the STOSD write accesses.
950 *
951 * @returns VBox status code suitable for scheduling.
952 * @param pVM The VM handle.
953 * @param pPool The pool.
954 * @param pPage The pool page (head).
955 * @param pDis The disassembly of the write instruction.
956 * @param pRegFrame The trap register frame.
957 * @param GCPhysFault The fault address as guest physical address.
958 * @param pvFault The fault address.
959 */
960DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
961 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
962{
963 Assert(pDis->mode == CPUMODE_32BIT);
964
965 Log3(("pgmPoolAccessHandlerSTOSD\n"));
966
967 /*
968 * Increment the modification counter and insert it into the list
969 * of modified pages the first time.
970 */
971 if (!pPage->cModifications++)
972 pgmPoolMonitorModifiedInsert(pPool, pPage);
973
974 /*
975 * Execute REP STOSD.
976 *
977 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
978 * write situation, meaning that it's safe to write here.
979 */
980 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
981 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
982 while (pRegFrame->ecx)
983 {
984#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
985 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
986 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
987 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
988#else
989 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
990#endif
991#ifdef IN_RC
992 *(uint32_t *)pu32 = pRegFrame->eax;
993#else
994 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
995#endif
996 pu32 += 4;
997 GCPhysFault += 4;
998 pRegFrame->edi += 4;
999 pRegFrame->ecx--;
1000 }
1001 pRegFrame->rip += pDis->opsize;
1002
1003#ifdef IN_RC
1004 /* See use in pgmPoolAccessHandlerSimple(). */
1005 PGM_INVL_VCPU_TLBS(pVCpu);
1006#endif
1007
1008 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
1009 return VINF_SUCCESS;
1010}
1011
1012
1013/**
1014 * Handles the simple write accesses.
1015 *
1016 * @returns VBox status code suitable for scheduling.
1017 * @param pVM The VM handle.
1018 * @param pVCpu The VMCPU handle.
1019 * @param pPool The pool.
1020 * @param pPage The pool page (head).
1021 * @param pDis The disassembly of the write instruction.
1022 * @param pRegFrame The trap register frame.
1023 * @param GCPhysFault The fault address as guest physical address.
1024 * @param pvFault The fault address.
1025 */
1026DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1027 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1028{
1029 Log3(("pgmPoolAccessHandlerSimple\n"));
1030 /*
1031 * Increment the modification counter and insert it into the list
1032 * of modified pages the first time.
1033 */
1034 if (!pPage->cModifications++)
1035 pgmPoolMonitorModifiedInsert(pPool, pPage);
1036
1037 /*
1038 * Clear all the pages. ASSUMES that pvFault is readable.
1039 */
1040#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1041 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1042 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, pDis);
1043 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1044#else
1045 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, pDis);
1046#endif
1047
1048 /*
1049 * Interpret the instruction.
1050 */
1051 uint32_t cb;
1052 int rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb);
1053 if (RT_SUCCESS(rc))
1054 pRegFrame->rip += pDis->opsize;
1055 else if (rc == VERR_EM_INTERPRETER)
1056 {
1057 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1058 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1059 rc = VINF_EM_RAW_EMULATE_INSTR;
1060 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1061 }
1062
1063#ifdef IN_RC
1064 /*
1065 * Quick hack, with logging enabled we're getting stale
1066 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
1067 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
1068 * have to be fixed to support this. But that'll have to wait till next week.
1069 *
1070 * An alternative is to keep track of the changed PTEs together with the
1071 * GCPhys from the guest PT. This may prove expensive though.
1072 *
1073 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
1074 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1075 */
1076 PGM_INVL_VCPU_TLBS(pVCpu);
1077#endif
1078
1079 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1080 return rc;
1081}
1082
1083/**
1084 * \#PF Handler callback for PT write accesses.
1085 *
1086 * @returns VBox status code (appropriate for GC return).
1087 * @param pVM VM Handle.
1088 * @param uErrorCode CPU Error code.
1089 * @param pRegFrame Trap register frame.
1090 * NULL on DMA and other non-CPU access.
1091 * @param pvFault The fault address (cr2).
1092 * @param GCPhysFault The GC physical address corresponding to pvFault.
1093 * @param pvUser User argument.
1094 */
1095DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1096{
1097 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1098 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1099 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1100 PVMCPU pVCpu = VMMGetCpu(pVM);
1101
1102 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1103
1104 /*
1105 * Disassemble the faulting instruction.
1106 */
1107 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1108 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1109 AssertRCReturn(rc, rc);
1110
1111 pgmLock(pVM);
1112
1113 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1114 {
1115 /* Pool page changed while we were waiting for the lock; ignore. */
1116 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1117 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1118 pgmUnlock(pVM);
1119 return VINF_SUCCESS;
1120 }
1121
1122 /*
1123 * We should ALWAYS have the list head as user parameter. This
1124 * is because we use that page to record the changes.
1125 */
1126 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1127
1128 /*
1129 * Check if it's worth dealing with.
1130 */
1131 bool fReused = false;
1132 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1133 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1134 )
1135 && !(fReused = pgmPoolMonitorIsReused(pVM, pRegFrame, pDis, pvFault))
1136 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1137 {
1138 /*
1139 * Simple instructions, no REP prefix.
1140 */
1141 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1142 {
1143 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1144 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1145 pgmUnlock(pVM);
1146 return rc;
1147 }
1148
1149 /*
1150 * Windows is frequently doing small memset() operations (netio test 4k+).
1151 * We have to deal with these or we'll kill the cache and performance.
1152 */
1153 if ( pDis->pCurInstr->opcode == OP_STOSWD
1154 && CPUMGetGuestCPL(pVCpu, pRegFrame) == 0
1155 && pRegFrame->ecx <= 0x20
1156 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1157 && !((uintptr_t)pvFault & 3)
1158 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1159 && pDis->mode == CPUMODE_32BIT
1160 && pDis->opmode == CPUMODE_32BIT
1161 && pDis->addrmode == CPUMODE_32BIT
1162 && pDis->prefix == PREFIX_REP
1163 && !pRegFrame->eflags.Bits.u1DF
1164 )
1165 {
1166 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1167 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1168 pgmUnlock(pVM);
1169 return rc;
1170 }
1171
1172 /* REP prefix, don't bother. */
1173 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1174 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1175 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1176 }
1177
1178 /*
1179 * Not worth it, so flush it.
1180 *
1181 * If we considered it to be reused, don't go back to ring-3
1182 * to emulate failed instructions since we usually cannot
1183 * interpret them. This may be a bit risky, in which case
1184 * the reuse detection must be fixed.
1185 */
1186 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1187 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1188 rc = VINF_SUCCESS;
1189 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1190 pgmUnlock(pVM);
1191 return rc;
1192}
1193
1194# endif /* !IN_RING3 */
1195#endif /* PGMPOOL_WITH_MONITORING */
1196
1197#ifdef PGMPOOL_WITH_CACHE
1198
1199/**
1200 * Inserts a page into the GCPhys hash table.
1201 *
1202 * @param pPool The pool.
1203 * @param pPage The page.
1204 */
1205DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1206{
1207 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1208 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1209 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1210 pPage->iNext = pPool->aiHash[iHash];
1211 pPool->aiHash[iHash] = pPage->idx;
1212}
1213
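Both the hash buckets in aiHash and the chain links in iNext hold 16-bit page indices rather than pointers, which keeps the structure valid in R3, R0 and RC alike. A lookup matching the insert above walks the chain like this (essentially what pgmPoolCacheAlloc() does further down):

    /* Sketch of a lookup over the GCPhys hash built by pgmPoolHashInsert(). */
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    while (i != NIL_PGMPOOL_IDX)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[i];
        if (pPage->GCPhys == GCPhys)
            break;              /* found a cached shadow page for this GCPhys */
        i = pPage->iNext;       /* follow the index-based chain               */
    }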
1214
1215/**
1216 * Removes a page from the GCPhys hash table.
1217 *
1218 * @param pPool The pool.
1219 * @param pPage The page.
1220 */
1221DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1222{
1223 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1224 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1225 if (pPool->aiHash[iHash] == pPage->idx)
1226 pPool->aiHash[iHash] = pPage->iNext;
1227 else
1228 {
1229 uint16_t iPrev = pPool->aiHash[iHash];
1230 for (;;)
1231 {
1232 const int16_t i = pPool->aPages[iPrev].iNext;
1233 if (i == pPage->idx)
1234 {
1235 pPool->aPages[iPrev].iNext = pPage->iNext;
1236 break;
1237 }
1238 if (i == NIL_PGMPOOL_IDX)
1239 {
1240 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1241 break;
1242 }
1243 iPrev = i;
1244 }
1245 }
1246 pPage->iNext = NIL_PGMPOOL_IDX;
1247}
1248
1249
1250/**
1251 * Frees up one cache page.
1252 *
1253 * @returns VBox status code.
1254 * @retval VINF_SUCCESS on success.
1255 * @param pPool The pool.
1256 * @param iUser The user index.
1257 */
1258static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1259{
1260#ifndef IN_RC
1261 const PVM pVM = pPool->CTX_SUFF(pVM);
1262#endif
1263 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
1264 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1265
1266 /*
1267 * Select one page from the tail of the age list.
1268 */
1269 PPGMPOOLPAGE pPage;
1270 for (unsigned iLoop = 0; ; iLoop++)
1271 {
1272 uint16_t iToFree = pPool->iAgeTail;
1273 if (iToFree == iUser)
1274 iToFree = pPool->aPages[iToFree].iAgePrev;
1275/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1276 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1277 {
1278 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1279 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1280 {
1281 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1282 continue;
1283 iToFree = i;
1284 break;
1285 }
1286 }
1287*/
1288 Assert(iToFree != iUser);
1289 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1290 pPage = &pPool->aPages[iToFree];
1291
1292 /*
1293 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1294 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1295 */
1296 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1297 break;
1298 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1299 pgmPoolCacheUsed(pPool, pPage);
1300 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1301 }
1302
1303 /*
1304 * Found a usable page, flush it and return.
1305 */
1306 int rc = pgmPoolFlushPage(pPool, pPage);
1307 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1308 if (rc == VINF_SUCCESS)
1309 PGM_INVL_ALL_VCPU_TLBS(pVM);
1310 return rc;
1311}
1312
1313
1314/**
1315 * Checks if a kind mismatch is really a page being reused
1316 * or if it's just normal remappings.
1317 *
1318 * @returns true if reused and the cached page (enmKind1) should be flushed
1319 * @returns false if not reused.
1320 * @param enmKind1 The kind of the cached page.
1321 * @param enmKind2 The kind of the requested page.
1322 */
1323static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1324{
1325 switch (enmKind1)
1326 {
1327 /*
1328 * Never reuse them. There is no remapping in non-paging mode.
1329 */
1330 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1331 case PGMPOOLKIND_32BIT_PD_PHYS:
1332 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1333 case PGMPOOLKIND_PAE_PD_PHYS:
1334 case PGMPOOLKIND_PAE_PDPT_PHYS:
1335 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1336 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1337 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1338 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1339 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1340 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1341 return false;
1342
1343 /*
1344 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1345 */
1346 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1347 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1348 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1349 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1350 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1351 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1352 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1353 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1354 case PGMPOOLKIND_32BIT_PD:
1355 case PGMPOOLKIND_PAE_PDPT:
1356 switch (enmKind2)
1357 {
1358 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1359 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1360 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1361 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1362 case PGMPOOLKIND_64BIT_PML4:
1363 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1364 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1365 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1366 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1367 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1368 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1369 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1370 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1371 return true;
1372 default:
1373 return false;
1374 }
1375
1376 /*
1377 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1378 */
1379 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1380 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1381 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1382 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1383 case PGMPOOLKIND_64BIT_PML4:
1384 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1385 switch (enmKind2)
1386 {
1387 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1388 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1389 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1390 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1391 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1392 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1393 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1394 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1395 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1396 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1397 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1398 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1399 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1400 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1401 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1402 return true;
1403 default:
1404 return false;
1405 }
1406
1407 /*
1408 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1409 */
1410 case PGMPOOLKIND_ROOT_NESTED:
1411 return false;
1412
1413 default:
1414 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1415 }
1416}
1417
1418
1419/**
1420 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1421 *
1422 * @returns VBox status code.
1423 * @retval VINF_PGM_CACHED_PAGE on success.
1424 * @retval VERR_FILE_NOT_FOUND if not found.
1425 * @param pPool The pool.
1426 * @param GCPhys The GC physical address of the page we're going to shadow.
1427 * @param enmKind The kind of mapping.
1428 * @param enmAccess Access type for the mapping (only relevant for big pages)
1429 * @param iUser The shadow page pool index of the user table.
1430 * @param iUserTable The index into the user table (shadowed).
1431 * @param ppPage Where to store the pointer to the page.
1432 */
1433static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1434{
1435#ifndef IN_RC
1436 const PVM pVM = pPool->CTX_SUFF(pVM);
1437#endif
1438 /*
1439 * Look up the GCPhys in the hash.
1440 */
1441 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1442 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
1443 if (i != NIL_PGMPOOL_IDX)
1444 {
1445 do
1446 {
1447 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1448 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1449 if (pPage->GCPhys == GCPhys)
1450 {
1451 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
1452 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
1453 {
1454 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
1455 * doesn't flush it in case there are no more free use records.
1456 */
1457 pgmPoolCacheUsed(pPool, pPage);
1458
1459 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1460 if (RT_SUCCESS(rc))
1461 {
1462 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
1463 *ppPage = pPage;
1464 STAM_COUNTER_INC(&pPool->StatCacheHits);
1465 return VINF_PGM_CACHED_PAGE;
1466 }
1467 return rc;
1468 }
1469
1470 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
1471 {
1472 /*
1473 * The kind is different. In some cases we should now flush the page
1474 * as it has been reused, but in most cases this is normal remapping
1475 * of PDs as PT or big pages using the GCPhys field in a slightly
1476 * different way than the other kinds.
1477 */
1478 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1479 {
1480 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1481 pgmPoolFlushPage(pPool, pPage);
1482 PGM_INVL_VCPU_TLBS(VMMGetCpu(pVM)); /* see PT handler. */
1483 break;
1484 }
1485 }
1486 }
1487
1488 /* next */
1489 i = pPage->iNext;
1490 } while (i != NIL_PGMPOOL_IDX);
1491 }
1492
1493 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
1494 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1495 return VERR_FILE_NOT_FOUND;
1496}
1497
1498
1499/**
1500 * Inserts a page into the cache.
1501 *
1502 * @param pPool The pool.
1503 * @param pPage The cached page.
1504 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1505 */
1506static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1507{
1508 /*
1509 * Insert into the GCPhys hash if the page is fit for that.
1510 */
1511 Assert(!pPage->fCached);
1512 if (fCanBeCached)
1513 {
1514 pPage->fCached = true;
1515 pgmPoolHashInsert(pPool, pPage);
1516 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1517 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1518 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1519 }
1520 else
1521 {
1522 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1523 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1524 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1525 }
1526
1527 /*
1528 * Insert at the head of the age list.
1529 */
1530 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1531 pPage->iAgeNext = pPool->iAgeHead;
1532 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1533 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1534 else
1535 pPool->iAgeTail = pPage->idx;
1536 pPool->iAgeHead = pPage->idx;
1537}
1538
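The age list built here is a doubly linked LRU list of page indices: the most recently used page sits at iAgeHead and pgmPoolCacheFreeOne() above evicts from iAgeTail. Moving a page back to the head on reuse, which is what pgmPoolCacheUsed() is assumed to do (it is not part of this excerpt), would first unlink it roughly like this:

    /* Sketch only: unlink pPage from the age list before re-inserting it at
     * the head the way pgmPoolCacheInsert() above does. Mirrors the unlink
     * code in pgmPoolCacheFlushPage() below. */
    if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
    else
        pPool->iAgeTail = pPage->iAgePrev;
    if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
    else
        pPool->iAgeHead = pPage->iAgeNext;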
1539
1540/**
1541 * Flushes a cached page.
1542 *
1543 * @param pPool The pool.
1544 * @param pPage The cached page.
1545 */
1546static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1547{
1548 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1549
1550 /*
1551 * Remove the page from the hash.
1552 */
1553 if (pPage->fCached)
1554 {
1555 pPage->fCached = false;
1556 pgmPoolHashRemove(pPool, pPage);
1557 }
1558 else
1559 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1560
1561 /*
1562 * Remove it from the age list.
1563 */
1564 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1565 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1566 else
1567 pPool->iAgeTail = pPage->iAgePrev;
1568 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1569 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1570 else
1571 pPool->iAgeHead = pPage->iAgeNext;
1572 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1573 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1574}
1575
1576#endif /* PGMPOOL_WITH_CACHE */
1577#ifdef PGMPOOL_WITH_MONITORING
1578
1579/**
1580 * Looks for pages sharing the monitor.
1581 *
1582 * @returns Pointer to the head page.
1583 * @returns NULL if not found.
1584 * @param pPool The pool.
1585 * @param pNewPage The page which is going to be monitored.
1586 */
1587static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1588{
1589#ifdef PGMPOOL_WITH_CACHE
1590 /*
1591 * Look up the GCPhys in the hash.
1592 */
1593 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1594 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1595 if (i == NIL_PGMPOOL_IDX)
1596 return NULL;
1597 do
1598 {
1599 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1600 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1601 && pPage != pNewPage)
1602 {
1603 switch (pPage->enmKind)
1604 {
1605 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1606 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1607 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1608 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1609 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1610 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1611 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1612 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1613 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1614 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1615 case PGMPOOLKIND_64BIT_PML4:
1616 case PGMPOOLKIND_32BIT_PD:
1617 case PGMPOOLKIND_PAE_PDPT:
1618 {
1619 /* find the head */
1620 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1621 {
1622 Assert(pPage->iMonitoredPrev != pPage->idx);
1623 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1624 }
1625 return pPage;
1626 }
1627
1628 /* ignore, no monitoring. */
1629 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1630 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1631 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1632 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1633 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1634 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1635 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1636 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1637 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1638 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1639 case PGMPOOLKIND_ROOT_NESTED:
1640 case PGMPOOLKIND_PAE_PD_PHYS:
1641 case PGMPOOLKIND_PAE_PDPT_PHYS:
1642 case PGMPOOLKIND_32BIT_PD_PHYS:
1643 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1644 break;
1645 default:
1646 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1647 }
1648 }
1649
1650 /* next */
1651 i = pPage->iNext;
1652 } while (i != NIL_PGMPOOL_IDX);
1653#endif
1654 return NULL;
1655}
1656
1657
1658/**
1659 * Enables write monitoring of a guest page.
1660 *
1661 * @returns VBox status code.
1662 * @retval VINF_SUCCESS on success.
1663 * @param pPool The pool.
1664 * @param pPage The cached page.
1665 */
1666static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1667{
1668 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1669
1670 /*
1671 * Filter out the relevant kinds.
1672 */
1673 switch (pPage->enmKind)
1674 {
1675 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1676 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1677 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1678 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1679 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1680 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1681 case PGMPOOLKIND_64BIT_PML4:
1682 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1683 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1684 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1685 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1686 case PGMPOOLKIND_32BIT_PD:
1687 case PGMPOOLKIND_PAE_PDPT:
1688 break;
1689
1690 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1691 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1692 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1693 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1694 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1695 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1696 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1697 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1698 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1699 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1700 case PGMPOOLKIND_ROOT_NESTED:
1701 /* Nothing to monitor here. */
1702 return VINF_SUCCESS;
1703
1704 case PGMPOOLKIND_32BIT_PD_PHYS:
1705 case PGMPOOLKIND_PAE_PDPT_PHYS:
1706 case PGMPOOLKIND_PAE_PD_PHYS:
1707 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1708 /* Nothing to monitor here. */
1709 return VINF_SUCCESS;
1710#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1711 break;
1712#else
1713 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1714#endif
1715 default:
1716 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1717 }
1718
1719 /*
1720 * Install handler.
1721 */
1722 int rc;
1723 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1724 if (pPageHead)
1725 {
1726 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1727 Assert(pPageHead->iMonitoredPrev != pPage->idx);
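 /* Another page shadowing the same guest frame is already monitored; link this page
 into its chain right after the head so the existing physical handler covers it too. */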
1728 pPage->iMonitoredPrev = pPageHead->idx;
1729 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1730 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1731 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1732 pPageHead->iMonitoredNext = pPage->idx;
1733 rc = VINF_SUCCESS;
1734 }
1735 else
1736 {
1737 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1738 PVM pVM = pPool->CTX_SUFF(pVM);
1739 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1740 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1741 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1742 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1743 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1744 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1745 pPool->pszAccessHandler);
1746 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1747 * the heap size should suffice. */
1748 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
1749 Assert(!(VMMGetCpu(pVM)->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(VMMGetCpu(pVM), VMCPU_FF_PGM_SYNC_CR3));
1750 }
1751 pPage->fMonitored = true;
1752 return rc;
1753}
1754
1755
1756/**
1757 * Disables write monitoring of a guest page.
1758 *
1759 * @returns VBox status code.
1760 * @retval VINF_SUCCESS on success.
1761 * @param pPool The pool.
1762 * @param pPage The cached page.
1763 */
1764static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1765{
1766 /*
1767 * Filter out the relevant kinds.
1768 */
1769 switch (pPage->enmKind)
1770 {
1771 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1772 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1773 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1774 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1775 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1776 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1777 case PGMPOOLKIND_64BIT_PML4:
1778 case PGMPOOLKIND_32BIT_PD:
1779 case PGMPOOLKIND_PAE_PDPT:
1780 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1781 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1782 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1783 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1784 break;
1785
1786 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1787 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1788 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1789 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1790 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1791 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1792 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1793 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1794 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1795 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1796 case PGMPOOLKIND_ROOT_NESTED:
1797 case PGMPOOLKIND_PAE_PD_PHYS:
1798 case PGMPOOLKIND_PAE_PDPT_PHYS:
1799 case PGMPOOLKIND_32BIT_PD_PHYS:
1800 /* Nothing to monitor here. */
1801 return VINF_SUCCESS;
1802
1803#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1804 break;
1805#endif
1806 default:
1807 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1808 }
1809
1810 /*
1811 * Remove the page from the monitored list or uninstall it if last.
1812 */
1813 const PVM pVM = pPool->CTX_SUFF(pVM);
1814 int rc;
1815 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1816 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1817 {
1818 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1819 {
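 /* This page heads the chain and owns the access handler registration; hand the
 handler's user argument over to the next page instead of deregistering it. */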
1820 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1821 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1822 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1823 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1824 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1825 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
1826 pPool->pszAccessHandler);
1827 AssertFatalRCSuccess(rc);
1828 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1829 }
1830 else
1831 {
1832 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1833 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1834 {
1835 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1836 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1837 }
1838 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1839 rc = VINF_SUCCESS;
1840 }
1841 }
1842 else
1843 {
1844 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1845 AssertFatalRC(rc);
1846#ifdef VBOX_STRICT
1847 PVMCPU pVCpu = VMMGetCpu(pVM);
1848#endif
1849 AssertMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
1850 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
1851 }
1852 pPage->fMonitored = false;
1853
1854 /*
1855 * Remove it from the list of modified pages (if in it).
1856 */
1857 pgmPoolMonitorModifiedRemove(pPool, pPage);
1858
1859 return rc;
1860}
1861
1862
1863/**
1864 * Inserts the page into the list of modified pages.
1865 *
1866 * @param pPool The pool.
1867 * @param pPage The page.
1868 */
1869void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1870{
1871 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1872 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1873 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1874 && pPool->iModifiedHead != pPage->idx,
1875 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1876 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1877 pPool->iModifiedHead, pPool->cModifiedPages));
1878
1879 pPage->iModifiedNext = pPool->iModifiedHead;
1880 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1881 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1882 pPool->iModifiedHead = pPage->idx;
1883 pPool->cModifiedPages++;
1884#ifdef VBOX_WITH_STATISTICS
1885 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1886 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1887#endif
1888}
1889
1890
1891/**
1892 * Removes the page from the list of modified pages and resets the
1893 * modification counter.
1894 *
1895 * @param pPool The pool.
1896 * @param pPage The page which is believed to be in the list of modified pages.
1897 */
1898static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1899{
1900 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1901 if (pPool->iModifiedHead == pPage->idx)
1902 {
1903 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1904 pPool->iModifiedHead = pPage->iModifiedNext;
1905 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1906 {
1907 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1908 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1909 }
1910 pPool->cModifiedPages--;
1911 }
1912 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1913 {
1914 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1915 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1916 {
1917 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1918 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1919 }
1920 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1921 pPool->cModifiedPages--;
1922 }
1923 else
1924 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1925 pPage->cModifications = 0;
1926}
1927
1928
1929/**
1930 * Zaps the list of modified pages, resetting their modification counters in the process.
1931 *
1932 * @param pVM The VM handle.
1933 */
1934void pgmPoolMonitorModifiedClearAll(PVM pVM)
1935{
1936 pgmLock(pVM);
1937 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1938 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1939
1940 unsigned cPages = 0; NOREF(cPages);
1941 uint16_t idx = pPool->iModifiedHead;
1942 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1943 while (idx != NIL_PGMPOOL_IDX)
1944 {
1945 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1946 idx = pPage->iModifiedNext;
1947 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1948 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1949 pPage->cModifications = 0;
1950 Assert(++cPages);
1951 }
1952 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1953 pPool->cModifiedPages = 0;
1954 pgmUnlock(pVM);
1955}
1956
1957
1958#ifdef IN_RING3
1959/**
1960 * Callback to clear all shadow pages and clear all modification counters.
1961 *
1962 * @returns VBox status code.
1963 * @param pVM The VM handle.
1964 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
1965 * @param pvUser Unused parameter.
1966 *
1967 * @remark Should only be used when monitoring is available, thus placed in
1968 * the PGMPOOL_WITH_MONITORING \#ifdef.
1969 */
1970DECLCALLBACK(int) pgmPoolClearAll(PVM pVM, PVMCPU pVCpu, void *pvUser)
1971{
1972 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1973 STAM_PROFILE_START(&pPool->StatClearAll, c);
1974 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1975 NOREF(pvUser); NOREF(pVCpu);
1976
1977 pgmLock(pVM);
1978
1979 /*
1980 * Iterate all the pages until we've encountered all that are in use.
1981 * This is a simple but not quite optimal solution.
1982 */
1983 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1984 unsigned cLeft = pPool->cUsedPages;
1985 unsigned iPage = pPool->cCurPages;
1986 while (--iPage >= PGMPOOL_IDX_FIRST)
1987 {
1988 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1989 if (pPage->GCPhys != NIL_RTGCPHYS)
1990 {
1991 switch (pPage->enmKind)
1992 {
1993 /*
1994 * We only care about shadow page tables.
1995 */
1996 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1997 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1998 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1999 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2000 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2001 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2002 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2003 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2004 {
2005#ifdef PGMPOOL_WITH_USER_TRACKING
2006 if (pPage->cPresent)
2007#endif
2008 {
2009 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2010 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2011 ASMMemZeroPage(pvShw);
2012 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2013#ifdef PGMPOOL_WITH_USER_TRACKING
2014 pPage->cPresent = 0;
2015 pPage->iFirstPresent = ~0;
2016#endif
2017 }
2018 }
2019 /* fall thru */
2020
2021 default:
2022 Assert(!pPage->cModifications || ++cModifiedPages);
2023 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2024 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2025 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2026 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2027 pPage->cModifications = 0;
2028 break;
2029
2030 }
2031 if (!--cLeft)
2032 break;
2033 }
2034 }
2035
2036 /* sweep the special pages too. */
2037 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2038 {
2039 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2040 if (pPage->GCPhys != NIL_RTGCPHYS)
2041 {
2042 Assert(!pPage->cModifications || ++cModifiedPages);
2043 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2044 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2045 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2046 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2047 pPage->cModifications = 0;
2048 }
2049 }
2050
2051#ifndef DEBUG_michael
2052 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2053#endif
2054 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2055 pPool->cModifiedPages = 0;
2056
2057#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2058 /*
2059 * Clear all the GCPhys links and rebuild the phys ext free list.
2060 */
2061 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2062 pRam;
2063 pRam = pRam->CTX_SUFF(pNext))
2064 {
2065 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2066 while (iPage-- > 0)
2067 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
2068 }
2069
2070 pPool->iPhysExtFreeHead = 0;
2071 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2072 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2073 for (unsigned i = 0; i < cMaxPhysExts; i++)
2074 {
2075 paPhysExts[i].iNext = i + 1;
2076 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2077 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2078 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2079 }
2080 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2081#endif
2082
2083 /* Clear the PGM_SYNC_CLEAR_PGM_POOL flag on all VCPUs to prevent redundant flushes. */
2084 for (unsigned idCpu = 0; idCpu < pVM->cCPUs; idCpu++)
2085 {
2086 PVMCPU pVCpu = &pVM->aCpus[idCpu];
2087
2088 pVCpu->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2089 }
2090
2091 pPool->cPresent = 0;
2092 pgmUnlock(pVM);
2093 PGM_INVL_ALL_VCPU_TLBS(pVM);
2094 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2095 return VINF_SUCCESS;
2096}
2097#endif /* IN_RING3 */
2098
2099
2100/**
2101 * Handle SyncCR3 pool tasks
2102 *
2103 * @returns VBox status code.
2104 * @retval VINF_SUCCESS on success.
2105 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2106 * @param pVCpu The VMCPU handle.
2107 * @remark Should only be used when monitoring is available, thus placed in
2108 * the PGMPOOL_WITH_MONITORING #ifdef.
2109 */
2110int pgmPoolSyncCR3(PVMCPU pVCpu)
2111{
2112 PVM pVM = pVCpu->CTX_SUFF(pVM);
2113 LogFlow(("pgmPoolSyncCR3\n"));
2114
2115 /*
2116 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2117 * Occasionally we will have to clear all the shadow page tables because we wanted
2118 * to monitor a page which was mapped by too many shadowed page tables. This operation
2119 * is sometimes referred to as a 'lightweight flush'.
2120 */
2121# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2122 if (ASMBitTestAndClear(&pVCpu->pgm.s.fSyncFlags, PGM_SYNC_CLEAR_PGM_POOL_BIT))
2123 {
2124 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmPoolClearAll, NULL);
2125 AssertRC(rc);
2126 }
2127# else /* !IN_RING3 */
2128 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2129 {
2130 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2131 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2132 return VINF_PGM_SYNC_CR3;
2133 }
2134# endif /* !IN_RING3 */
2135 else
2136 pgmPoolMonitorModifiedClearAll(pVM);
2137
2138 return VINF_SUCCESS;
2139}
2140
2141#endif /* PGMPOOL_WITH_MONITORING */
2142#ifdef PGMPOOL_WITH_USER_TRACKING
2143
2144/**
2145 * Frees up at least one user entry.
2146 *
2147 * @returns VBox status code.
2148 * @retval VINF_SUCCESS if at least one user entry was freed.
2149 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2150 * @param pPool The pool.
2151 * @param iUser The user index.
2152 */
2153static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2154{
2155 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2156#ifdef PGMPOOL_WITH_CACHE
2157 /*
2158 * Just free cached pages in a braindead fashion.
2159 */
2160 /** @todo walk the age list backwards and free the first with usage. */
2161 int rc = VINF_SUCCESS;
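 /* Keep evicting cached pages until one of them gives back a user record. */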
2162 do
2163 {
2164 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2165 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2166 rc = rc2;
2167 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2168 return rc;
2169#else
2170 /*
2171 * Lazy approach.
2172 */
2173 /* @todo This path no longer works (CR3 root pages will be flushed)!! */
2174 AssertCompileFailed();
2175 Assert(!CPUMIsGuestInLongMode(pVM));
2176 pgmPoolFlushAllInt(pPool);
2177 return VERR_PGM_POOL_FLUSHED;
2178#endif
2179}
2180
2181
2182/**
2183 * Inserts a page into the cache.
2184 *
2185 * This will create user node for the page, insert it into the GCPhys
2186 * hash, and insert it into the age list.
2187 *
2188 * @returns VBox status code.
2189 * @retval VINF_SUCCESS if successfully added.
2190 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2191 * @param pPool The pool.
2192 * @param pPage The cached page.
2193 * @param GCPhys The GC physical address of the page we're going to shadow.
2194 * @param iUser The user index.
2195 * @param iUserTable The user table index.
2196 */
2197DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2198{
2199 int rc = VINF_SUCCESS;
2200 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2201
2202 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2203
2204#ifdef VBOX_STRICT
2205 /*
2206 * Check that the entry doesn't already exist.
2207 */
2208 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2209 {
2210 uint16_t i = pPage->iUserHead;
2211 do
2212 {
2213 Assert(i < pPool->cMaxUsers);
2214 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2215 i = paUsers[i].iNext;
2216 } while (i != NIL_PGMPOOL_USER_INDEX);
2217 }
2218#endif
2219
2220 /*
2221 * Find a free user node.
2222 */
2223 uint16_t i = pPool->iUserFreeHead;
2224 if (i == NIL_PGMPOOL_USER_INDEX)
2225 {
2226 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2227 if (RT_FAILURE(rc))
2228 return rc;
2229 i = pPool->iUserFreeHead;
2230 }
2231
2232 /*
2233 * Unlink the user node from the free list,
2234 * initialize and insert it into the user list.
2235 */
2236 pPool->iUserFreeHead = paUsers[i].iNext;
2237 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2238 paUsers[i].iUser = iUser;
2239 paUsers[i].iUserTable = iUserTable;
2240 pPage->iUserHead = i;
2241
2242 /*
2243 * Insert into cache and enable monitoring of the guest page if enabled.
2244 *
2245 * Until we implement caching of all levels, including the CR3 one, we'll
2246 * have to make sure we don't try monitor & cache any recursive reuse of
2247 * a monitored CR3 page. Because all windows versions are doing this we'll
2248 * have to be able to do combined access monitoring, CR3 + PT and
2249 * PD + PT (guest PAE).
2250 *
2251 * Update:
2252 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2253 */
2254#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2255# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2256 const bool fCanBeMonitored = true;
2257# else
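 /* Only monitor the page if it doesn't share a frame with the currently monitored
 guest CR3, or if it shadows a big page (which isn't write monitored anyway). */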
2258 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2259 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2260 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2261# endif
2262# ifdef PGMPOOL_WITH_CACHE
2263 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2264# endif
2265 if (fCanBeMonitored)
2266 {
2267# ifdef PGMPOOL_WITH_MONITORING
2268 rc = pgmPoolMonitorInsert(pPool, pPage);
2269 AssertRC(rc);
2270 # endif
2271 }
2272 #endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2273 return rc;
2274}
2275
2276
2277# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2278/**
2279 * Adds a user reference to a page.
2280 *
2281 * This will move the page to the head of the age list.
2282 *
2283 * @returns VBox status code.
2284 * @retval VINF_SUCCESS if successfully added.
2285 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2286 * @param pPool The pool.
2287 * @param pPage The cached page.
2288 * @param iUser The user index.
2289 * @param iUserTable The user table.
2290 */
2291static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2292{
2293 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2294
2295 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2296
2297# ifdef VBOX_STRICT
2298 /*
2299 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2300 */
2301 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2302 {
2303 uint16_t i = pPage->iUserHead;
2304 do
2305 {
2306 Assert(i < pPool->cMaxUsers);
2307 AssertMsg(iUser == PGMPOOL_IDX_PD || iUser == PGMPOOL_IDX_PDPT || iUser == PGMPOOL_IDX_NESTED_ROOT || iUser == PGMPOOL_IDX_AMD64_CR3 ||
2308 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2309 i = paUsers[i].iNext;
2310 } while (i != NIL_PGMPOOL_USER_INDEX);
2311 }
2312# endif
2313
2314 /*
2315 * Allocate a user node.
2316 */
2317 uint16_t i = pPool->iUserFreeHead;
2318 if (i == NIL_PGMPOOL_USER_INDEX)
2319 {
2320 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2321 if (RT_FAILURE(rc))
2322 return rc;
2323 i = pPool->iUserFreeHead;
2324 }
2325 pPool->iUserFreeHead = paUsers[i].iNext;
2326
2327 /*
2328 * Initialize the user node and insert it.
2329 */
2330 paUsers[i].iNext = pPage->iUserHead;
2331 paUsers[i].iUser = iUser;
2332 paUsers[i].iUserTable = iUserTable;
2333 pPage->iUserHead = i;
2334
2335# ifdef PGMPOOL_WITH_CACHE
2336 /*
2337 * Tell the cache to update its replacement stats for this page.
2338 */
2339 pgmPoolCacheUsed(pPool, pPage);
2340# endif
2341 return VINF_SUCCESS;
2342}
2343# endif /* PGMPOOL_WITH_CACHE */
2344
2345
2346/**
2347 * Frees a user record associated with a page.
2348 *
2349 * This does not clear the entry in the user table, it simply returns the
2350 * user record to the chain of free records.
2351 *
2352 * @param pPool The pool.
2353 * @param HCPhys The HC physical address of the shadow page.
2354 * @param iUser The shadow page pool index of the user table.
2355 * @param iUserTable The index into the user table (shadowed).
2356 */
2357static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2358{
2359 /*
2360 * Unlink and free the specified user entry.
2361 */
2362 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2363
2364 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2365 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2366 uint16_t i = pPage->iUserHead;
2367 if ( i != NIL_PGMPOOL_USER_INDEX
2368 && paUsers[i].iUser == iUser
2369 && paUsers[i].iUserTable == iUserTable)
2370 {
2371 pPage->iUserHead = paUsers[i].iNext;
2372
2373 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2374 paUsers[i].iNext = pPool->iUserFreeHead;
2375 pPool->iUserFreeHead = i;
2376 return;
2377 }
2378
2379 /* General: Linear search. */
2380 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2381 while (i != NIL_PGMPOOL_USER_INDEX)
2382 {
2383 if ( paUsers[i].iUser == iUser
2384 && paUsers[i].iUserTable == iUserTable)
2385 {
2386 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2387 paUsers[iPrev].iNext = paUsers[i].iNext;
2388 else
2389 pPage->iUserHead = paUsers[i].iNext;
2390
2391 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2392 paUsers[i].iNext = pPool->iUserFreeHead;
2393 pPool->iUserFreeHead = i;
2394 return;
2395 }
2396 iPrev = i;
2397 i = paUsers[i].iNext;
2398 }
2399
2400 /* Fatal: didn't find it */
2401 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2402 iUser, iUserTable, pPage->GCPhys));
2403}
2404
2405
2406/**
2407 * Gets the entry size of a shadow table.
2408 *
2409 * @param enmKind The kind of page.
2410 *
2411 * @returns The size of the entry in bytes. That is, 4 or 8.
2412 * @returns If the kind is not for a table, an assertion is raised and 0 is
2413 * returned.
2414 */
2415DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2416{
2417 switch (enmKind)
2418 {
2419 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2420 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2421 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2422 case PGMPOOLKIND_32BIT_PD:
2423 case PGMPOOLKIND_32BIT_PD_PHYS:
2424 return 4;
2425
2426 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2427 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2428 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2429 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2430 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2431 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2432 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2433 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2434 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2435 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2436 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2437 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2438 case PGMPOOLKIND_64BIT_PML4:
2439 case PGMPOOLKIND_PAE_PDPT:
2440 case PGMPOOLKIND_ROOT_NESTED:
2441 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2442 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2443 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2444 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2445 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2446 case PGMPOOLKIND_PAE_PD_PHYS:
2447 case PGMPOOLKIND_PAE_PDPT_PHYS:
2448 return 8;
2449
2450 default:
2451 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2452 }
2453}
2454
2455
2456/**
2457 * Gets the entry size of a guest table.
2458 *
2459 * @param enmKind The kind of page.
2460 *
2461 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2462 * @returns If the kind is not for a table, an assertion is raised and 0 is
2463 * returned.
2464 */
2465DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2466{
2467 switch (enmKind)
2468 {
2469 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2470 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2471 case PGMPOOLKIND_32BIT_PD:
2472 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2473 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2474 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2475 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2476 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2477 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2478 return 4;
2479
2480 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2481 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2482 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2483 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2484 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2485 case PGMPOOLKIND_64BIT_PML4:
2486 case PGMPOOLKIND_PAE_PDPT:
2487 return 8;
2488
2489 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2490 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2491 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2492 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2493 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2494 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2495 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2496 case PGMPOOLKIND_ROOT_NESTED:
2497 case PGMPOOLKIND_PAE_PD_PHYS:
2498 case PGMPOOLKIND_PAE_PDPT_PHYS:
2499 case PGMPOOLKIND_32BIT_PD_PHYS:
2500 /** @todo can we return 0? (nobody is calling this...) */
2501 AssertFailed();
2502 return 0;
2503
2504 default:
2505 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2506 }
2507}
2508
2509#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2510
2511/**
2512 * Scans one shadow page table for mappings of a physical page.
2513 *
2514 * @param pVM The VM handle.
2515 * @param pPhysPage The guest page in question.
2516 * @param iShw The shadow page table.
2517 * @param cRefs The number of references made in that PT.
2518 */
2519static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2520{
2521 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
2522 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2523
2524 /*
2525 * Assert sanity.
2526 */
2527 Assert(cRefs == 1);
2528 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2529 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2530
2531 /*
2532 * Then, clear the actual mappings to the page in the shadow PT.
2533 */
2534 switch (pPage->enmKind)
2535 {
2536 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2537 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2538 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2539 {
2540 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
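 /* u32 is the expected PTE value (host page address + present bit); clear every shadow
 PTE whose address and P bits match, stopping once all cRefs references are gone. */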
2541 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2542 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2543 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2544 {
2545 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2546 pPT->a[i].u = 0;
2547 cRefs--;
2548 if (!cRefs)
2549 return;
2550 }
2551#ifdef LOG_ENABLED
2552 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2553 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2554 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2555 {
2556 Log(("i=%d cRefs=%d\n", i, cRefs--));
2557 }
2558#endif
2559 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2560 break;
2561 }
2562
2563 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2564 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2565 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2566 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2567 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2568 {
2569 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2570 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2571 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2572 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2573 {
2574 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2575 pPT->a[i].u = 0;
2576 cRefs--;
2577 if (!cRefs)
2578 return;
2579 }
2580#ifdef LOG_ENABLED
2581 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2582 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2583 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2584 {
2585 Log(("i=%d cRefs=%d\n", i, cRefs--));
2586 }
2587#endif
2588 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
2589 break;
2590 }
2591
2592 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2593 {
2594 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2595 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2596 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2597 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2598 {
2599 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2600 pPT->a[i].u = 0;
2601 cRefs--;
2602 if (!cRefs)
2603 return;
2604 }
2605#ifdef LOG_ENABLED
2606 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2607 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2608 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2609 {
2610 Log(("i=%d cRefs=%d\n", i, cRefs--));
2611 }
2612#endif
2613 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2614 break;
2615 }
2616
2617 default:
2618 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2619 }
2620}
2621
2622
2623/**
2624 * Scans one shadow page table for mappings of a physical page.
2625 *
2626 * @param pVM The VM handle.
2627 * @param pPhysPage The guest page in question.
2628 * @param iShw The shadow page table.
2629 * @param cRefs The number of references made in that PT.
2630 */
2631void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2632{
2633 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2634 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
2635 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2636 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2637 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
2638 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2639}
2640
2641
2642/**
2643 * Flushes a list of shadow page tables mapping the same physical page.
2644 *
2645 * @param pVM The VM handle.
2646 * @param pPhysPage The guest page in question.
2647 * @param iPhysExt The physical cross reference extent list to flush.
2648 */
2649void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2650{
2651 Assert(PGMIsLockOwner(pVM));
2652 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2653 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2654 LogFlow(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%R[pgmpage] iPhysExt=%d\n", pPhysPage, iPhysExt));
2655
2656 const uint16_t iPhysExtStart = iPhysExt;
2657 PPGMPOOLPHYSEXT pPhysExt;
2658 do
2659 {
2660 Assert(iPhysExt < pPool->cMaxPhysExts);
2661 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2662 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2663 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2664 {
2665 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2666 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2667 }
2668
2669 /* next */
2670 iPhysExt = pPhysExt->iNext;
2671 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2672
2673 /* insert the list into the free list and clear the ram range entry. */
2674 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2675 pPool->iPhysExtFreeHead = iPhysExtStart;
2676 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
2677
2678 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2679}
2680
2681#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2682
2683/**
2684 * Flushes all shadow page table mappings of the given guest page.
2685 *
2686 * This is typically called when the host page backing the guest one has been
2687 * replaced or when the page protection was changed due to an access handler.
2688 *
2689 * @returns VBox status code.
2690 * @retval VINF_SUCCESS if all references have been successfully cleared.
2691 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
2692 * pool cleaning. FF and sync flags are set.
2693 *
2694 * @param pVM The VM handle.
2695 * @param pPhysPage The guest page in question.
2696 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
2697 * flushed, it is NOT touched if this isn't necessary.
2698 * The caller MUST initialize this to @a false.
2699 */
2700int pgmPoolTrackFlushGCPhys(PVM pVM, PPGMPAGE pPhysPage, bool *pfFlushTLBs)
2701{
2702 PVMCPU pVCpu = VMMGetCpu(pVM);
2703 pgmLock(pVM);
2704 int rc = VINF_SUCCESS;
2705#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2706 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
2707 if (u16)
2708 {
2709 /*
2710 * The zero page is currently screwing up the tracking and we'll
2711 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2712 * is defined, zero pages won't normally be mapped. Some kind of solution
2713 * will be needed for this problem of course, but it will have to wait...
2714 */
2715 if (PGM_PAGE_IS_ZERO(pPhysPage))
2716 rc = VINF_PGM_GCPHYS_ALIASED;
2717 else
2718 {
2719# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2720 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
2721 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
2722 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
2723# endif
2724
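 /* The tracking word either names a single shadow PT (index + reference count), points
 to an extent list, or is marked overflowed, which forces the slow full scan. */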
2725 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
2726 pgmPoolTrackFlushGCPhysPT(pVM,
2727 pPhysPage,
2728 PGMPOOL_TD_GET_IDX(u16),
2729 PGMPOOL_TD_GET_CREFS(u16));
2730 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
2731 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, PGMPOOL_TD_GET_IDX(u16));
2732 else
2733 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
2734 *pfFlushTLBs = true;
2735
2736# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2737 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
2738# endif
2739 }
2740 }
2741
2742#elif defined(PGMPOOL_WITH_CACHE)
2743 if (PGM_PAGE_IS_ZERO(pPhysPage))
2744 rc = VINF_PGM_GCPHYS_ALIASED;
2745 else
2746 {
2747# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2748 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow kill the pool otherwise. */
2749 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
2750# endif
2751 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
2752 if (rc == VINF_SUCCESS)
2753 *pfFlushTLBs = true;
2754 }
2755
2756# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2757 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
2758# endif
2759
2760#else
2761 rc = VINF_PGM_GCPHYS_ALIASED;
2762#endif
2763
2764 if (rc == VINF_PGM_GCPHYS_ALIASED)
2765 {
2766 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
2767 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2768 rc = VINF_PGM_SYNC_CR3;
2769 }
2770 pgmUnlock(pVM);
2771 return rc;
2772}
2773
2774
2775/**
2776 * Scans all shadow page tables for mappings of a physical page.
2777 *
2778 * This may be slow, but it's most likely more efficient than cleaning
2779 * out the entire page pool / cache.
2780 *
2781 * @returns VBox status code.
2782 * @retval VINF_SUCCESS if all references have been successfully cleared.
2783 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2784 * a page pool cleaning.
2785 *
2786 * @param pVM The VM handle.
2787 * @param pPhysPage The guest page in question.
2788 */
2789int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2790{
2791 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2792 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2793 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
2794 pPool->cUsedPages, pPool->cPresent, pPhysPage));
2795
2796#if 1
2797 /*
2798 * There is a limit to what makes sense.
2799 */
2800 if (pPool->cPresent > 1024)
2801 {
2802 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2803 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2804 return VINF_PGM_GCPHYS_ALIASED;
2805 }
2806#endif
2807
2808 /*
2809 * Iterate all the pages until we've encountered all that are in use.
2810 * This is a simple but not quite optimal solution.
2811 */
2812 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2813 const uint32_t u32 = u64;
2814 unsigned cLeft = pPool->cUsedPages;
2815 unsigned iPage = pPool->cCurPages;
2816 while (--iPage >= PGMPOOL_IDX_FIRST)
2817 {
2818 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2819 if (pPage->GCPhys != NIL_RTGCPHYS)
2820 {
2821 switch (pPage->enmKind)
2822 {
2823 /*
2824 * We only care about shadow page tables.
2825 */
2826 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2827 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2828 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2829 {
2830 unsigned cPresent = pPage->cPresent;
2831 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2832 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2833 if (pPT->a[i].n.u1Present)
2834 {
2835 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2836 {
2837 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2838 pPT->a[i].u = 0;
2839 }
2840 if (!--cPresent)
2841 break;
2842 }
2843 break;
2844 }
2845
2846 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2847 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2848 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2849 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2850 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2851 {
2852 unsigned cPresent = pPage->cPresent;
2853 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2854 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2855 if (pPT->a[i].n.u1Present)
2856 {
2857 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2858 {
2859 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2860 pPT->a[i].u = 0;
2861 }
2862 if (!--cPresent)
2863 break;
2864 }
2865 break;
2866 }
2867 }
2868 if (!--cLeft)
2869 break;
2870 }
2871 }
2872
2873 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
2874 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2875 return VINF_SUCCESS;
2876}
2877
2878
2879/**
2880 * Clears the user entry in a user table.
2881 *
2882 * This is used to remove all references to a page when flushing it.
2883 */
2884static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2885{
2886 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2887 Assert(pUser->iUser < pPool->cCurPages);
2888 uint32_t iUserTable = pUser->iUserTable;
2889
2890 /*
2891 * Map the user page.
2892 */
2893 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2894 union
2895 {
2896 uint64_t *pau64;
2897 uint32_t *pau32;
2898 } u;
2899 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
2900
2901 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
2902
2903 /* Safety precaution in case we change the paging for other modes too in the future. */
2904 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
2905
2906#ifdef VBOX_STRICT
2907 /*
2908 * Some sanity checks.
2909 */
2910 switch (pUserPage->enmKind)
2911 {
2912 case PGMPOOLKIND_32BIT_PD:
2913 case PGMPOOLKIND_32BIT_PD_PHYS:
2914 Assert(iUserTable < X86_PG_ENTRIES);
2915 break;
2916 case PGMPOOLKIND_PAE_PDPT:
2917 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2918 case PGMPOOLKIND_PAE_PDPT_PHYS:
2919 Assert(iUserTable < 4);
2920 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2921 break;
2922 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2923 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2924 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2925 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2926 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2927 case PGMPOOLKIND_PAE_PD_PHYS:
2928 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2929 break;
2930 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2931 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2932 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
2933 break;
2934 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2935 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2936 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2937 break;
2938 case PGMPOOLKIND_64BIT_PML4:
2939 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2940 /* GCPhys >> PAGE_SHIFT is the index here */
2941 break;
2942 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2943 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2944 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2945 break;
2946
2947 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2948 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2949 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2950 break;
2951
2952 case PGMPOOLKIND_ROOT_NESTED:
2953 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2954 break;
2955
2956 default:
2957 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2958 break;
2959 }
2960#endif /* VBOX_STRICT */
2961
2962 /*
2963 * Clear the entry in the user page.
2964 */
2965 switch (pUserPage->enmKind)
2966 {
2967 /* 32-bit entries */
2968 case PGMPOOLKIND_32BIT_PD:
2969 case PGMPOOLKIND_32BIT_PD_PHYS:
2970 u.pau32[iUserTable] = 0;
2971 break;
2972
2973 /* 64-bit entries */
2974 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2975 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2976 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2977 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2978 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2979#if defined(IN_RC)
2980 /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during CR3 load, so any
2981 * non-present PDPT will continue to cause page faults.
2982 */
2983 ASMReloadCR3();
2984#endif
2985 /* no break */
2986 case PGMPOOLKIND_PAE_PD_PHYS:
2987 case PGMPOOLKIND_PAE_PDPT_PHYS:
2988 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2989 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2990 case PGMPOOLKIND_64BIT_PML4:
2991 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2992 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2993 case PGMPOOLKIND_PAE_PDPT:
2994 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2995 case PGMPOOLKIND_ROOT_NESTED:
2996 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2997 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2998 u.pau64[iUserTable] = 0;
2999 break;
3000
3001 default:
3002 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3003 }
3004}
3005
3006
3007/**
3008 * Clears all users of a page.
3009 */
3010static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3011{
3012 /*
3013 * Free all the user records.
3014 */
3015 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3016
3017 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3018 uint16_t i = pPage->iUserHead;
3019 while (i != NIL_PGMPOOL_USER_INDEX)
3020 {
3021 /* Clear the entry in the user table. */
3022 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3023
3024 /* Free it. */
3025 const uint16_t iNext = paUsers[i].iNext;
3026 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3027 paUsers[i].iNext = pPool->iUserFreeHead;
3028 pPool->iUserFreeHead = i;
3029
3030 /* Next. */
3031 i = iNext;
3032 }
3033 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3034}
3035
3036#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3037
3038/**
3039 * Allocates a new physical cross reference extent.
3040 *
3041 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3042 * @param pVM The VM handle.
3043 * @param piPhysExt Where to store the phys ext index.
3044 */
3045PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3046{
3047 Assert(PGMIsLockOwner(pVM));
3048 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3049 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3050 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3051 {
3052 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3053 return NULL;
3054 }
3055 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3056 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3057 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3058 *piPhysExt = iPhysExt;
3059 return pPhysExt;
3060}
3061
3062
3063/**
3064 * Frees a physical cross reference extent.
3065 *
3066 * @param pVM The VM handle.
3067 * @param iPhysExt The extent to free.
3068 */
3069void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3070{
3071 Assert(PGMIsLockOwner(pVM));
3072 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3073 Assert(iPhysExt < pPool->cMaxPhysExts);
3074 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3075 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3076 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3077 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3078 pPool->iPhysExtFreeHead = iPhysExt;
3079}
3080
3081
3082/**
3083 * Frees a list of physical cross reference extents.
3084 *
3085 * @param pVM The VM handle.
3086 * @param iPhysExt The index of the first extent in the list to free.
3087 */
3088void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3089{
3090 Assert(PGMIsLockOwner(pVM));
3091 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3092
3093 const uint16_t iPhysExtStart = iPhysExt;
3094 PPGMPOOLPHYSEXT pPhysExt;
3095 do
3096 {
3097 Assert(iPhysExt < pPool->cMaxPhysExts);
3098 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3099 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3100 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3101
3102 /* next */
3103 iPhysExt = pPhysExt->iNext;
3104 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3105
3106 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3107 pPool->iPhysExtFreeHead = iPhysExtStart;
3108}
3109
3110
3111/**
3112 * Insert a reference into a list of physical cross reference extents.
3113 *
3114 * @returns The new tracking data for PGMPAGE.
3115 *
3116 * @param pVM The VM handle.
3117 * @param iPhysExt The physical extent index of the list head.
3118 * @param iShwPT The shadow page table index.
3119 *
3120 */
3121static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3122{
3123 Assert(PGMIsLockOwner(pVM));
3124 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3125 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3126
3127 /* special common case. */
3128 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3129 {
3130 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3131 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3132 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d}\n", iPhysExt, iShwPT));
3133 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3134 }
3135
3136 /* general treatment. */
3137 const uint16_t iPhysExtStart = iPhysExt;
3138 unsigned cMax = 15;
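 /* Bounded search: if no free slot turns up within cMax rounds, the whole extent list
 is released and the page is marked as overflowed. */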
3139 for (;;)
3140 {
3141 Assert(iPhysExt < pPool->cMaxPhysExts);
3142 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3143 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3144 {
3145 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3146 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3147 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3148 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3149 }
3150 if (!--cMax)
3151 {
3152 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3153 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3154 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3155 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3156 }
3157 }
3158
3159 /* add another extent to the list. */
3160 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3161 if (!pNew)
3162 {
3163 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3164 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3165 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3166 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3167 }
3168 pNew->iNext = iPhysExtStart;
3169 pNew->aidx[0] = iShwPT;
3170 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3171 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3172}
3173
3174
3175/**
3176 * Add a reference to guest physical page where extents are in use.
3177 *
3178 * @returns The new tracking data for PGMPAGE.
3179 *
3180 * @param pVM The VM handle.
3181 * @param u16 The ram range flags (top 16-bits).
3182 * @param iShwPT The shadow page table index.
3183 */
3184uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3185{
3186 pgmLock(pVM);
3187 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3188 {
3189 /*
3190 * Convert to extent list.
3191 */
3192 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3193 uint16_t iPhysExt;
3194 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3195 if (pPhysExt)
3196 {
3197 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3198 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3199 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3200 pPhysExt->aidx[1] = iShwPT;
3201 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3202 }
3203 else
3204 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3205 }
3206 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3207 {
3208 /*
3209 * Insert into the extent list.
3210 */
3211 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3212 }
3213 else
3214 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3215 pgmUnlock(pVM);
3216 return u16;
3217}
3218
3219
3220/**
3221 * Clear references to guest physical memory.
3222 *
3223 * @param pPool The pool.
3224 * @param pPage The page.
3225 * @param pPhysPage Pointer to the aPages entry in the ram range.
3226 */
3227void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3228{
3229 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3230 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3231
3232 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3233 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3234 {
3235 PVM pVM = pPool->CTX_SUFF(pVM);
3236 pgmLock(pVM);
3237
3238 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3239 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3240 do
3241 {
3242 Assert(iPhysExt < pPool->cMaxPhysExts);
3243
3244 /*
3245 * Look for the shadow page and check if it's all freed.
3246 */
3247 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3248 {
3249 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3250 {
3251 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3252
3253 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3254 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3255 {
3256 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3257 pgmUnlock(pVM);
3258 return;
3259 }
3260
3261 /* we can free the node. */
3262 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3263 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3264 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3265 {
3266 /* lonely node */
3267 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3268 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3269 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3270 }
3271 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3272 {
3273 /* head */
3274 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3275 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3276 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3277 }
3278 else
3279 {
3280 /* in list */
3281 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3282 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3283 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3284 }
3285 iPhysExt = iPhysExtNext;
3286 pgmUnlock(pVM);
3287 return;
3288 }
3289 }
3290
3291 /* next */
3292 iPhysExtPrev = iPhysExt;
3293 iPhysExt = paPhysExts[iPhysExt].iNext;
3294 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3295
3296 pgmUnlock(pVM);
3297 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3298 }
3299 else /* nothing to do */
3300 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3301}
3302
3303
3304/**
3305 * Clear references to guest physical memory.
3306 *
3307 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3308 * is assumed to be correct, so the linear search can be skipped and we can assert
3309 * at an earlier point.
3310 *
3311 * @param pPool The pool.
3312 * @param pPage The page.
3313 * @param HCPhys The host physical address corresponding to the guest page.
3314 * @param GCPhys The guest physical address corresponding to HCPhys.
3315 */
3316static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3317{
3318 /*
3319 * Walk range list.
3320 */
3321 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3322 while (pRam)
3323 {
3324 RTGCPHYS off = GCPhys - pRam->GCPhys;
3325 if (off < pRam->cb)
3326 {
3327 /* does it match? */
3328 const unsigned iPage = off >> PAGE_SHIFT;
3329 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3330#ifdef LOG_ENABLED
3331 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3332 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3333#endif
3334 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3335 {
3336 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3337 return;
3338 }
3339 break;
3340 }
3341 pRam = pRam->CTX_SUFF(pNext);
3342 }
3343 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3344}
3345
3346
3347/**
3348 * Clear references to guest physical memory.
3349 *
3350 * @param pPool The pool.
3351 * @param pPage The page.
3352 * @param HCPhys The host physical address corresponding to the guest page.
3353 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3354 */
3355static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3356{
3357 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
3358
3359 /*
3360 * Walk range list.
3361 */
3362 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3363 while (pRam)
3364 {
3365 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3366 if (off < pRam->cb)
3367 {
3368 /* does it match? */
3369 const unsigned iPage = off >> PAGE_SHIFT;
3370 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3371 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3372 {
3373 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3374 return;
3375 }
3376 break;
3377 }
3378 pRam = pRam->CTX_SUFF(pNext);
3379 }
3380
3381 /*
3382 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3383 */
3384 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3385 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3386 while (pRam)
3387 {
3388 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3389 while (iPage-- > 0)
3390 {
3391 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3392 {
3393 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3394 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3395 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3396 return;
3397 }
3398 }
3399 pRam = pRam->CTX_SUFF(pNext);
3400 }
3401
3402 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3403}
3404
3405
3406/**
3407 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3408 *
3409 * @param pPool The pool.
3410 * @param pPage The page.
3411 * @param pShwPT The shadow page table (mapping of the page).
3412 * @param pGstPT The guest page table.
3413 */
3414DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3415{
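    /* Start at the first shadow PTE known to be present and stop as soon as all
       cPresent entries have been dereferenced, so the (empty) tail of the table
       is never scanned. */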
3416 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3417 if (pShwPT->a[i].n.u1Present)
3418 {
3419 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3420 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3421 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3422 if (!--pPage->cPresent)
3423 break;
3424 }
3425}
3426
3427
3428/**
3429 * Clear references to guest physical memory in a PAE / 32-bit page table.
3430 *
3431 * @param pPool The pool.
3432 * @param pPage The page.
3433 * @param pShwPT The shadow page table (mapping of the page).
3434 * @param pGstPT The guest page table (just a half one).
3435 */
3436DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3437{
3438 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3439 if (pShwPT->a[i].n.u1Present)
3440 {
3441 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3442 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3443 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3444 }
3445}
3446
3447
3448/**
3449 * Clear references to guest physical memory in a PAE / PAE page table.
3450 *
3451 * @param pPool The pool.
3452 * @param pPage The page.
3453 * @param pShwPT The shadow page table (mapping of the page).
3454 * @param pGstPT The guest page table.
3455 */
3456DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3457{
3458 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3459 if (pShwPT->a[i].n.u1Present)
3460 {
3461            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3462 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3463 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3464 }
3465}
3466
3467
3468/**
3469 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3470 *
3471 * @param pPool The pool.
3472 * @param pPage The page.
3473 * @param pShwPT The shadow page table (mapping of the page).
3474 */
3475DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3476{
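    /* The guest mapping is a single large page, so the guest physical address
       backing shadow PTE i is simply pPage->GCPhys + i * PAGE_SIZE; no guest
       page table needs to be mapped for this. */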
3477 RTGCPHYS GCPhys = pPage->GCPhys;
3478 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3479 if (pShwPT->a[i].n.u1Present)
3480 {
3481 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3482 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3483 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3484 }
3485}
3486
3487
3488/**
3489 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3490 *
3491 * @param pPool The pool.
3492 * @param pPage The page.
3493 * @param pShwPT The shadow page table (mapping of the page).
3494 */
3495DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3496{
3497 RTGCPHYS GCPhys = pPage->GCPhys;
3498 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3499 if (pShwPT->a[i].n.u1Present)
3500 {
3501            Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 GCPhys=%RGp\n",
3502 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3503 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3504 }
3505}
3506
3507#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3508
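/*
 * The DerefPD / DerefPDPT / DerefPML4 helpers below all follow the same pattern:
 * every present entry (skipping the hypervisor's own mapping entries where those
 * can occur) holds the HC physical address of another shadow page. That address
 * is looked up in the pool's HCPhys AVL tree via RTAvloHCPhysGet() to find the
 * owning PGMPOOLPAGE, and pgmPoolTrackFreeUser() then removes this page's user
 * record (pPage->idx, entry index) from it.
 */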
3509
3510/**
3511 * Clear references to shadowed pages in a 32-bit page directory.
3512 *
3513 * @param pPool The pool.
3514 * @param pPage The page.
3515 * @param pShwPD The shadow page directory (mapping of the page).
3516 */
3517DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
3518{
3519 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3520 {
3521 if ( pShwPD->a[i].n.u1Present
3522 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3523 )
3524 {
3525 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
3526 if (pSubPage)
3527 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3528 else
3529 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
3530 }
3531 }
3532}
3533
3534/**
3535 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
3536 *
3537 * @param pPool The pool.
3538 * @param pPage The page.
3539 * @param pShwPD The shadow page directory (mapping of the page).
3540 */
3541DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3542{
3543 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3544 {
3545 if ( pShwPD->a[i].n.u1Present
3546 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3547 )
3548 {
3549 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3550 if (pSubPage)
3551 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3552 else
3553 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3554 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3555 }
3556 }
3557}
3558
3559/**
3560 * Clear references to shadowed pages in a PAE page directory pointer table.
3561 *
3562 * @param pPool The pool.
3563 * @param pPage The page.
3564 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3565 */
3566DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3567{
3568 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
3569 {
3570 if ( pShwPDPT->a[i].n.u1Present
3571 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
3572 )
3573 {
3574 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3575 if (pSubPage)
3576 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3577 else
3578 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3579 }
3580 }
3581}
3582
3583
3584/**
3585 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3586 *
3587 * @param pPool The pool.
3588 * @param pPage The page.
3589 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3590 */
3591DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3592{
3593 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3594 {
3595 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
3596 if (pShwPDPT->a[i].n.u1Present)
3597 {
3598 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3599 if (pSubPage)
3600 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3601 else
3602 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3603 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3604 }
3605 }
3606}
3607
3608
3609/**
3610 * Clear references to shadowed pages in a 64-bit level 4 page table.
3611 *
3612 * @param pPool The pool.
3613 * @param pPage The page.
3614 * @param pShwPML4 The shadow PML4 table (mapping of the page).
3615 */
3616DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3617{
3618 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3619 {
3620 if (pShwPML4->a[i].n.u1Present)
3621 {
3622 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3623 if (pSubPage)
3624 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3625 else
3626 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3627 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3628 }
3629 }
3630}
3631
3632
3633/**
3634 * Clear references to shadowed pages in an EPT page table.
3635 *
3636 * @param pPool The pool.
3637 * @param pPage The page.
3638 * @param pShwPT The shadow page table (mapping of the page).
3639 */
3640DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3641{
3642 RTGCPHYS GCPhys = pPage->GCPhys;
3643 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3644 if (pShwPT->a[i].n.u1Present)
3645 {
3646 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
3647                  i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3648 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3649 }
3650}
3651
3652
3653/**
3654 * Clear references to shadowed pages in an EPT page directory.
3655 *
3656 * @param pPool The pool.
3657 * @param pPage The page.
3658 * @param pShwPD The shadow page directory (mapping of the page).
3659 */
3660DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3661{
3662 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3663 {
3664 if (pShwPD->a[i].n.u1Present)
3665 {
3666 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3667 if (pSubPage)
3668 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3669 else
3670 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3671 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3672 }
3673 }
3674}
3675
3676
3677/**
3678 * Clear references to shadowed pages in an EPT page directory pointer table.
3679 *
3680 * @param pPool The pool.
3681 * @param pPage The page.
3682 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3683 */
3684DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3685{
3686 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3687 {
3688 if (pShwPDPT->a[i].n.u1Present)
3689 {
3690 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3691 if (pSubPage)
3692 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3693 else
3694 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3695 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3696 }
3697 }
3698}
3699
3700
3701/**
3702 * Clears all references made by this page.
3703 *
3704 * This includes other shadow pages and GC physical addresses.
3705 *
3706 * @param pPool The pool.
3707 * @param pPage The page.
3708 */
3709static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3710{
3711 /*
3712 * Map the shadow page and take action according to the page kind.
3713 */
3714 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
3715 switch (pPage->enmKind)
3716 {
3717#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3718 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3719 {
3720 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3721 void *pvGst;
3722 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3723 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3724 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3725 break;
3726 }
3727
3728 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3729 {
3730 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3731 void *pvGst;
3732 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3733 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3734 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3735 break;
3736 }
3737
3738 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3739 {
3740 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3741 void *pvGst;
3742 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3743 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3744 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3745 break;
3746 }
3747
3748 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3749 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3750 {
3751 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3752 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3753 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3754 break;
3755 }
3756
3757 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3758 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3759 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3760 {
3761 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3762 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3763 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3764 break;
3765 }
3766
3767#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3768 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3769 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3770 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3771 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3772 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3773 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3774 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3775 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3776 break;
3777#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3778
3779 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3780 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3781 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3782 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3783 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3784 case PGMPOOLKIND_PAE_PD_PHYS:
3785 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3786 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3787 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3788 break;
3789
3790 case PGMPOOLKIND_32BIT_PD_PHYS:
3791 case PGMPOOLKIND_32BIT_PD:
3792 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
3793 break;
3794
3795 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3796 case PGMPOOLKIND_PAE_PDPT:
3797 case PGMPOOLKIND_PAE_PDPT_PHYS:
3798 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
3799 break;
3800
3801 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3802 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3803 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3804 break;
3805
3806 case PGMPOOLKIND_64BIT_PML4:
3807 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3808 break;
3809
3810 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3811 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
3812 break;
3813
3814 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3815 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
3816 break;
3817
3818 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3819 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
3820 break;
3821
3822 default:
3823 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3824 }
3825
3826 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3827 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3828 ASMMemZeroPage(pvShw);
3829 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3830 pPage->fZeroed = true;
3831 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
3832}
3833#endif /* PGMPOOL_WITH_USER_TRACKING */
3834
3835/**
3836 * Flushes a pool page.
3837 *
3838 * This moves the page to the free list after removing all user references to it.
3839 *
3840 * @returns VBox status code.
3841 * @retval VINF_SUCCESS on success.
3842 * @param pPool The pool.
3843 * @param pPage The pool page to flush.
3844 */
3845int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3846{
3847 PVM pVM = pPool->CTX_SUFF(pVM);
3848
3849 int rc = VINF_SUCCESS;
3850 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3851 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
3852 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
3853
3854 /*
3855 * Quietly reject any attempts at flushing any of the special root pages.
3856 */
3857 if (pPage->idx < PGMPOOL_IDX_FIRST)
3858 {
3859 AssertFailed(); /* can no longer happen */
3860 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
3861 return VINF_SUCCESS;
3862 }
3863
3864 pgmLock(pVM);
3865
3866 /*
3867 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
3868 */
3869 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
3870 {
3871 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
3872 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
3873 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
3874 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
3875 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
3876 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
3877 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
3878 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
3879 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
3880 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
3881 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
3882 pgmUnlock(pVM);
3883 return VINF_SUCCESS;
3884 }
3885
3886#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3887 /* Start a subset so we won't run out of mapping space. */
3888 PVMCPU pVCpu = VMMGetCpu(pVM);
3889 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3890#endif
3891
3892 /*
3893 * Mark the page as being in need of an ASMMemZeroPage().
3894 */
3895 pPage->fZeroed = false;
3896
3897#ifdef PGMPOOL_WITH_USER_TRACKING
3898 /*
3899 * Clear the page.
3900 */
3901 pgmPoolTrackClearPageUsers(pPool, pPage);
3902 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3903 pgmPoolTrackDeref(pPool, pPage);
3904 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3905#endif
3906
3907#ifdef PGMPOOL_WITH_CACHE
3908 /*
3909 * Flush it from the cache.
3910 */
3911 pgmPoolCacheFlushPage(pPool, pPage);
3912#endif /* PGMPOOL_WITH_CACHE */
3913
3914#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3915 /* Heavy stuff done. */
3916 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3917#endif
3918
3919#ifdef PGMPOOL_WITH_MONITORING
3920 /*
3921 * Deregister the monitoring.
3922 */
3923 if (pPage->fMonitored)
3924 rc = pgmPoolMonitorFlush(pPool, pPage);
3925#endif
3926
3927 /*
3928 * Free the page.
3929 */
3930 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3931 pPage->iNext = pPool->iFreeHead;
3932 pPool->iFreeHead = pPage->idx;
3933 pPage->enmKind = PGMPOOLKIND_FREE;
3934 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
3935 pPage->GCPhys = NIL_RTGCPHYS;
3936 pPage->fReusedFlushPending = false;
3937
3938 pPool->cUsedPages--;
3939 pgmUnlock(pVM);
3940 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3941 return rc;
3942}
3943
3944
3945/**
3946 * Frees a usage of a pool page.
3947 *
3948 * The caller is responsible for updating the user table so that it no longer
3949 * references the shadow page.
3950 *
3951 * @param pPool The pool.
3952 * @param pPage The shadow page.
3953 * @param iUser The shadow page pool index of the user table.
3954 * @param iUserTable The index into the user table (shadowed).
3955 */
3956void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3957{
3958 PVM pVM = pPool->CTX_SUFF(pVM);
3959
3960 STAM_PROFILE_START(&pPool->StatFree, a);
3961 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
3962 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
3963 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3964 pgmLock(pVM);
3965#ifdef PGMPOOL_WITH_USER_TRACKING
3966 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3967#endif
3968#ifdef PGMPOOL_WITH_CACHE
3969 if (!pPage->fCached)
3970#endif
3971 pgmPoolFlushPage(pPool, pPage);
3972 pgmUnlock(pVM);
3973 STAM_PROFILE_STOP(&pPool->StatFree, a);
3974}
3975
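/*
 * Caller-side sketch (illustrative only; pShwPD, iPde and pUserPage are
 * hypothetical names for a parent PAE page directory, the entry being torn down
 * and the pool page owning that directory): the caller clears its own table
 * entry, then pgmPoolFreeByPage() drops the user record and flushes the page if
 * it isn't cached.
 *
 *      PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pShwPD->a[iPde].u & X86_PDE_PAE_PG_MASK);
 *      pShwPD->a[iPde].u = 0;
 *      pgmPoolFreeByPage(pPool, pShwPage, pUserPage->idx, iPde);
 */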
3976
3977/**
3978 * Makes one or more pages free.
3979 *
3980 * @returns VBox status code.
3981 * @retval VINF_SUCCESS on success.
3982 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3983 *
3984 * @param pPool The pool.
3985 * @param enmKind Page table kind
3986 * @param iUser The user of the page.
3987 */
3988static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
3989{
3990 PVM pVM = pPool->CTX_SUFF(pVM);
3991
3992 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3993
3994 /*
3995 * If the pool isn't fully grown yet, expand it.
3996 */
3997 if ( pPool->cCurPages < pPool->cMaxPages
3998#if defined(IN_RC)
3999 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4000 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4001 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4002#endif
4003 )
4004 {
4005 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4006#ifdef IN_RING3
4007 int rc = PGMR3PoolGrow(pVM);
4008#else
4009 int rc = CTXALLMID(VMM, CallHost)(pVM, VMMCALLHOST_PGM_POOL_GROW, 0);
4010#endif
4011 if (RT_FAILURE(rc))
4012 return rc;
4013 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4014 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4015 return VINF_SUCCESS;
4016 }
4017
4018#ifdef PGMPOOL_WITH_CACHE
4019 /*
4020 * Free one cached page.
4021 */
4022 return pgmPoolCacheFreeOne(pPool, iUser);
4023#else
4024 /*
4025 * Flush the pool.
4026 *
4027 * If we have tracking enabled, it should be possible to come up with
4028 * a cheap replacement strategy...
4029 */
4030 /** @todo This path no longer works (CR3 root pages will be flushed)!! */
4031 AssertCompileFailed();
4032 Assert(!CPUMIsGuestInLongMode(pVM));
4033 pgmPoolFlushAllInt(pPool);
4034 return VERR_PGM_POOL_FLUSHED;
4035#endif
4036}
4037
4038/**
4039 * Allocates a page from the pool.
4040 *
4041 * This page may actually be a cached page and not in need of any processing
4042 * on the caller's part.
4043 *
4044 * @returns VBox status code.
4045 * @retval VINF_SUCCESS if a NEW page was allocated.
4046 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4047 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4048 * @param pVM The VM handle.
4049 * @param GCPhys The GC physical address of the page we're going to shadow.
4050 * For 4MB and 2MB PD entries, it's the first address the
4051 * shadow PT is covering.
4052 * @param enmKind The kind of mapping.
4053 * @param enmAccess Access type for the mapping (only relevant for big pages)
4054 * @param iUser The shadow page pool index of the user table.
4055 * @param iUserTable The index into the user table (shadowed).
4056 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4057 * @param fLockPage Lock the page
4058 */
4059int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4060{
4061 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4062 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4063 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4064 *ppPage = NULL;
4065 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4066 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4067 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4068
4069 pgmLock(pVM);
4070
4071#ifdef PGMPOOL_WITH_CACHE
4072 if (pPool->fCacheEnabled)
4073 {
4074 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4075 if (RT_SUCCESS(rc2))
4076 {
4077 if (fLockPage)
4078 pgmPoolLockPage(pPool, *ppPage);
4079 pgmUnlock(pVM);
4080 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4081 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4082 return rc2;
4083 }
4084 }
4085#endif
4086
4087 /*
4088 * Allocate a new one.
4089 */
4090 int rc = VINF_SUCCESS;
4091 uint16_t iNew = pPool->iFreeHead;
4092 if (iNew == NIL_PGMPOOL_IDX)
4093 {
4094 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4095 if (RT_FAILURE(rc))
4096 {
4097 pgmUnlock(pVM);
4098 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4099 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4100 return rc;
4101 }
4102 iNew = pPool->iFreeHead;
4103 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4104 }
4105
4106 /* unlink the free head */
4107 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4108 pPool->iFreeHead = pPage->iNext;
4109 pPage->iNext = NIL_PGMPOOL_IDX;
4110
4111 /*
4112 * Initialize it.
4113 */
4114 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4115 pPage->enmKind = enmKind;
4116 pPage->enmAccess = enmAccess;
4117 pPage->GCPhys = GCPhys;
4118 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4119 pPage->fMonitored = false;
4120 pPage->fCached = false;
4121 pPage->fReusedFlushPending = false;
4122#ifdef PGMPOOL_WITH_MONITORING
4123 pPage->cModifications = 0;
4124 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4125 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4126#else
4127 pPage->fCR3Mix = false;
4128#endif
4129#ifdef PGMPOOL_WITH_USER_TRACKING
4130 pPage->cPresent = 0;
4131 pPage->iFirstPresent = ~0;
4132
4133 /*
4134 * Insert into the tracking and cache. If this fails, free the page.
4135 */
4136 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4137 if (RT_FAILURE(rc3))
4138 {
4139 pPool->cUsedPages--;
4140 pPage->enmKind = PGMPOOLKIND_FREE;
4141 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4142 pPage->GCPhys = NIL_RTGCPHYS;
4143 pPage->iNext = pPool->iFreeHead;
4144 pPool->iFreeHead = pPage->idx;
4145 pgmUnlock(pVM);
4146 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4147 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4148 return rc3;
4149 }
4150#endif /* PGMPOOL_WITH_USER_TRACKING */
4151
4152 /*
4153 * Commit the allocation, clear the page and return.
4154 */
4155#ifdef VBOX_WITH_STATISTICS
4156 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4157 pPool->cUsedPagesHigh = pPool->cUsedPages;
4158#endif
4159
4160 if (!pPage->fZeroed)
4161 {
4162 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4163 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4164 ASMMemZeroPage(pv);
4165 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4166 }
4167
4168 *ppPage = pPage;
4169 if (fLockPage)
4170 pgmPoolLockPage(pPool, pPage);
4171 pgmUnlock(pVM);
4172 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4173 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4174 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4175 return rc;
4176}
4177
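/*
 * Usage sketch (illustrative only; GCPhysGst, iUserIdx and iEntry are hypothetical
 * caller-side names): a shadow PAE page table is allocated for a guest PAE page
 * table, possibly satisfied from the cache, and later released again through
 * pgmPoolFreeByPage() once the parent entry no longer points at it.
 *
 *      PPGMPOOLPAGE pShwPage;
 *      int rc = pgmPoolAllocEx(pVM, GCPhysGst, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                              PGMPOOLACCESS_DONTCARE, iUserIdx, iEntry, &pShwPage,
 *                              false); // fLockPage
 *      if (rc == VINF_SUCCESS || rc == VINF_PGM_CACHED_PAGE)
 *      {
 *          // point the parent (user) table entry at pShwPage->Core.Key, and when
 *          // tearing it down again:
 *          pgmPoolFreeByPage(pVM->pgm.s.CTX_SUFF(pPool), pShwPage, iUserIdx, iEntry);
 *      }
 */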
4178
4179/**
4180 * Frees a usage of a pool page.
4181 *
4182 * @param pVM The VM handle.
4183 * @param HCPhys The HC physical address of the shadow page.
4184 * @param iUser The shadow page pool index of the user table.
4185 * @param iUserTable The index into the user table (shadowed).
4186 */
4187void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4188{
4189 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4190 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4191 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4192}
4193
4194/**
4195 * Internal worker for finding an 'in-use' shadow page given by its physical address.
4196 *
4197 * @returns Pointer to the shadow page structure.
4198 * @param pPool The pool.
4199 * @param HCPhys The HC physical address of the shadow page.
4200 */
4201PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4202{
4203 PVM pVM = pPool->CTX_SUFF(pVM);
4204
4205 Assert(PGMIsLockOwner(pVM));
4206
4207 /*
4208 * Look up the page.
4209 */
4210 pgmLock(pVM);
4211 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4212 pgmUnlock(pVM);
4213
4214 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4215 return pPage;
4216}
4217
4218
4219#ifdef IN_RING3
4220/**
4221 * Flushes the entire cache.
4222 *
4223 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4224 * and will execute this CR3 flush.
4225 *
4226 * @param pVM The VM handle.
4227 */
4228void pgmR3PoolReset(PVM pVM)
4229{
4230 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4231
4232 Assert(PGMIsLockOwner(pVM));
4233 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
4234 LogFlow(("pgmR3PoolReset:\n"));
4235
4236 /*
4237 * If there are no pages in the pool, there is nothing to do.
4238 */
4239 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4240 {
4241 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4242 return;
4243 }
4244
4245 /*
4246 * Exit the shadow mode since we're going to clear everything,
4247 * including the root page.
4248 */
4249 for (unsigned i = 0; i < pVM->cCPUs; i++)
4250 {
4251 PVMCPU pVCpu = &pVM->aCpus[i];
4252 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
4253 }
4254
4255 /*
4256 * Nuke the free list and reinsert all pages into it.
4257 */
4258 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4259 {
4260 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4261
4262 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
4263#ifdef PGMPOOL_WITH_MONITORING
4264 if (pPage->fMonitored)
4265 pgmPoolMonitorFlush(pPool, pPage);
4266 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4267 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4268 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4269 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4270 pPage->cModifications = 0;
4271#endif
4272 pPage->GCPhys = NIL_RTGCPHYS;
4273 pPage->enmKind = PGMPOOLKIND_FREE;
4274 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4275 Assert(pPage->idx == i);
4276 pPage->iNext = i + 1;
4277 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4278 pPage->fSeenNonGlobal = false;
4279 pPage->fMonitored = false;
4280 pPage->fCached = false;
4281 pPage->fReusedFlushPending = false;
4282#ifdef PGMPOOL_WITH_USER_TRACKING
4283 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4284#else
4285 pPage->fCR3Mix = false;
4286#endif
4287#ifdef PGMPOOL_WITH_CACHE
4288 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4289 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4290#endif
4291 pPage->cLocked = 0;
4292 }
4293 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4294 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4295 pPool->cUsedPages = 0;
4296
4297#ifdef PGMPOOL_WITH_USER_TRACKING
4298 /*
4299 * Zap and reinitialize the user records.
4300 */
4301 pPool->cPresent = 0;
4302 pPool->iUserFreeHead = 0;
4303 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4304 const unsigned cMaxUsers = pPool->cMaxUsers;
4305 for (unsigned i = 0; i < cMaxUsers; i++)
4306 {
4307 paUsers[i].iNext = i + 1;
4308 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4309 paUsers[i].iUserTable = 0xfffffffe;
4310 }
4311 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4312#endif
4313
4314#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4315 /*
4316 * Clear all the GCPhys links and rebuild the phys ext free list.
4317 */
4318 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
4319 pRam;
4320 pRam = pRam->CTX_SUFF(pNext))
4321 {
4322 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4323 while (iPage-- > 0)
4324 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
4325 }
4326
4327 pPool->iPhysExtFreeHead = 0;
4328 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4329 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4330 for (unsigned i = 0; i < cMaxPhysExts; i++)
4331 {
4332 paPhysExts[i].iNext = i + 1;
4333 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4334 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4335 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4336 }
4337 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4338#endif
4339
4340#ifdef PGMPOOL_WITH_MONITORING
4341 /*
4342 * Just zap the modified list.
4343 */
4344 pPool->cModifiedPages = 0;
4345 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4346#endif
4347
4348#ifdef PGMPOOL_WITH_CACHE
4349 /*
4350 * Clear the GCPhys hash and the age list.
4351 */
4352 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4353 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4354 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4355 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4356#endif
4357
4358 /*
4359 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4360 */
4361 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4362 {
4363 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4364 pPage->iNext = NIL_PGMPOOL_IDX;
4365#ifdef PGMPOOL_WITH_MONITORING
4366 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4367 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4368 pPage->cModifications = 0;
4369 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4370 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4371 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4372 if (pPage->fMonitored)
4373 {
4374 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4375 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4376 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4377 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4378 pPool->pszAccessHandler);
4379 AssertFatalRCSuccess(rc);
4380# ifdef PGMPOOL_WITH_CACHE
4381 pgmPoolHashInsert(pPool, pPage);
4382# endif
4383 }
4384#endif
4385#ifdef PGMPOOL_WITH_USER_TRACKING
4386 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4387#endif
4388#ifdef PGMPOOL_WITH_CACHE
4389 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4390 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4391#endif
4392 }
4393
4394 for (unsigned i = 0; i < pVM->cCPUs; i++)
4395 {
4396 PVMCPU pVCpu = &pVM->aCpus[i];
4397 /*
4398 * Re-enter the shadowing mode and assert Sync CR3 FF.
4399 */
4400 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
4401 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
4402 }
4403
4404 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4405}
4406#endif /* IN_RING3 */
4407
4408#ifdef LOG_ENABLED
4409static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
4410{
4411 switch(enmKind)
4412 {
4413 case PGMPOOLKIND_INVALID:
4414 return "PGMPOOLKIND_INVALID";
4415 case PGMPOOLKIND_FREE:
4416 return "PGMPOOLKIND_FREE";
4417 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4418 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4419 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4420 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4421 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4422 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4423 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4424 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4425 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4426 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4427 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4428 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
4429 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4430 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
4431 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4432 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
4433 case PGMPOOLKIND_32BIT_PD:
4434 return "PGMPOOLKIND_32BIT_PD";
4435 case PGMPOOLKIND_32BIT_PD_PHYS:
4436 return "PGMPOOLKIND_32BIT_PD_PHYS";
4437 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4438 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
4439 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4440 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
4441 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4442 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
4443 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4444 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
4445 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4446 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
4447 case PGMPOOLKIND_PAE_PD_PHYS:
4448 return "PGMPOOLKIND_PAE_PD_PHYS";
4449 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4450 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
4451 case PGMPOOLKIND_PAE_PDPT:
4452 return "PGMPOOLKIND_PAE_PDPT";
4453 case PGMPOOLKIND_PAE_PDPT_PHYS:
4454 return "PGMPOOLKIND_PAE_PDPT_PHYS";
4455 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4456 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
4457 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4458 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
4459 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4460 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
4461 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4462 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
4463 case PGMPOOLKIND_64BIT_PML4:
4464 return "PGMPOOLKIND_64BIT_PML4";
4465 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4466 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
4467 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4468 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
4469 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4470 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
4471 case PGMPOOLKIND_ROOT_NESTED:
4472 return "PGMPOOLKIND_ROOT_NESTED";
4473 }
4474 return "Unknown kind!";
4475}
4476#endif /* LOG_ENABLED */