VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@22506

Last change on this file since 22506 was 22506, checked in by vboxsync, 15 years ago

Flush any monitored duplicates as we will disable write protection.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 179.5 KB
1/* $Id: PGMAllPool.cpp 22506 2009-08-27 11:28:43Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42#include <iprt/string.h>
43
44
45/*******************************************************************************
46* Internal Functions *
47*******************************************************************************/
48RT_C_DECLS_BEGIN
49static void pgmPoolFlushAllInt(PPGMPOOL pPool);
50#ifdef PGMPOOL_WITH_USER_TRACKING
51DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
52DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
53static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
54#endif
55#ifdef PGMPOOL_WITH_CACHE
56static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
57#endif
58#ifdef PGMPOOL_WITH_MONITORING
59static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
60#endif
61#ifndef IN_RING3
62DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
63#endif
64#ifdef LOG_ENABLED
65static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
66#endif
67static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage);
68
69void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs);
70void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt);
71int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
72PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
73void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
74void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
75
76RT_C_DECLS_END
77
78
79/**
80 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
81 *
82 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
83 * @param enmKind The page kind.
84 */
85DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
86{
87 switch (enmKind)
88 {
89 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
90 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
91 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
92 return true;
93 default:
94 return false;
95 }
96}
97
98/** @def PGMPOOL_PAGE_2_LOCKED_PTR
99 * Maps a pool page into the current context and locks it (RC only).
100 *
101 * @returns Pointer to the mapped and locked page.
102 * @param pVM The VM handle.
103 * @param pPage The pool page.
104 *
105 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume one of the
106 * small page window entries employed by that function. Be careful.
107 * @remark There is no need to assert on the result.
108 */
109#if defined(IN_RC)
110DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
111{
112 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
113
114 /* Make sure the dynamic mapping will not be reused. */
115 if (pv)
116 PGMDynLockHCPage(pVM, (uint8_t *)pv);
117
118 return pv;
119}
120#else
121# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
122#endif
123
124/** @def PGMPOOL_UNLOCK_PTR
125 * Unlocks a previously locked dynamic page mapping (RC only).
126 *
127 * @returns nothing.
128 * @param pVM The VM handle.
129 * @param pvPage The pointer returned by PGMPOOL_PAGE_2_LOCKED_PTR.
130 *
131 * @remark In RC this uses PGMDynUnlockHCPage() to release the lock taken by
132 * PGMPOOL_PAGE_2_LOCKED_PTR on the small page window employed by PGMGCDynMapHCPage().
133 * @remark There is no need to assert on the result.
134 */
135#if defined(IN_RC)
136DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
137{
138 if (pvPage)
139 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
140}
141#else
142# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
143#endif
144
145
146#ifdef PGMPOOL_WITH_MONITORING
147/**
148 * Determine the size of a write instruction.
149 * @returns number of bytes written.
150 * @param pDis The disassembler state.
151 */
152static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
153{
154 /*
155 * This is very crude and possibly wrong for some opcodes,
156 * but since it's not really supposed to be called we can
157 * probably live with that.
158 */
159 return DISGetParamSize(pDis, &pDis->param1);
160}
161
162
163/**
164 * Flushes a chain of pages sharing the same access monitor.
165 *
166 * @returns VBox status code suitable for scheduling.
167 * @param pPool The pool.
168 * @param pPage A page in the chain.
169 */
170int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
171{
172 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
173
174 /*
175 * Find the list head.
176 */
177 uint16_t idx = pPage->idx;
178 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
179 {
180 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
181 {
182 idx = pPage->iMonitoredPrev;
183 Assert(idx != pPage->idx);
184 pPage = &pPool->aPages[idx];
185 }
186 }
187
188 /*
189 * Iterate the list flushing each shadow page.
190 */
191 int rc = VINF_SUCCESS;
192 for (;;)
193 {
194 idx = pPage->iMonitoredNext;
195 Assert(idx != pPage->idx);
196 if (pPage->idx >= PGMPOOL_IDX_FIRST)
197 {
198 int rc2 = pgmPoolFlushPage(pPool, pPage);
199 AssertRC(rc2);
200 }
201 /* next */
202 if (idx == NIL_PGMPOOL_IDX)
203 break;
204 pPage = &pPool->aPages[idx];
205 }
206 return rc;
207}
208
209
210/**
211 * Wrapper for reading the guest entry being modified, using whatever access is appropriate for the current context.
212 *
213 * @returns VBox status code suitable for scheduling.
214 * @param pVM VM Handle.
215 * @param pvDst Destination address
216 * @param pvSrc Source guest virtual address.
217 * @param GCPhysSrc The source guest physical address.
218 * @param cb Size of data to read
219 */
220DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
221{
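/* Both the pointer and the physical address are masked down to a cb-aligned boundary
   so that the whole guest entry containing the fault is read, even when the faulting
   write only touched part of it. */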
222#if defined(IN_RING3)
223 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
224 return VINF_SUCCESS;
225#else
226 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
227 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
228#endif
229}
230
231/**
232 * Process shadow entries before they are changed by the guest.
233 *
234 * For PT entries we will clear them. For PD entries, we'll simply check
235 * for mapping conflicts and set the SyncCR3 FF if found.
236 *
237 * @param pVCpu VMCPU handle
238 * @param pPool The pool.
239 * @param pPage The head page.
240 * @param GCPhysFault The guest physical fault address.
241 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
242 * In R3 this is the host context 'fault' address.
243 * @param pDis The disassembler state for figuring out the write size.
244 * This need not be specified if the caller knows we won't do cross entry accesses.
245 */
246void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, PDISCPUSTATE pDis)
247{
248 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
249 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
250 const unsigned cbWrite = pDis ? pgmPoolDisasWriteSize(pDis) : 0;
251 PVM pVM = pPool->CTX_SUFF(pVM);
252
253 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)pvAddress, GCPhysFault, cbWrite));
254
255 for (;;)
256 {
257 union
258 {
259 void *pv;
260 PX86PT pPT;
261 PX86PTPAE pPTPae;
262 PX86PD pPD;
263 PX86PDPAE pPDPae;
264 PX86PDPT pPDPT;
265 PX86PML4 pPML4;
266 } uShw;
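/* uShw is a typed view of the shadow page; which member is valid depends on
   pPage->enmKind, handled in the switch below. */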
267
268 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
269
270 uShw.pv = NULL;
271 switch (pPage->enmKind)
272 {
273 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
274 {
275 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
276 const unsigned iShw = off / sizeof(X86PTE);
277 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
278 if (uShw.pPT->a[iShw].n.u1Present)
279 {
280# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
281 X86PTE GstPte;
282
283 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
284 AssertRC(rc);
285 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
286 pgmPoolTracDerefGCPhysHint(pPool, pPage,
287 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
288 GstPte.u & X86_PTE_PG_MASK);
289# endif
290 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
291 }
292 break;
293 }
294
295 /* page/2 sized */
296 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
297 {
298 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
299 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
300 {
301 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
302 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
303 if (uShw.pPTPae->a[iShw].n.u1Present)
304 {
305# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
306 X86PTE GstPte;
307 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
308 AssertRC(rc);
309
310 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
311 pgmPoolTracDerefGCPhysHint(pPool, pPage,
312 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
313 GstPte.u & X86_PTE_PG_MASK);
314# endif
315 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
316 }
317 }
318 break;
319 }
320
321 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
322 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
323 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
324 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
325 {
326 unsigned iGst = off / sizeof(X86PDE);
327 unsigned iShwPdpt = iGst / 256;
328 unsigned iShw = (iGst % 256) * 2;
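/* A 32-bit guest PD (1024 entries covering 4 GB) is shadowed by four PAE PDs of 512
   entries each, and every guest PDE covers two PAE PDEs (4 MB vs 2 MB). iShwPdpt selects
   which of the four shadow PDs this write belongs to, and iShw is the first of the two
   affected shadow entries. E.g. guest PDE 0x123 lands in shadow PD 1, entries 0x46/0x47. */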
329 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
330
331 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
332 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
333 {
334 for (unsigned i = 0; i < 2; i++)
335 {
336# ifndef IN_RING0
337 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
338 {
339 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
340 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
341 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
342 break;
343 }
344 else
345# endif /* !IN_RING0 */
346 if (uShw.pPDPae->a[iShw+i].n.u1Present)
347 {
348 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
349 pgmPoolFree(pVM,
350 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
351 pPage->idx,
352 iShw + i);
353 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
354 }
355
356 /* paranoia / a bit assumptive: a misaligned write may spill into the next guest PDE as well. */
357 if ( pDis
358 && (off & 3)
359 && (off & 3) + cbWrite > 4)
360 {
361 const unsigned iShw2 = iShw + 2 + i;
362 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
363 {
364# ifndef IN_RING0
365 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
366 {
367 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
368 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
369 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
370 break;
371 }
372 else
373# endif /* !IN_RING0 */
374 if (uShw.pPDPae->a[iShw2].n.u1Present)
375 {
376 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
377 pgmPoolFree(pVM,
378 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
379 pPage->idx,
380 iShw2);
381 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
382 }
383 }
384 }
385 }
386 }
387 break;
388 }
389
390 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
391 {
392 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
393 const unsigned iShw = off / sizeof(X86PTEPAE);
394 if (uShw.pPTPae->a[iShw].n.u1Present)
395 {
396# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
397 X86PTEPAE GstPte;
398 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
399 AssertRC(rc);
400
401 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
402 pgmPoolTracDerefGCPhysHint(pPool, pPage,
403 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
404 GstPte.u & X86_PTE_PAE_PG_MASK);
405# endif
406 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
407 }
408
409 /* paranoia / a bit assumptive. */
410 if ( pDis
411 && (off & 7)
412 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
413 {
414 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
415 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
416
417 if (uShw.pPTPae->a[iShw2].n.u1Present)
418 {
419# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
420 X86PTEPAE GstPte;
421# ifdef IN_RING3
422 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
423# else
424 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
425# endif
426 AssertRC(rc);
427 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
428 pgmPoolTracDerefGCPhysHint(pPool, pPage,
429 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
430 GstPte.u & X86_PTE_PAE_PG_MASK);
431# endif
432 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u ,0);
433 }
434 }
435 break;
436 }
437
438 case PGMPOOLKIND_32BIT_PD:
439 {
440 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
441 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
442
443 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
444# ifndef IN_RING0
445 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
446 {
447 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
448 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
449 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
450 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
451 break;
452 }
453# endif /* !IN_RING0 */
454# ifndef IN_RING0
455 else
456# endif /* !IN_RING0 */
457 {
458 if (uShw.pPD->a[iShw].n.u1Present)
459 {
460 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
461 pgmPoolFree(pVM,
462 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
463 pPage->idx,
464 iShw);
465 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
466 }
467 }
468 /* paranoia / a bit assumptive. */
469 if ( pDis
470 && (off & 3)
471 && (off & 3) + cbWrite > sizeof(X86PTE))
472 {
473 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
474 if ( iShw2 != iShw
475 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
476 {
477# ifndef IN_RING0
478 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
479 {
480 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
481 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
482 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
483 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
484 break;
485 }
486# endif /* !IN_RING0 */
487# ifndef IN_RING0
488 else
489# endif /* !IN_RING0 */
490 {
491 if (uShw.pPD->a[iShw2].n.u1Present)
492 {
493 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
494 pgmPoolFree(pVM,
495 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
496 pPage->idx,
497 iShw2);
498 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
499 }
500 }
501 }
502 }
503#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
504 if ( uShw.pPD->a[iShw].n.u1Present
505 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
506 {
507 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
508# ifdef IN_RC /* TLB load - we're pushing things a bit... */
509 ASMProbeReadByte(pvAddress);
510# endif
511 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
512 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
513 }
514#endif
515 break;
516 }
517
518 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
519 {
520 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
521 const unsigned iShw = off / sizeof(X86PDEPAE);
522#ifndef IN_RING0
523 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
524 {
525 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
526 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
527 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
528 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
529 break;
530 }
531#endif /* !IN_RING0 */
532 /*
533 * Causes trouble when the guest uses a PDE to refer to the whole page table level
534 * structure. (Invalidate here; faults later on when it tries to change the page
535 * table entries -> recheck; probably only applies to the RC case.)
536 */
537# ifndef IN_RING0
538 else
539# endif /* !IN_RING0 */
540 {
541 if (uShw.pPDPae->a[iShw].n.u1Present)
542 {
543 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
544 pgmPoolFree(pVM,
545 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
546 pPage->idx,
547 iShw);
548 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
549 }
550 }
551 /* paranoia / a bit assumptive. */
552 if ( pDis
553 && (off & 7)
554 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
555 {
556 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
557 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
558
559#ifndef IN_RING0
560 if ( iShw2 != iShw
561 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
562 {
563 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
564 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
565 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
566 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
567 break;
568 }
569#endif /* !IN_RING0 */
570# ifndef IN_RING0
571 else
572# endif /* !IN_RING0 */
573 if (uShw.pPDPae->a[iShw2].n.u1Present)
574 {
575 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
576 pgmPoolFree(pVM,
577 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
578 pPage->idx,
579 iShw2);
580 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
581 }
582 }
583 break;
584 }
585
586 case PGMPOOLKIND_PAE_PDPT:
587 {
588 /*
589 * Hopefully this doesn't happen very often:
590 * - touching unused parts of the page
591 * - messing with the bits of pd pointers without changing the physical address
592 */
593 /* PDPT roots are not page aligned; 32 byte only! */
594 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
595
596 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
597 const unsigned iShw = offPdpt / sizeof(X86PDPE);
598 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
599 {
600# ifndef IN_RING0
601 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
602 {
603 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
604 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
605 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
606 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
607 break;
608 }
609# endif /* !IN_RING0 */
610# ifndef IN_RING0
611 else
612# endif /* !IN_RING0 */
613 if (uShw.pPDPT->a[iShw].n.u1Present)
614 {
615 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
616 pgmPoolFree(pVM,
617 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
618 pPage->idx,
619 iShw);
620 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
621 }
622
623 /* paranoia / a bit assumptive. */
624 if ( pDis
625 && (offPdpt & 7)
626 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
627 {
628 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
629 if ( iShw2 != iShw
630 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
631 {
632# ifndef IN_RING0
633 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
634 {
635 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
636 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
637 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
638 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
639 break;
640 }
641# endif /* !IN_RING0 */
642# ifndef IN_RING0
643 else
644# endif /* !IN_RING0 */
645 if (uShw.pPDPT->a[iShw2].n.u1Present)
646 {
647 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
648 pgmPoolFree(pVM,
649 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
650 pPage->idx,
651 iShw2);
652 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
653 }
654 }
655 }
656 }
657 break;
658 }
659
660#ifndef IN_RC
661 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
662 {
663 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
664 const unsigned iShw = off / sizeof(X86PDEPAE);
665 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
666 if (uShw.pPDPae->a[iShw].n.u1Present)
667 {
668 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
669 pgmPoolFree(pVM,
670 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
671 pPage->idx,
672 iShw);
673 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
674 }
675 /* paranoia / a bit assumptive. */
676 if ( pDis
677 && (off & 7)
678 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
679 {
680 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
681 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
682
683 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
684 if (uShw.pPDPae->a[iShw2].n.u1Present)
685 {
686 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
687 pgmPoolFree(pVM,
688 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
689 pPage->idx,
690 iShw2);
691 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
692 }
693 }
694 break;
695 }
696
697 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
698 {
699 /*
700 * Hopefully this doesn't happen very often:
701 * - messing with the bits of pd pointers without changing the physical address
702 */
703 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
704 {
705 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
706 const unsigned iShw = off / sizeof(X86PDPE);
707 if (uShw.pPDPT->a[iShw].n.u1Present)
708 {
709 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
710 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
711 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
712 }
713 /* paranoia / a bit assumptive. */
714 if ( pDis
715 && (off & 7)
716 && (off & 7) + cbWrite > sizeof(X86PDPE))
717 {
718 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
719 if (uShw.pPDPT->a[iShw2].n.u1Present)
720 {
721 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
722 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
723 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
724 }
725 }
726 }
727 break;
728 }
729
730 case PGMPOOLKIND_64BIT_PML4:
731 {
732 /*
733 * Hopefully this doesn't happen very often:
734 * - messing with the bits of pd pointers without changing the physical address
735 */
736 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
737 {
738 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
739 const unsigned iShw = off / sizeof(X86PDPE);
740 if (uShw.pPML4->a[iShw].n.u1Present)
741 {
742 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
743 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
744 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
745 }
746 /* paranoia / a bit assumptive. */
747 if ( pDis
748 && (off & 7)
749 && (off & 7) + cbWrite > sizeof(X86PDPE))
750 {
751 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
752 if (uShw.pPML4->a[iShw2].n.u1Present)
753 {
754 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
755 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
756 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
757 }
758 }
759 }
760 break;
761 }
762#endif /* !IN_RC */
763
764 default:
765 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
766 }
767 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
768
769 /* next */
770 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
771 return;
772 pPage = &pPool->aPages[pPage->iMonitoredNext];
773 }
774}
775
776# ifndef IN_RING3
777/**
778 * Checks if an access could be a fork operation in progress.
779 *
780 * Meaning that the guest is setting up the parent process for Copy-On-Write.
781 *
782 * @returns true if it's likely that we're forking, otherwise false.
783 * @param pPool The pool.
784 * @param pDis The disassembled instruction.
785 * @param offFault The access offset.
786 */
787DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
788{
789 /*
790 * i386 linux is using btr to clear X86_PTE_RW.
791 * The functions involved are (2.6.16 source inspection):
792 * clear_bit
793 * ptep_set_wrprotect
794 * copy_one_pte
795 * copy_pte_range
796 * copy_pmd_range
797 * copy_pud_range
798 * copy_page_range
799 * dup_mmap
800 * dup_mm
801 * copy_mm
802 * copy_process
803 * do_fork
804 */
805 if ( pDis->pCurInstr->opcode == OP_BTR
806 && !(offFault & 4)
807 /** @todo Validate that the bit index is X86_PTE_RW. */
808 )
809 {
810 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
811 return true;
812 }
813 return false;
814}
815
816
817/**
818 * Determine whether the page is likely to have been reused.
819 *
820 * @returns true if we consider the page as being reused for a different purpose.
821 * @returns false if we consider it to still be a paging page.
822 * @param pVM VM Handle.
823 * @param pVCpu VMCPU Handle.
824 * @param pRegFrame Trap register frame.
825 * @param pDis The disassembly info for the faulting instruction.
826 * @param pvFault The fault address.
827 *
828 * @remark The REP prefix check is left to the caller because of STOSD/W.
829 */
830DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
831{
832#ifndef IN_RC
833 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
834 if ( HWACCMHasPendingIrq(pVM)
835 && (pRegFrame->rsp - pvFault) < 32)
836 {
837 /* Fault caused by stack writes while trying to inject an interrupt event. */
838 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
839 return true;
840 }
841#else
842 NOREF(pVM); NOREF(pvFault);
843#endif
844
845 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
846
847 /* Non-supervisor mode write means it's used for something else. */
848 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
849 return true;
850
851 switch (pDis->pCurInstr->opcode)
852 {
853 /* call implies the actual push of the return address faulted */
854 case OP_CALL:
855 Log4(("pgmPoolMonitorIsReused: CALL\n"));
856 return true;
857 case OP_PUSH:
858 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
859 return true;
860 case OP_PUSHF:
861 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
862 return true;
863 case OP_PUSHA:
864 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
865 return true;
866 case OP_FXSAVE:
867 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
868 return true;
869 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
870 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
871 return true;
872 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
873 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
874 return true;
875 case OP_MOVSWD:
876 case OP_STOSWD:
877 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
878 && pRegFrame->rcx >= 0x40
879 )
880 {
881 Assert(pDis->mode == CPUMODE_64BIT);
882
883 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
884 return true;
885 }
886 return false;
887 }
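/* A write whose destination is addressed through ESP/RSP almost certainly means the
   page now backs a stack rather than a page table. */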
888 if ( ( (pDis->param1.flags & USE_REG_GEN32)
889 || (pDis->param1.flags & USE_REG_GEN64))
890 && (pDis->param1.base.reg_gen == USE_REG_ESP))
891 {
892 Log4(("pgmPoolMonitorIsReused: ESP\n"));
893 return true;
894 }
895
896 return false;
897}
898
899
900/**
901 * Flushes the page being accessed.
902 *
903 * @returns VBox status code suitable for scheduling.
904 * @param pVM The VM handle.
905 * @param pVCpu The VMCPU handle.
906 * @param pPool The pool.
907 * @param pPage The pool page (head).
908 * @param pDis The disassembly of the write instruction.
909 * @param pRegFrame The trap register frame.
910 * @param GCPhysFault The fault address as guest physical address.
911 * @param pvFault The fault address.
912 */
913static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
914 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
915{
916 /*
917 * First, do the flushing.
918 */
919 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
920
921 /*
922 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
923 * @todo: why is this necessary? an instruction restart would be sufficient, wouldn't it?
924 */
925 uint32_t cbWritten;
926 int rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten);
927 if (RT_SUCCESS(rc2))
928 pRegFrame->rip += pDis->opsize;
929 else if (rc2 == VERR_EM_INTERPRETER)
930 {
931#ifdef IN_RC
932 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
933 {
934 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
935 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
936 rc = VINF_SUCCESS;
937 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
938 }
939 else
940#endif
941 {
942 rc = VINF_EM_RAW_EMULATE_INSTR;
943 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
944 }
945 }
946 else
947 rc = rc2;
948
949 /* See use in pgmPoolAccessHandlerSimple(). */
950 PGM_INVL_VCPU_TLBS(pVCpu);
951
952 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
953 return rc;
954
955}
956
957
958/**
959 * Handles the STOSD write accesses.
960 *
961 * @returns VBox status code suitable for scheduling.
962 * @param pVM The VM handle.
963 * @param pPool The pool.
964 * @param pPage The pool page (head).
965 * @param pDis The disassembly of the write instruction.
966 * @param pRegFrame The trap register frame.
967 * @param GCPhysFault The fault address as guest physical address.
968 * @param pvFault The fault address.
969 */
970DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
971 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
972{
973 unsigned uIncrement = pDis->param1.size;
974
975 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
976 Assert(pRegFrame->rcx <= 0x20);
977
978#ifdef VBOX_STRICT
979 if (pDis->opmode == CPUMODE_32BIT)
980 Assert(uIncrement == 4);
981 else
982 Assert(uIncrement == 8);
983#endif
984
985 Log3(("pgmPoolAccessHandlerSTOSD\n"));
986
987 /*
988 * Increment the modification counter and insert it into the list
989 * of modified pages the first time.
990 */
991 if (!pPage->cModifications++)
992 pgmPoolMonitorModifiedInsert(pPool, pPage);
993
994 /*
995 * Execute REP STOSD.
996 *
997 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
998 * write situation, meaning that it's safe to write here.
999 */
1000 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1001 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
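/* Emulate REP STOSD/STOSQ by hand: for each element, let the monitor chain invalidate
   the affected shadow entries first, then perform the guest write and advance rdi/rcx
   exactly as the instruction would. */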
1002 while (pRegFrame->rcx)
1003 {
1004#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1005 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1006 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1007 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1008#else
1009 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1010#endif
1011#ifdef IN_RC
1012 *(uint32_t *)pu32 = pRegFrame->eax;
1013#else
1014 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
1015#endif
1016 pu32 += uIncrement;
1017 GCPhysFault += uIncrement;
1018 pRegFrame->rdi += uIncrement;
1019 pRegFrame->rcx--;
1020 }
1021 pRegFrame->rip += pDis->opsize;
1022
1023#ifdef IN_RC
1024 /* See use in pgmPoolAccessHandlerSimple(). */
1025 PGM_INVL_VCPU_TLBS(pVCpu);
1026#endif
1027
1028 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
1029 return VINF_SUCCESS;
1030}
1031
1032
1033/**
1034 * Handles the simple write accesses.
1035 *
1036 * @returns VBox status code suitable for scheduling.
1037 * @param pVM The VM handle.
1038 * @param pVCpu The VMCPU handle.
1039 * @param pPool The pool.
1040 * @param pPage The pool page (head).
1041 * @param pDis The disassembly of the write instruction.
1042 * @param pRegFrame The trap register frame.
1043 * @param GCPhysFault The fault address as guest physical address.
1044 * @param pvFault The fault address.
1045 */
1046DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1047 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1048{
1049 Log3(("pgmPoolAccessHandlerSimple\n"));
1050 /*
1051 * Increment the modification counter and insert it into the list
1052 * of modified pages the first time.
1053 */
1054 if (!pPage->cModifications++)
1055 pgmPoolMonitorModifiedInsert(pPool, pPage);
1056
1057 /*
1058 * Clear all the pages. ASSUMES that pvFault is readable.
1059 */
1060#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1061 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1062 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, pDis);
1063 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1064#else
1065 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, pDis);
1066#endif
1067
1068 /*
1069 * Interpret the instruction.
1070 */
1071 uint32_t cb;
1072 int rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb);
1073 if (RT_SUCCESS(rc))
1074 pRegFrame->rip += pDis->opsize;
1075 else if (rc == VERR_EM_INTERPRETER)
1076 {
1077 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1078 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1079 rc = VINF_EM_RAW_EMULATE_INSTR;
1080 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1081 }
1082
1083#ifdef IN_RC
1084 /*
1085 * Quick hack, with logging enabled we're getting stale
1086 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
1087 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
1088 * have to be fixed to support this. But that'll have to wait till next week.
1089 *
1090 * An alternative is to keep track of the changed PTEs together with the
1091 * GCPhys from the guest PT. This may prove expensive though.
1092 *
1093 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
1094 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1095 */
1096 PGM_INVL_VCPU_TLBS(pVCpu);
1097#endif
1098
1099 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1100 return rc;
1101}
1102
1103/**
1104 * \#PF Handler callback for PT write accesses.
1105 *
1106 * @returns VBox status code (appropriate for GC return).
1107 * @param pVM VM Handle.
1108 * @param uErrorCode CPU Error code.
1109 * @param pRegFrame Trap register frame.
1110 * NULL on DMA and other non-CPU access.
1111 * @param pvFault The fault address (cr2).
1112 * @param GCPhysFault The GC physical address corresponding to pvFault.
1113 * @param pvUser User argument.
1114 */
1115DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1116{
1117 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1118 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1119 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1120 PVMCPU pVCpu = VMMGetCpu(pVM);
1121 unsigned cMaxModifications;
1122 bool fForcedFlush = false;
1123
1124 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1125
1126 pgmLock(pVM);
1127 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1128 {
1129 /* Pool page changed while we were waiting for the lock; ignore. */
1130 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1131 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1132 pgmUnlock(pVM);
1133 return VINF_SUCCESS;
1134 }
1135
1136 /*
1137 * Disassemble the faulting instruction.
1138 */
1139 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1140 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1141 AssertReturnStmt(rc == VINF_SUCCESS, pgmUnlock(pVM), rc);
1142
1143 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1144
1145 /*
1146 * We should ALWAYS have the list head as user parameter. This
1147 * is because we use that page to record the changes.
1148 */
1149 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1150#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1151 Assert(!pPage->fDirty);
1152#endif
1153
1154 /* Maximum nr of modifications depends on the guest mode. */
1155 if (pDis->mode == CPUMODE_32BIT)
1156 cMaxModifications = 48;
1157 else
1158 cMaxModifications = 24;
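/* The higher limit for 32-bit guests is presumably because their page tables hold
   1024 4-byte entries instead of the 512 8-byte entries used in PAE/long mode. */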
1159
1160 /*
1161 * Incremental page table updates should weigh more than random ones.
1162 * (Only applies when started from offset 0)
1163 */
1164 pVCpu->pgm.s.cPoolAccessHandler++;
1165 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1166 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1167 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1168 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1169 {
1170 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1171 pPage->cModifications = pPage->cModifications * 2;
1172 pPage->pvLastAccessHandlerFault = pvFault;
1173 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1174 if (pPage->cModifications > cMaxModifications)
1175 {
1176 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1177 fForcedFlush = true;
1178 }
1179 }
1180
1181 if (pPage->cModifications >= cMaxModifications)
1182 Log(("Mod overflow %VGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1183
1184 /*
1185 * Check if it's worth dealing with.
1186 */
1187 bool fReused = false;
1188 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1189 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1190 )
1191 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1192 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1193 {
1194 /*
1195 * Simple instructions, no REP prefix.
1196 */
1197 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1198 {
1199 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1200
1201 /* A mov instruction to change the first page table entry will be remembered so we can detect
1202 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1203 */
1204 if ( rc == VINF_SUCCESS
1205 && pDis->pCurInstr->opcode == OP_MOV
1206 && (pvFault & PAGE_OFFSET_MASK) == 0)
1207 {
1208 pPage->pvLastAccessHandlerFault = pvFault;
1209 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1210 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1211 /* Make sure we don't kick out a page too quickly. */
1212 if (pPage->cModifications > 8)
1213 pPage->cModifications = 2;
1214 }
1215 else
1216 if (pPage->pvLastAccessHandlerFault == pvFault)
1217 {
1218 /* ignore the 2nd write to this page table entry. */
1219 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1220 }
1221 else
1222 {
1223 pPage->pvLastAccessHandlerFault = 0;
1224 pPage->pvLastAccessHandlerRip = 0;
1225 }
1226
1227 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1228 pgmUnlock(pVM);
1229 return rc;
1230 }
1231
1232 /*
1233 * Windows is frequently doing small memset() operations (netio test 4k+).
1234 * We have to deal with these or we'll kill the cache and performance.
1235 */
1236 if ( pDis->pCurInstr->opcode == OP_STOSWD
1237 && !pRegFrame->eflags.Bits.u1DF
1238 && pDis->opmode == pDis->mode
1239 && pDis->addrmode == pDis->mode)
1240 {
1241 bool fValidStosd = false;
1242
1243 if ( pDis->mode == CPUMODE_32BIT
1244 && pDis->prefix == PREFIX_REP
1245 && pRegFrame->ecx <= 0x20
1246 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1247 && !((uintptr_t)pvFault & 3)
1248 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1249 )
1250 {
1251 fValidStosd = true;
1252 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1253 }
1254 else
1255 if ( pDis->mode == CPUMODE_64BIT
1256 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1257 && pRegFrame->rcx <= 0x20
1258 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1259 && !((uintptr_t)pvFault & 7)
1260 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1261 )
1262 {
1263 fValidStosd = true;
1264 }
1265
1266 if (fValidStosd)
1267 {
1268 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1269 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1270 pgmUnlock(pVM);
1271 return rc;
1272 }
1273 }
1274
1275 /* REP prefix, don't bother. */
1276 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1277 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1278 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1279 }
1280
1281#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1282 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1283 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1284 */
1285 if ( !fReused
1286 && !fForcedFlush
1287 && pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1288 && pPage->cModifications >= cMaxModifications)
1289 {
1290 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1291 Assert(pPage->fDirty == false);
1292
1293 /* Flush any monitored duplicates as we will disable write protection. */
1294 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1295 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1296 {
1297 PPGMPOOLPAGE pPageHead = pPage;
1298
1299 /* Find the monitor head. */
1300 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1301 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1302
1303 while (pPageHead)
1304 {
1305 unsigned idxNext = pPageHead->iMonitoredNext;
1306
1307 if (pPageHead != pPage)
1308 {
1309 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1310 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1311 AssertRC(rc2);
1312 }
1313
1314 if (idxNext == NIL_PGMPOOL_IDX)
1315 break;
1316
1317 pPageHead = &pPool->aPages[idxNext];
1318 }
1319 }
1320
1321 /* Temporarily allow write access to the page table again. */
1322 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
1323 if (rc == VINF_SUCCESS)
1324 {
1325 rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
1326 AssertMsg(rc == VINF_SUCCESS
1327 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1328 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1329 || rc == VERR_PAGE_NOT_PRESENT,
1330 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1331
1332 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1333
1334 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1335 pgmUnlock(pVM);
1336 return rc;
1337 }
1338 }
1339#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1340
1341 /*
1342 * Not worth it, so flush it.
1343 *
1344 * If we considered it to be reused, don't go back to ring-3
1345 * to emulate failed instructions since we usually cannot
1346 * interpret them. This may be a bit risky, in which case
1347 * the reuse detection must be fixed.
1348 */
1349 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1350 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1351 rc = VINF_SUCCESS;
1352 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1353 pgmUnlock(pVM);
1354 return rc;
1355}
1356
1357# endif /* !IN_RING3 */
1358
1359# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1360/**
1361 * Clear references to guest physical memory in a PAE / PAE page table.
1362 *
1363 * @returns nr of changed PTEs
1364 * @param pPool The pool.
1365 * @param pPage The page.
1366 * @param pShwPT The shadow page table (mapping of the page).
1367 * @param pGstPT The guest page table.
1368 * @param pOldGstPT The old cached guest page table.
1369 */
1370DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT, PCX86PTPAE pOldGstPT)
1371{
1372 unsigned cChanged = 0;
1373
1374 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
1375 {
1376 if (pShwPT->a[i].n.u1Present)
1377 {
1378 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1379 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1380 {
1381#ifdef VBOX_STRICT
1382 RTHCPHYS HCPhys = -1;
1383 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1384 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RGp %RHp vs %RHp\n", rc, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK), HCPhys));
1385#endif
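/* Same physical address: keep the shadow PTE if the tracked attribute bits also
   match and the shadow is not more writable than the guest. */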
1386 uint64_t uHostAttr = pShwPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1387 bool fHostRW = !!(pShwPT->a[i].u & X86_PTE_RW);
1388 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1389 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1390
1391 if ( uHostAttr == uGuestAttr
1392 && fHostRW <= fGuestRW)
1393 continue;
1394 }
1395 cChanged++;
1396 /* Something was changed, so flush it. */
1397 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
1398 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1399 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
1400 ASMAtomicWriteSize(&pShwPT->a[i].u, 0);
1401 }
1402 }
1403 return cChanged;
1404}
1405
1406
1407/**
1408 * Flush a dirty page
1409 *
1410 * @param pVM VM Handle.
1411 * @param pPool The pool.
1412 * @param idxSlot Dirty array slot index
1413 * @param fForceRemoval Force removal from the dirty page list
1414 */
1415static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fForceRemoval = false)
1416{
1417 PPGMPOOLPAGE pPage;
1418 unsigned idxPage;
1419
1420 Assert(idxSlot < RT_ELEMENTS(pPool->aIdxDirtyPages));
1421 if (pPool->aIdxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
1422 return;
1423
1424 idxPage = pPool->aIdxDirtyPages[idxSlot];
1425 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1426 pPage = &pPool->aPages[idxPage];
1427 Assert(pPage->idx == idxPage);
1428 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1429
1430 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1431 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1432
1433 /* Flush those PTEs that have changed. */
1434 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1435 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1436 void *pvGst;
1437 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1438 unsigned cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst, (PCX86PTPAE)&pPool->aDirtyPages[idxSlot][0]);
1439 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1440
1441 /** Note: we might want to consider keeping the dirty page active in case there were many changes. */
1442
1443 /* Write protect the page again to catch all write accesses. */
1444 rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys);
1445 Assert(rc == VINF_SUCCESS);
1446 pPage->fDirty = false;
1447 pPage->fZeroed = true;
1448
1449 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1450 Assert(pPage->cModifications);
1451 if (cChanges < 4)
1452 pPage->cModifications = 1; /* must use > 0 here */
1453 else
1454 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1455
1456 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1457 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages))
1458 pPool->idxFreeDirtyPage = idxSlot;
1459
1460 pPool->cDirtyPages--;
1461 pPool->aIdxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1462 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1463 Log(("Removed dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1464}
1465
1466/**
1467 * Add a new dirty page
1468 *
1469 * @param pVM VM Handle.
1470 * @param pPool The pool.
1471 * @param pPage The page.
1472 */
1473void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1474{
1475 unsigned idxFree;
1476
1477 Assert(PGMIsLocked(pVM));
1478 AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
1479
1480 if (pPage->fDirty)
1481 return;
1482
1483 idxFree = pPool->idxFreeDirtyPage;
1484 Assert(idxFree < RT_ELEMENTS(pPool->aIdxDirtyPages));
1485 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1486
1487 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aIdxDirtyPages))
1488 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* force removal */);
1489 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages));
1490 AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1491
1492 /* Make a copy of the guest page table as we require valid GCPhys addresses when removing
1493 * references to physical pages. (the HCPhys linear lookup is *extremely* expensive!)
1494 */
1495 void *pvGst;
1496 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1497 memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
1498
1499 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1500 Log(("Mark dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1501 pPage->fDirty = true;
1502 pPage->idxDirty = idxFree;
1503 pPool->aIdxDirtyPages[idxFree] = pPage->idx;
1504 pPool->cDirtyPages++;
1505
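/* Advance the free-slot hint round robin; if the next slot is still occupied while
   the array is not yet full, scan for any free slot. */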
1506 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1507 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages)
1508 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1509 {
1510 unsigned i;
1511 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1512 {
1513 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1514 if (pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1515 {
1516 pPool->idxFreeDirtyPage = idxFree;
1517 break;
1518 }
1519 }
1520 Assert(i != RT_ELEMENTS(pPool->aIdxDirtyPages));
1521 }
1522
1523 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages) || pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1524 return;
1525}
1526
1527
1528/**
1529 * Reset all dirty pages by reinstating page monitoring.
1530 *
1531 * @param pVM VM Handle.
1532 * @param fForceRemoval Force removal of all dirty pages
1533 */
1534void pgmPoolResetDirtyPages(PVM pVM, bool fForceRemoval)
1535{
1536 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1537 Assert(PGMIsLocked(pVM));
1538 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1539
1540 if (!pPool->cDirtyPages)
1541 return;
1542
1543 Log(("pgmPoolResetDirtyPages\n"));
1544 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1545 pgmPoolFlushDirtyPage(pVM, pPool, i, fForceRemoval);
1546
1547 pPool->idxFreeDirtyPage = 0;
1548 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1549 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1550 {
1551 unsigned i;
1552 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1553 {
1554 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1555 {
1556 pPool->idxFreeDirtyPage = i;
1557 break;
1558 }
1559 }
1560 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1561 }
1562
1563 Assert(pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages));
1564 return;
1565}
1566# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1567#endif /* PGMPOOL_WITH_MONITORING */
1568
1569#ifdef PGMPOOL_WITH_CACHE
1570
1571/**
1572 * Inserts a page into the GCPhys hash table.
1573 *
1574 * @param pPool The pool.
1575 * @param pPage The page.
1576 */
1577DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1578{
1579 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1580 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1581 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1582 pPage->iNext = pPool->aiHash[iHash];
1583 pPool->aiHash[iHash] = pPage->idx;
1584}
1585
1586
1587/**
1588 * Removes a page from the GCPhys hash table.
1589 *
1590 * @param pPool The pool.
1591 * @param pPage The page.
1592 */
1593DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1594{
1595 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1596 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1597 if (pPool->aiHash[iHash] == pPage->idx)
1598 pPool->aiHash[iHash] = pPage->iNext;
1599 else
1600 {
1601 uint16_t iPrev = pPool->aiHash[iHash];
1602 for (;;)
1603 {
1604 const int16_t i = pPool->aPages[iPrev].iNext;
1605 if (i == pPage->idx)
1606 {
1607 pPool->aPages[iPrev].iNext = pPage->iNext;
1608 break;
1609 }
1610 if (i == NIL_PGMPOOL_IDX)
1611 {
1612 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1613 break;
1614 }
1615 iPrev = i;
1616 }
1617 }
1618 pPage->iNext = NIL_PGMPOOL_IDX;
1619}
1620
1621
1622/**
1623 * Frees up one cache page.
1624 *
1625 * @returns VBox status code.
1626 * @retval VINF_SUCCESS on success.
1627 * @param pPool The pool.
1628 * @param iUser The user index.
1629 */
1630static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1631{
1632#ifndef IN_RC
1633 const PVM pVM = pPool->CTX_SUFF(pVM);
1634#endif
1635 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
1636 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1637
1638 /*
1639 * Select one page from the tail of the age list.
1640 */
1641 PPGMPOOLPAGE pPage;
1642 for (unsigned iLoop = 0; ; iLoop++)
1643 {
1644 uint16_t iToFree = pPool->iAgeTail;
1645 if (iToFree == iUser)
1646 iToFree = pPool->aPages[iToFree].iAgePrev;
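 /* Never evict the page we're allocating on behalf of (iUser); fall back to the second-oldest entry in the age list. */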
1647/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1648 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1649 {
1650 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1651 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1652 {
1653 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1654 continue;
1655 iToFree = i;
1656 break;
1657 }
1658 }
1659*/
1660 Assert(iToFree != iUser);
1661 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1662 pPage = &pPool->aPages[iToFree];
1663
1664 /*
1665 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1666 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1667 */
1668 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1669 break;
1670 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1671 pgmPoolCacheUsed(pPool, pPage);
1672 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1673 }
1674
1675 /*
1676 * Found a usable page, flush it and return.
1677 */
1678 int rc = pgmPoolFlushPage(pPool, pPage);
1679 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1680 if (rc == VINF_SUCCESS)
1681 PGM_INVL_ALL_VCPU_TLBS(pVM);
1682 return rc;
1683}
1684
1685
1686/**
1687 * Checks if a kind mismatch is really a page being reused
1688 * or if it's just normal remappings.
1689 *
1690 * @returns true if reused and the cached page (enmKind1) should be flushed
1691 * @returns false if not reused.
1692 * @param enmKind1 The kind of the cached page.
1693 * @param enmKind2 The kind of the requested page.
1694 */
1695static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1696{
1697 switch (enmKind1)
1698 {
1699 /*
1700 * Never reuse them. There is no remapping in non-paging mode.
1701 */
1702 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1703 case PGMPOOLKIND_32BIT_PD_PHYS:
1704 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1705 case PGMPOOLKIND_PAE_PD_PHYS:
1706 case PGMPOOLKIND_PAE_PDPT_PHYS:
1707 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1708 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1709 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1710 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1711 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1712 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1713 return false;
1714
1715 /*
1716 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1717 */
1718 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1719 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1720 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1721 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1722 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1723 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1724 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1725 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1726 case PGMPOOLKIND_32BIT_PD:
1727 case PGMPOOLKIND_PAE_PDPT:
1728 switch (enmKind2)
1729 {
1730 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1731 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1732 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1733 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1734 case PGMPOOLKIND_64BIT_PML4:
1735 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1736 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1737 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1738 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1739 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1740 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1741 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1742 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1743 return true;
1744 default:
1745 return false;
1746 }
1747
1748 /*
1749 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1750 */
1751 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1752 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1753 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1754 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1755 case PGMPOOLKIND_64BIT_PML4:
1756 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1757 switch (enmKind2)
1758 {
1759 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1760 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1761 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1762 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1763 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1764 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1765 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1766 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1767 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1768 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1769 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1770 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1771 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1772 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1773 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1774 return true;
1775 default:
1776 return false;
1777 }
1778
1779 /*
1780 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1781 */
1782 case PGMPOOLKIND_ROOT_NESTED:
1783 return false;
1784
1785 default:
1786 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1787 }
1788}
1789
1790
1791/**
1792 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1793 *
1794 * @returns VBox status code.
1795 * @retval VINF_PGM_CACHED_PAGE on success.
1796 * @retval VERR_FILE_NOT_FOUND if not found.
1797 * @param pPool The pool.
1798 * @param GCPhys The GC physical address of the page we're gonna shadow.
1799 * @param enmKind The kind of mapping.
1800 * @param enmAccess Access type for the mapping (only relevant for big pages)
1801 * @param iUser The shadow page pool index of the user table.
1802 * @param iUserTable The index into the user table (shadowed).
1803 * @param ppPage Where to store the pointer to the page.
1804 */
1805static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1806{
1807#ifndef IN_RC
1808 const PVM pVM = pPool->CTX_SUFF(pVM);
1809#endif
1810 /*
1811 * Look up the GCPhys in the hash.
1812 */
1813 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1814 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
1815 if (i != NIL_PGMPOOL_IDX)
1816 {
1817 do
1818 {
1819 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1820 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1821 if (pPage->GCPhys == GCPhys)
1822 {
1823 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
1824 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
1825 {
1826 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
1827 * doesn't flush it in case there are no more free use records.
1828 */
1829 pgmPoolCacheUsed(pPool, pPage);
1830
1831 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1832 if (RT_SUCCESS(rc))
1833 {
1834 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
1835 *ppPage = pPage;
1836 if (pPage->cModifications)
1837 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
1838 STAM_COUNTER_INC(&pPool->StatCacheHits);
1839 return VINF_PGM_CACHED_PAGE;
1840 }
1841 return rc;
1842 }
1843
1844 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
1845 {
1846 /*
1847 * The kind is different. In some cases we should now flush the page
1848 * as it has been reused, but in most cases this is normal remapping
1849 * of PDs as PT or big pages using the GCPhys field in a slightly
1850 * different way than the other kinds.
1851 */
1852 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1853 {
1854 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1855 pgmPoolFlushPage(pPool, pPage);
1856 PGM_INVL_VCPU_TLBS(VMMGetCpu(pVM)); /* see PT handler. */
1857 break;
1858 }
1859 }
1860 }
1861
1862 /* next */
1863 i = pPage->iNext;
1864 } while (i != NIL_PGMPOOL_IDX);
1865 }
1866
1867 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
1868 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1869 return VERR_FILE_NOT_FOUND;
1870}
1871
1872
1873/**
1874 * Inserts a page into the cache.
1875 *
1876 * @param pPool The pool.
1877 * @param pPage The cached page.
1878 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1879 */
1880static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1881{
1882 /*
1883 * Insert into the GCPhys hash if the page is fit for that.
1884 */
1885 Assert(!pPage->fCached);
1886 if (fCanBeCached)
1887 {
1888 pPage->fCached = true;
1889 pgmPoolHashInsert(pPool, pPage);
1890 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1891 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1892 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1893 }
1894 else
1895 {
1896 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1897 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1898 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1899 }
1900
1901 /*
1902 * Insert at the head of the age list.
1903 */
1904 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1905 pPage->iAgeNext = pPool->iAgeHead;
1906 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1907 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1908 else
1909 pPool->iAgeTail = pPage->idx;
1910 pPool->iAgeHead = pPage->idx;
1911}
1912
1913
1914/**
1915 * Flushes a cached page.
1916 *
1917 * @param pPool The pool.
1918 * @param pPage The cached page.
1919 */
1920static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1921{
1922 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1923
1924 /*
1925 * Remove the page from the hash.
1926 */
1927 if (pPage->fCached)
1928 {
1929 pPage->fCached = false;
1930 pgmPoolHashRemove(pPool, pPage);
1931 }
1932 else
1933 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1934
1935 /*
1936 * Remove it from the age list.
1937 */
1938 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1939 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1940 else
1941 pPool->iAgeTail = pPage->iAgePrev;
1942 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1943 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1944 else
1945 pPool->iAgeHead = pPage->iAgeNext;
1946 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1947 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1948}
1949
1950#endif /* PGMPOOL_WITH_CACHE */
1951#ifdef PGMPOOL_WITH_MONITORING
1952
1953/**
1954 * Looks for pages sharing the monitor.
1955 *
1956 * @returns Pointer to the head page.
1957 * @returns NULL if not found.
1958 * @param pPool The Pool
1959 * @param pNewPage The page which is going to be monitored.
1960 */
1961static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1962{
1963#ifdef PGMPOOL_WITH_CACHE
1964 /*
1965 * Look up the GCPhys in the hash.
1966 */
1967 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
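 /* Mask off the sub-page bits; some page kinds encode an offset in GCPhys, and the check below matches any pool page shadowing the same guest page. */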
1968 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1969 if (i == NIL_PGMPOOL_IDX)
1970 return NULL;
1971 do
1972 {
1973 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1974 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1975 && pPage != pNewPage)
1976 {
1977 switch (pPage->enmKind)
1978 {
1979 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1980 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1981 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1982 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1983 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1984 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1985 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1986 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1987 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1988 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1989 case PGMPOOLKIND_64BIT_PML4:
1990 case PGMPOOLKIND_32BIT_PD:
1991 case PGMPOOLKIND_PAE_PDPT:
1992 {
1993 /* find the head */
1994 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1995 {
1996 Assert(pPage->iMonitoredPrev != pPage->idx);
1997 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1998 }
1999 return pPage;
2000 }
2001
2002 /* ignore, no monitoring. */
2003 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2004 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2005 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2006 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2007 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2008 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2009 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2010 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2011 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2012 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2013 case PGMPOOLKIND_ROOT_NESTED:
2014 case PGMPOOLKIND_PAE_PD_PHYS:
2015 case PGMPOOLKIND_PAE_PDPT_PHYS:
2016 case PGMPOOLKIND_32BIT_PD_PHYS:
2017 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2018 break;
2019 default:
2020 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2021 }
2022 }
2023
2024 /* next */
2025 i = pPage->iNext;
2026 } while (i != NIL_PGMPOOL_IDX);
2027#endif
2028 return NULL;
2029}
2030
2031
2032/**
2033 * Enables write monitoring of a guest page.
2034 *
2035 * @returns VBox status code.
2036 * @retval VINF_SUCCESS on success.
2037 * @param pPool The pool.
2038 * @param pPage The cached page.
2039 */
2040static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2041{
2042 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
2043
2044 /*
2045 * Filter out the relevant kinds.
2046 */
2047 switch (pPage->enmKind)
2048 {
2049 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2050 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2051 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2052 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2053 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2054 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2055 case PGMPOOLKIND_64BIT_PML4:
2056 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2057 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2058 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2059 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2060 case PGMPOOLKIND_32BIT_PD:
2061 case PGMPOOLKIND_PAE_PDPT:
2062 break;
2063
2064 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2065 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2066 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2067 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2068 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2069 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2070 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2071 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2072 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2073 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2074 case PGMPOOLKIND_ROOT_NESTED:
2075 /* Nothing to monitor here. */
2076 return VINF_SUCCESS;
2077
2078 case PGMPOOLKIND_32BIT_PD_PHYS:
2079 case PGMPOOLKIND_PAE_PDPT_PHYS:
2080 case PGMPOOLKIND_PAE_PD_PHYS:
2081 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2082 /* Nothing to monitor here. */
2083 return VINF_SUCCESS;
2084#ifdef PGMPOOL_WITH_MIXED_PT_CR3
2085 break;
2086#else
2087 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2088#endif
2089 default:
2090 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2091 }
2092
2093 /*
2094 * Install handler.
2095 */
2096 int rc;
2097 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2098 if (pPageHead)
2099 {
2100 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2101 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2102 Assert(pPageHead->iMonitoredPrev == NIL_PGMPOOL_IDX || pPool->aPages[pPageHead->iMonitoredPrev].GCPhys == pPage->GCPhys);
2103 Assert(pPageHead->iMonitoredNext == NIL_PGMPOOL_IDX || pPool->aPages[pPageHead->iMonitoredNext].GCPhys == pPage->GCPhys);
2104
2105#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2106 if (pPageHead->fDirty)
2107 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, true /* force removal */);
2108#endif
2109
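 /* Chain the new page in right after the head; all pages in the chain share the physical access handler registered for the head. */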
2110 pPage->iMonitoredPrev = pPageHead->idx;
2111 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2112 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2113 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2114 pPageHead->iMonitoredNext = pPage->idx;
2115 rc = VINF_SUCCESS;
2116 }
2117 else
2118 {
2119 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2120 PVM pVM = pPool->CTX_SUFF(pVM);
2121 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2122 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2123 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
2124 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2125 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2126 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2127 pPool->pszAccessHandler);
2128 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2129 * the heap size should suffice. */
2130 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2131 Assert(!(VMMGetCpu(pVM)->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(VMMGetCpu(pVM), VMCPU_FF_PGM_SYNC_CR3));
2132 }
2133 pPage->fMonitored = true;
2134 return rc;
2135}
2136
2137
2138/**
2139 * Disables write monitoring of a guest page.
2140 *
2141 * @returns VBox status code.
2142 * @retval VINF_SUCCESS on success.
2143 * @param pPool The pool.
2144 * @param pPage The cached page.
2145 */
2146static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2147{
2148 /*
2149 * Filter out the relevant kinds.
2150 */
2151 switch (pPage->enmKind)
2152 {
2153 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2154 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2155 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2156 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2157 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2158 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2159 case PGMPOOLKIND_64BIT_PML4:
2160 case PGMPOOLKIND_32BIT_PD:
2161 case PGMPOOLKIND_PAE_PDPT:
2162 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2163 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2164 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2165 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2166 break;
2167
2168 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2169 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2170 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2171 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2172 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2173 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2174 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2175 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2176 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2177 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2178 case PGMPOOLKIND_ROOT_NESTED:
2179 case PGMPOOLKIND_PAE_PD_PHYS:
2180 case PGMPOOLKIND_PAE_PDPT_PHYS:
2181 case PGMPOOLKIND_32BIT_PD_PHYS:
2182 /* Nothing to monitor here. */
2183 return VINF_SUCCESS;
2184
2185#ifdef PGMPOOL_WITH_MIXED_PT_CR3
2186 break;
2187#endif
2188 default:
2189 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2190 }
2191
2192 /*
2193 * Remove the page from the monitored list or uninstall it if last.
2194 */
2195 const PVM pVM = pPool->CTX_SUFF(pVM);
2196 int rc;
2197 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2198 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2199 {
2200 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2201 {
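 /* We're the head of the chain: promote the next page and repoint the physical handler's user argument at it. */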
2202 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2203 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2204 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2205 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2206 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2207 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2208 pPool->pszAccessHandler);
2209 AssertFatalRCSuccess(rc);
2210 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2211 }
2212 else
2213 {
2214 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2215 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2216 {
2217 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2218 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2219 }
2220 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2221 rc = VINF_SUCCESS;
2222 }
2223 }
2224 else
2225 {
2226 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2227 AssertFatalRC(rc);
2228#ifdef VBOX_STRICT
2229 PVMCPU pVCpu = VMMGetCpu(pVM);
2230#endif
2231 AssertMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2232 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2233 }
2234 pPage->fMonitored = false;
2235
2236 /*
2237 * Remove it from the list of modified pages (if in it).
2238 */
2239 pgmPoolMonitorModifiedRemove(pPool, pPage);
2240
2241 return rc;
2242}
2243
2244
2245/**
2246 * Inserts the page into the list of modified pages.
2247 *
2248 * @param pPool The pool.
2249 * @param pPage The page.
2250 */
2251void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2252{
2253 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2254 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2255 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2256 && pPool->iModifiedHead != pPage->idx,
2257 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2258 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2259 pPool->iModifiedHead, pPool->cModifiedPages));
2260
2261 pPage->iModifiedNext = pPool->iModifiedHead;
2262 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2263 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2264 pPool->iModifiedHead = pPage->idx;
2265 pPool->cModifiedPages++;
2266#ifdef VBOX_WITH_STATISTICS
2267 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2268 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2269#endif
2270}
2271
2272
2273/**
2274 * Removes the page from the list of modified pages and resets the
2275 * modification counter.
2276 *
2277 * @param pPool The pool.
2278 * @param pPage The page which is believed to be in the list of modified pages.
2279 */
2280static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2281{
2282 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2283 if (pPool->iModifiedHead == pPage->idx)
2284 {
2285 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2286 pPool->iModifiedHead = pPage->iModifiedNext;
2287 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2288 {
2289 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2290 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2291 }
2292 pPool->cModifiedPages--;
2293 }
2294 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2295 {
2296 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2297 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2298 {
2299 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2300 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2301 }
2302 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2303 pPool->cModifiedPages--;
2304 }
2305 else
2306 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2307 pPage->cModifications = 0;
2308}
2309
2310
2311/**
2312 * Zaps the list of modified pages, resetting their modification counters in the process.
2313 *
2314 * @param pVM The VM handle.
2315 */
2316static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2317{
2318 pgmLock(pVM);
2319 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2320 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2321
2322 unsigned cPages = 0; NOREF(cPages);
2323
2324#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2325 pgmPoolResetDirtyPages(pVM, true /* force removal. */);
2326#endif
2327
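 /* Detach the whole modified list up front, then walk it unlinking each page and resetting its modification counter. */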
2328 uint16_t idx = pPool->iModifiedHead;
2329 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2330 while (idx != NIL_PGMPOOL_IDX)
2331 {
2332 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2333 idx = pPage->iModifiedNext;
2334 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2335 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2336 pPage->cModifications = 0;
2337 Assert(++cPages);
2338 }
2339 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2340 pPool->cModifiedPages = 0;
2341 pgmUnlock(pVM);
2342}
2343
2344
2345#ifdef IN_RING3
2346/**
2347 * Callback to clear all shadow pages and clear all modification counters.
2348 *
2349 * @returns VBox status code.
2350 * @param pVM The VM handle.
2351 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
2352 * @param pvUser Unused parameter.
2353 *
2354 * @remark Should only be used when monitoring is available, thus placed in
2355 * the PGMPOOL_WITH_MONITORING \#ifdef.
2356 */
2357DECLCALLBACK(int) pgmPoolClearAll(PVM pVM, PVMCPU pVCpu, void *pvUser)
2358{
2359 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2360 STAM_PROFILE_START(&pPool->StatClearAll, c);
2361 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2362 NOREF(pvUser); NOREF(pVCpu);
2363
2364 pgmLock(pVM);
2365
2366 /*
2367 * Iterate all the pages until we've encountered all that are in use.
2368 * This is a simple but not quite optimal solution.
2369 */
2370 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2371 unsigned cLeft = pPool->cUsedPages;
2372 unsigned iPage = pPool->cCurPages;
2373 while (--iPage >= PGMPOOL_IDX_FIRST)
2374 {
2375 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2376 if (pPage->GCPhys != NIL_RTGCPHYS)
2377 {
2378 switch (pPage->enmKind)
2379 {
2380 /*
2381 * We only care about shadow page tables.
2382 */
2383 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2384 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2385 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2386 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2387 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2388 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2389 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2390 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2391 {
2392#ifdef PGMPOOL_WITH_USER_TRACKING
2393 if (pPage->cPresent)
2394#endif
2395 {
2396 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2397 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2398 ASMMemZeroPage(pvShw);
2399 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2400#ifdef PGMPOOL_WITH_USER_TRACKING
2401 pPage->cPresent = 0;
2402 pPage->iFirstPresent = ~0;
2403#endif
2404 }
2405 }
2406 /* fall thru */
2407
2408 default:
2409 Assert(!pPage->cModifications || ++cModifiedPages);
2410 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2411 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2412 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2413 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2414 pPage->cModifications = 0;
2415 break;
2416
2417 }
2418 if (!--cLeft)
2419 break;
2420 }
2421 }
2422
2423 /* swipe the special pages too. */
2424 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2425 {
2426 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2427 if (pPage->GCPhys != NIL_RTGCPHYS)
2428 {
2429 Assert(!pPage->cModifications || ++cModifiedPages);
2430 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2431 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2432 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2433 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2434 pPage->cModifications = 0;
2435 }
2436 }
2437
2438#ifndef DEBUG_michael
2439 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2440#endif
2441 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2442 pPool->cModifiedPages = 0;
2443
2444#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2445 /*
2446 * Clear all the GCPhys links and rebuild the phys ext free list.
2447 */
2448 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2449 pRam;
2450 pRam = pRam->CTX_SUFF(pNext))
2451 {
2452 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2453 while (iPage-- > 0)
2454 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
2455 }
2456
2457 pPool->iPhysExtFreeHead = 0;
2458 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2459 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2460 for (unsigned i = 0; i < cMaxPhysExts; i++)
2461 {
2462 paPhysExts[i].iNext = i + 1;
2463 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2464 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2465 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2466 }
2467 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2468#endif
2469
2470#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2471 /* Clear all dirty pages. */
2472 pPool->idxFreeDirtyPage = 0;
2473 pPool->cDirtyPages = 0;
2474 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
2475 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
2476#endif
2477
2478 /* Clear the PGM_SYNC_CLEAR_PGM_POOL flag on all VCPUs to prevent redundant flushes. */
2479 for (unsigned idCpu = 0; idCpu < pVM->cCPUs; idCpu++)
2480 {
2481 PVMCPU pVCpu = &pVM->aCpus[idCpu];
2482
2483 pVCpu->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2484 }
2485
2486 pPool->cPresent = 0;
2487 pgmUnlock(pVM);
2488 PGM_INVL_ALL_VCPU_TLBS(pVM);
2489 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2490 return VINF_SUCCESS;
2491}
2492#endif /* IN_RING3 */
2493
2494
2495/**
2496 * Handle SyncCR3 pool tasks
2497 *
2498 * @returns VBox status code.
2499 * @retval VINF_SUCCESS on success.
2500 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2501 * @param pVCpu The VMCPU handle.
2502 * @remark Should only be used when monitoring is available, thus placed in
2503 * the PGMPOOL_WITH_MONITORING #ifdef.
2504 */
2505int pgmPoolSyncCR3(PVMCPU pVCpu)
2506{
2507 PVM pVM = pVCpu->CTX_SUFF(pVM);
2508 LogFlow(("pgmPoolSyncCR3\n"));
2509
2510 /*
2511 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2512 * Occasionally we will have to clear all the shadow page tables because we wanted
2513 * to monitor a page which was mapped by too many shadowed page tables. This operation
2514 * is sometimes referred to as a 'lightweight flush'.
2515 */
2516# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2517 if (ASMBitTestAndClear(&pVCpu->pgm.s.fSyncFlags, PGM_SYNC_CLEAR_PGM_POOL_BIT))
2518 {
2519 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmPoolClearAll, NULL);
2520 AssertRC(rc);
2521 }
2522# else /* !IN_RING3 */
2523 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2524 {
2525 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2526 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2527 return VINF_PGM_SYNC_CR3;
2528 }
2529# endif /* !IN_RING3 */
2530 else
2531 pgmPoolMonitorModifiedClearAll(pVM);
2532
2533 return VINF_SUCCESS;
2534}
2535
2536#endif /* PGMPOOL_WITH_MONITORING */
2537#ifdef PGMPOOL_WITH_USER_TRACKING
2538
2539/**
2540 * Frees up at least one user entry.
2541 *
2542 * @returns VBox status code.
2543 * @retval VINF_SUCCESS on success.
2544 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2545 * @param pPool The pool.
2546 * @param iUser The user index.
2547 */
2548static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2549{
2550 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2551#ifdef PGMPOOL_WITH_CACHE
2552 /*
2553 * Just free cached pages in a braindead fashion.
2554 */
2555 /** @todo walk the age list backwards and free the first with usage. */
2556 int rc = VINF_SUCCESS;
2557 do
2558 {
2559 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2560 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2561 rc = rc2;
2562 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2563 return rc;
2564#else
2565 /*
2566 * Lazy approach.
2567 */
2568 /* @todo This path no longer works (CR3 root pages will be flushed)!! */
2569 AssertCompileFailed();
2570 Assert(!CPUMIsGuestInLongMode(pVM));
2571 pgmPoolFlushAllInt(pPool);
2572 return VERR_PGM_POOL_FLUSHED;
2573#endif
2574}
2575
2576
2577/**
2578 * Inserts a page into the cache.
2579 *
2580 * This will create a user node for the page, insert it into the GCPhys
2581 * hash, and insert it into the age list.
2582 *
2583 * @returns VBox status code.
2584 * @retval VINF_SUCCESS if successfully added.
2585 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2586 * @param pPool The pool.
2587 * @param pPage The cached page.
2588 * @param GCPhys The GC physical address of the page we're gonna shadow.
2589 * @param iUser The user index.
2590 * @param iUserTable The user table index.
2591 */
2592DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2593{
2594 int rc = VINF_SUCCESS;
2595 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2596
2597 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2598
2599#ifdef VBOX_STRICT
2600 /*
2601 * Check that the entry doesn't already exist.
2602 */
2603 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2604 {
2605 uint16_t i = pPage->iUserHead;
2606 do
2607 {
2608 Assert(i < pPool->cMaxUsers);
2609 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2610 i = paUsers[i].iNext;
2611 } while (i != NIL_PGMPOOL_USER_INDEX);
2612 }
2613#endif
2614
2615 /*
2616 * Find a free user node.
2617 */
2618 uint16_t i = pPool->iUserFreeHead;
2619 if (i == NIL_PGMPOOL_USER_INDEX)
2620 {
2621 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2622 if (RT_FAILURE(rc))
2623 return rc;
2624 i = pPool->iUserFreeHead;
2625 }
2626
2627 /*
2628 * Unlink the user node from the free list,
2629 * initialize and insert it into the user list.
2630 */
2631 pPool->iUserFreeHead = paUsers[i].iNext;
2632 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2633 paUsers[i].iUser = iUser;
2634 paUsers[i].iUserTable = iUserTable;
2635 pPage->iUserHead = i;
2636
2637 /*
2638 * Insert into cache and enable monitoring of the guest page if enabled.
2639 *
2640 * Until we implement caching of all levels, including the CR3 one, we'll
2641 * have to make sure we don't try to monitor & cache any recursive reuse of
2642 * a monitored CR3 page. Because all Windows versions are doing this we'll
2643 * have to be able to do combined access monitoring, CR3 + PT and
2644 * PD + PT (guest PAE).
2645 *
2646 * Update:
2647 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2648 */
2649#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2650# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2651 const bool fCanBeMonitored = true;
2652# else
2653 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2654 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2655 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2656# endif
2657# ifdef PGMPOOL_WITH_CACHE
2658 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2659# endif
2660 if (fCanBeMonitored)
2661 {
2662# ifdef PGMPOOL_WITH_MONITORING
2663 rc = pgmPoolMonitorInsert(pPool, pPage);
2664 AssertRC(rc);
2665 }
2666# endif
2667#endif /* PGMPOOL_WITH_MONITORING */
2668 return rc;
2669}
2670
2671
2672# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2673/**
2674 * Adds a user reference to a page.
2675 *
2676 * This will move the page to the head of the age list.
2677 *
2678 * @returns VBox status code.
2679 * @retval VINF_SUCCESS if successfully added.
2680 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2681 * @param pPool The pool.
2682 * @param pPage The cached page.
2683 * @param iUser The user index.
2684 * @param iUserTable The user table.
2685 */
2686static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2687{
2688 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2689
2690 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2691
2692# ifdef VBOX_STRICT
2693 /*
2694 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2695 */
2696 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2697 {
2698 uint16_t i = pPage->iUserHead;
2699 do
2700 {
2701 Assert(i < pPool->cMaxUsers);
2702 AssertMsg(iUser != PGMPOOL_IDX_PD || iUser != PGMPOOL_IDX_PDPT || iUser != PGMPOOL_IDX_NESTED_ROOT || iUser != PGMPOOL_IDX_AMD64_CR3 ||
2703 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2704 i = paUsers[i].iNext;
2705 } while (i != NIL_PGMPOOL_USER_INDEX);
2706 }
2707# endif
2708
2709 /*
2710 * Allocate a user node.
2711 */
2712 uint16_t i = pPool->iUserFreeHead;
2713 if (i == NIL_PGMPOOL_USER_INDEX)
2714 {
2715 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2716 if (RT_FAILURE(rc))
2717 return rc;
2718 i = pPool->iUserFreeHead;
2719 }
2720 pPool->iUserFreeHead = paUsers[i].iNext;
2721
2722 /*
2723 * Initialize the user node and insert it.
2724 */
2725 paUsers[i].iNext = pPage->iUserHead;
2726 paUsers[i].iUser = iUser;
2727 paUsers[i].iUserTable = iUserTable;
2728 pPage->iUserHead = i;
2729
2730# ifdef PGMPOOL_WITH_CACHE
2731 /*
2732 * Tell the cache to update its replacement stats for this page.
2733 */
2734 pgmPoolCacheUsed(pPool, pPage);
2735# endif
2736 return VINF_SUCCESS;
2737}
2738# endif /* PGMPOOL_WITH_CACHE */
2739
2740
2741/**
2742 * Frees a user record associated with a page.
2743 *
2744 * This does not clear the entry in the user table; it simply returns the
2745 * user record to the chain of free records.
2746 *
2747 * @param pPool The pool.
2748 * @param pPage The shadow page.
2749 * @param iUser The shadow page pool index of the user table.
2750 * @param iUserTable The index into the user table (shadowed).
2751 */
2752static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2753{
2754 /*
2755 * Unlink and free the specified user entry.
2756 */
2757 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2758
2759 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2760 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2761 uint16_t i = pPage->iUserHead;
2762 if ( i != NIL_PGMPOOL_USER_INDEX
2763 && paUsers[i].iUser == iUser
2764 && paUsers[i].iUserTable == iUserTable)
2765 {
2766 pPage->iUserHead = paUsers[i].iNext;
2767
2768 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2769 paUsers[i].iNext = pPool->iUserFreeHead;
2770 pPool->iUserFreeHead = i;
2771 return;
2772 }
2773
2774 /* General: Linear search. */
2775 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2776 while (i != NIL_PGMPOOL_USER_INDEX)
2777 {
2778 if ( paUsers[i].iUser == iUser
2779 && paUsers[i].iUserTable == iUserTable)
2780 {
2781 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2782 paUsers[iPrev].iNext = paUsers[i].iNext;
2783 else
2784 pPage->iUserHead = paUsers[i].iNext;
2785
2786 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2787 paUsers[i].iNext = pPool->iUserFreeHead;
2788 pPool->iUserFreeHead = i;
2789 return;
2790 }
2791 iPrev = i;
2792 i = paUsers[i].iNext;
2793 }
2794
2795 /* Fatal: didn't find it */
2796 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2797 iUser, iUserTable, pPage->GCPhys));
2798}
2799
2800
2801/**
2802 * Gets the entry size of a shadow table.
2803 *
2804 * @param enmKind The kind of page.
2805 *
2806 * @returns The size of the entry in bytes. That is, 4 or 8.
2807 * @returns If the kind is not for a table, an assertion is raised and 0 is
2808 * returned.
2809 */
2810DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2811{
2812 switch (enmKind)
2813 {
2814 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2815 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2816 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2817 case PGMPOOLKIND_32BIT_PD:
2818 case PGMPOOLKIND_32BIT_PD_PHYS:
2819 return 4;
2820
2821 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2822 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2823 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2824 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2825 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2826 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2827 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2828 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2829 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2830 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2831 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2832 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2833 case PGMPOOLKIND_64BIT_PML4:
2834 case PGMPOOLKIND_PAE_PDPT:
2835 case PGMPOOLKIND_ROOT_NESTED:
2836 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2837 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2838 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2839 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2840 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2841 case PGMPOOLKIND_PAE_PD_PHYS:
2842 case PGMPOOLKIND_PAE_PDPT_PHYS:
2843 return 8;
2844
2845 default:
2846 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2847 }
2848}
2849
2850
2851/**
2852 * Gets the entry size of a guest table.
2853 *
2854 * @param enmKind The kind of page.
2855 *
2856 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2857 * @returns If the kind is not for a table, an assertion is raised and 0 is
2858 * returned.
2859 */
2860DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2861{
2862 switch (enmKind)
2863 {
2864 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2865 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2866 case PGMPOOLKIND_32BIT_PD:
2867 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2868 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2869 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2870 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2871 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2872 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2873 return 4;
2874
2875 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2876 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2877 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2878 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2879 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2880 case PGMPOOLKIND_64BIT_PML4:
2881 case PGMPOOLKIND_PAE_PDPT:
2882 return 8;
2883
2884 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2885 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2886 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2887 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2888 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2889 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2890 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2891 case PGMPOOLKIND_ROOT_NESTED:
2892 case PGMPOOLKIND_PAE_PD_PHYS:
2893 case PGMPOOLKIND_PAE_PDPT_PHYS:
2894 case PGMPOOLKIND_32BIT_PD_PHYS:
2895 /** @todo can we return 0? (nobody is calling this...) */
2896 AssertFailed();
2897 return 0;
2898
2899 default:
2900 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2901 }
2902}
2903
2904#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2905
2906/**
2907 * Scans one shadow page table for mappings of a physical page.
2908 *
2909 * @param pVM The VM handle.
2910 * @param pPhysPage The guest page in question.
2911 * @param iShw The shadow page table.
2912 * @param cRefs The number of references made in that PT.
2913 */
2914static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2915{
2916 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
2917 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2918
2919 /*
2920 * Assert sanity.
2921 */
2922 Assert(cRefs == 1);
2923 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2924 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2925
2926 /*
2927 * Then, clear the actual mappings to the page in the shadow PT.
2928 */
2929 switch (pPage->enmKind)
2930 {
2931 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2932 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2933 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2934 {
2935 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
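 /* Match entries on host physical address + present bit only; accessed/dirty and permission bits are ignored. */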
2936 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2937 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2938 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2939 {
2940 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2941 pPT->a[i].u = 0;
2942 cRefs--;
2943 if (!cRefs)
2944 return;
2945 }
2946#ifdef LOG_ENABLED
2947 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2948 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2949 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2950 {
2951 Log(("i=%d cRefs=%d\n", i, cRefs--));
2952 }
2953#endif
2954 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2955 break;
2956 }
2957
2958 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2959 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2960 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2961 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2962 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2963 {
2964 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2965 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2966 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2967 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2968 {
2969 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2970 pPT->a[i].u = 0;
2971 cRefs--;
2972 if (!cRefs)
2973 return;
2974 }
2975#ifdef LOG_ENABLED
2976 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2977 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2978 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2979 {
2980 Log(("i=%d cRefs=%d\n", i, cRefs--));
2981 }
2982#endif
2983 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
2984 break;
2985 }
2986
2987 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2988 {
2989 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2990 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2991 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2992 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2993 {
2994 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2995 pPT->a[i].u = 0;
2996 cRefs--;
2997 if (!cRefs)
2998 return;
2999 }
3000#ifdef LOG_ENABLED
3001 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3002 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3003 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3004 {
3005 Log(("i=%d cRefs=%d\n", i, cRefs--));
3006 }
3007#endif
3008 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3009 break;
3010 }
3011
3012 default:
3013 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3014 }
3015}
3016
3017
3018/**
3019 * Scans one shadow page table for mappings of a physical page.
3020 *
3021 * @param pVM The VM handle.
3022 * @param pPhysPage The guest page in question.
3023 * @param iShw The shadow page table.
3024 * @param cRefs The number of references made in that PT.
3025 */
3026void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
3027{
3028 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3029 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
3030 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3031 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
3032 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3033 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3034}
3035
3036
3037/**
3038 * Flushes a list of shadow page tables mapping the same physical page.
3039 *
3040 * @param pVM The VM handle.
3041 * @param pPhysPage The guest page in question.
3042 * @param iPhysExt The physical cross reference extent list to flush.
3043 */
3044void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
3045{
3046 Assert(PGMIsLockOwner(pVM));
3047 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3048 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3049 LogFlow(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%R[pgmpage] iPhysExt=%u\n", pPhysPage, iPhysExt));
3050
3051 const uint16_t iPhysExtStart = iPhysExt;
3052 PPGMPOOLPHYSEXT pPhysExt;
3053 do
3054 {
3055 Assert(iPhysExt < pPool->cMaxPhysExts);
3056 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3057 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3058 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3059 {
3060 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
3061 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3062 }
3063
3064 /* next */
3065 iPhysExt = pPhysExt->iNext;
3066 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3067
3068 /* insert the list into the free list and clear the ram range entry. */
3069 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3070 pPool->iPhysExtFreeHead = iPhysExtStart;
3071 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3072
3073 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3074}
3075
3076#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3077
3078/**
3079 * Flushes all shadow page table mappings of the given guest page.
3080 *
3081 * This is typically called when the host page backing the guest one has been
3082 * replaced or when the page protection was changed due to an access handler.
3083 *
3084 * @returns VBox status code.
3085 * @retval VINF_SUCCESS if all references have been successfully cleared.
3086 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3087 * pool cleaning. FF and sync flags are set.
3088 *
3089 * @param pVM The VM handle.
3090 * @param pPhysPage The guest page in question.
3091 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3092 * flushed; it is NOT touched if this isn't necessary.
3093 * The caller MUST initialize this to @a false.
3094 */
3095int pgmPoolTrackFlushGCPhys(PVM pVM, PPGMPAGE pPhysPage, bool *pfFlushTLBs)
3096{
3097 PVMCPU pVCpu = VMMGetCpu(pVM);
3098 pgmLock(pVM);
3099 int rc = VINF_SUCCESS;
3100#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3101 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3102 if (u16)
3103 {
3104 /*
3105 * The zero page is currently screwing up the tracking and we'll
3106 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3107 * is defined, zero pages won't normally be mapped. Some kind of solution
3108 * will be needed for this problem of course, but it will have to wait...
3109 */
3110 if (PGM_PAGE_IS_ZERO(pPhysPage))
3111 rc = VINF_PGM_GCPHYS_ALIASED;
3112 else
3113 {
3114# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3115 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3116 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3117 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3118# endif
3119
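 /* Dispatch on the tracking data: a single back reference, a physical extent list, or the overflow marker which forces the slow full scan. */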
3120 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3121 pgmPoolTrackFlushGCPhysPT(pVM,
3122 pPhysPage,
3123 PGMPOOL_TD_GET_IDX(u16),
3124 PGMPOOL_TD_GET_CREFS(u16));
3125 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3126 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, PGMPOOL_TD_GET_IDX(u16));
3127 else
3128 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3129 *pfFlushTLBs = true;
3130
3131# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3132 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3133# endif
3134 }
3135 }
3136
3137#elif defined(PGMPOOL_WITH_CACHE)
3138 if (PGM_PAGE_IS_ZERO(pPhysPage))
3139 rc = VINF_PGM_GCPHYS_ALIASED;
3140 else
3141 {
3142# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3143 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow may kill the pool otherwise. */
3144 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3145# endif
3146 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3147 if (rc == VINF_SUCCESS)
3148 *pfFlushTLBs = true;
3149 
3150 # ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3151 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3152 # endif
3153 }
3154
3155#else
3156 rc = VINF_PGM_GCPHYS_ALIASED;
3157#endif
3158
3159 if (rc == VINF_PGM_GCPHYS_ALIASED)
3160 {
3161 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3162 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3163 rc = VINF_PGM_SYNC_CR3;
3164 }
3165 pgmUnlock(pVM);
3166 return rc;
3167}
3168
3169
3170/**
3171 * Scans all shadow page tables for mappings of a physical page.
3172 *
3173 * This may be slow, but it's most likely more efficient than cleaning
3174 * out the entire page pool / cache.
3175 *
3176 * @returns VBox status code.
3177 * @retval VINF_SUCCESS if all references have been successfully cleared.
3178 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3179 * a page pool cleaning.
3180 *
3181 * @param pVM The VM handle.
3182 * @param pPhysPage The guest page in question.
3183 */
3184int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3185{
3186 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3187 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3188 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3189 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3190
3191#if 1
3192 /*
3193 * There is a limit to what makes sense.
3194 */
3195 if (pPool->cPresent > 1024)
3196 {
3197 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3198 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3199 return VINF_PGM_GCPHYS_ALIASED;
3200 }
3201#endif
3202
3203 /*
3204 * Iterate all the pages until we've encountered all that are in use.
3205 * This is a simple but not quite optimal solution.
3206 */
3207 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3208 const uint32_t u32 = u64;
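 /* 32-bit page tables are compared against the truncated address, PAE/EPT tables against the full 64-bit value. */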
3209 unsigned cLeft = pPool->cUsedPages;
3210 unsigned iPage = pPool->cCurPages;
3211 while (--iPage >= PGMPOOL_IDX_FIRST)
3212 {
3213 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3214 if (pPage->GCPhys != NIL_RTGCPHYS)
3215 {
3216 switch (pPage->enmKind)
3217 {
3218 /*
3219 * We only care about shadow page tables.
3220 */
3221 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3222 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3223 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3224 {
3225 unsigned cPresent = pPage->cPresent;
3226 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3227 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3228 if (pPT->a[i].n.u1Present)
3229 {
3230 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3231 {
3232 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3233 pPT->a[i].u = 0;
3234 }
3235 if (!--cPresent)
3236 break;
3237 }
3238 break;
3239 }
3240
3241 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3242 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3243 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3244 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3245 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3246 {
3247 unsigned cPresent = pPage->cPresent;
3248 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3249 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3250 if (pPT->a[i].n.u1Present)
3251 {
3252 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3253 {
3254 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3255 pPT->a[i].u = 0;
3256 }
3257 if (!--cPresent)
3258 break;
3259 }
3260 break;
3261 }
3262 }
3263 if (!--cLeft)
3264 break;
3265 }
3266 }
3267
3268 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3269 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3270 return VINF_SUCCESS;
3271}
3272
3273
3274/**
3275 * Clears the user entry in a user table.
3276 *
3277 * This is used to remove all references to a page when flushing it.
3278 */
3279static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3280{
3281 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3282 Assert(pUser->iUser < pPool->cCurPages);
3283 uint32_t iUserTable = pUser->iUserTable;
3284
3285 /*
3286 * Map the user page.
3287 */
3288 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3289 union
3290 {
3291 uint64_t *pau64;
3292 uint32_t *pau32;
3293 } u;
3294 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
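 /* The same mapping is viewed as 32-bit or 64-bit entries depending on the user page kind (see the switch below). */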
3295
3296 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3297
3298 /* Safety precaution in case we change the paging for other modes too in the future. */
3299 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3300
3301#ifdef VBOX_STRICT
3302 /*
3303 * Some sanity checks.
3304 */
3305 switch (pUserPage->enmKind)
3306 {
3307 case PGMPOOLKIND_32BIT_PD:
3308 case PGMPOOLKIND_32BIT_PD_PHYS:
3309 Assert(iUserTable < X86_PG_ENTRIES);
3310 break;
3311 case PGMPOOLKIND_PAE_PDPT:
3312 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3313 case PGMPOOLKIND_PAE_PDPT_PHYS:
3314 Assert(iUserTable < 4);
3315 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3316 break;
3317 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3318 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3319 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3320 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3321 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3322 case PGMPOOLKIND_PAE_PD_PHYS:
3323 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3324 break;
3325 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3326 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3327 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3328 break;
3329 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3330 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3331 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3332 break;
3333 case PGMPOOLKIND_64BIT_PML4:
3334 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3335 /* GCPhys >> PAGE_SHIFT is the index here */
3336 break;
3337 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3338 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3339 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3340 break;
3341
3342 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3343 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3344 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3345 break;
3346
3347 case PGMPOOLKIND_ROOT_NESTED:
3348 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3349 break;
3350
3351 default:
3352 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3353 break;
3354 }
3355#endif /* VBOX_STRICT */
3356
3357 /*
3358 * Clear the entry in the user page.
3359 */
3360 switch (pUserPage->enmKind)
3361 {
3362 /* 32-bit entries */
3363 case PGMPOOLKIND_32BIT_PD:
3364 case PGMPOOLKIND_32BIT_PD_PHYS:
3365 u.pau32[iUserTable] = 0;
3366 break;
3367
3368 /* 64-bit entries */
3369 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3370 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3371 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3372 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3373 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3374#if defined(IN_RC)
3375            /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during cr3 load, so any
3376 * non-present PDPT will continue to cause page faults.
3377 */
3378 ASMReloadCR3();
3379#endif
3380 /* no break */
3381 case PGMPOOLKIND_PAE_PD_PHYS:
3382 case PGMPOOLKIND_PAE_PDPT_PHYS:
3383 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3384 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3385 case PGMPOOLKIND_64BIT_PML4:
3386 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3387 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3388 case PGMPOOLKIND_PAE_PDPT:
3389 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3390 case PGMPOOLKIND_ROOT_NESTED:
3391 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3392 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3393 u.pau64[iUserTable] = 0;
3394 break;
3395
3396 default:
3397 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3398 }
3399}
3400
3401
3402/**
3403 * Clears all users of a page.
3404 */
3405static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3406{
3407 /*
3408 * Free all the user records.
3409 */
3410 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3411
3412 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3413 uint16_t i = pPage->iUserHead;
3414 while (i != NIL_PGMPOOL_USER_INDEX)
3415 {
3416        /* Clear the entry in the user table. */
3417 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3418
3419 /* Free it. */
3420 const uint16_t iNext = paUsers[i].iNext;
3421 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3422 paUsers[i].iNext = pPool->iUserFreeHead;
3423 pPool->iUserFreeHead = i;
3424
3425 /* Next. */
3426 i = iNext;
3427 }
3428 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3429}
3430
3431#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3432
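/*
 * Note: a guest physical page referenced by more than one shadow page table can no
 * longer be described by the single (cRefs, idx) pair kept in PGMPAGE.  For such pages
 * the tracking data is switched to an index into a chained list of PGMPOOLPHYSEXT
 * records ("extents"), each holding a few shadow page indices (aidx) and an iNext link.
 * The helpers below allocate, free and populate these extents.
 */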
3433/**
3434 * Allocates a new physical cross reference extent.
3435 *
3436 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3437 * @param pVM The VM handle.
3438 * @param piPhysExt Where to store the phys ext index.
3439 */
3440PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3441{
3442 Assert(PGMIsLockOwner(pVM));
3443 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3444 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3445 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3446 {
3447 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3448 return NULL;
3449 }
3450 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3451 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3452 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3453 *piPhysExt = iPhysExt;
3454 return pPhysExt;
3455}
3456
3457
3458/**
3459 * Frees a physical cross reference extent.
3460 *
3461 * @param pVM The VM handle.
3462 * @param iPhysExt The extent to free.
3463 */
3464void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3465{
3466 Assert(PGMIsLockOwner(pVM));
3467 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3468 Assert(iPhysExt < pPool->cMaxPhysExts);
3469 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3470 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3471 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3472 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3473 pPool->iPhysExtFreeHead = iPhysExt;
3474}
3475
3476
3477/**
3478 * Frees a list of physical cross reference extents.
3479 *
3480 * @param pVM The VM handle.
3481 * @param iPhysExt    The index of the head of the extent list to free.
3482 */
3483void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3484{
3485 Assert(PGMIsLockOwner(pVM));
3486 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3487
3488 const uint16_t iPhysExtStart = iPhysExt;
3489 PPGMPOOLPHYSEXT pPhysExt;
3490 do
3491 {
3492 Assert(iPhysExt < pPool->cMaxPhysExts);
3493 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3494 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3495 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3496
3497 /* next */
3498 iPhysExt = pPhysExt->iNext;
3499 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3500
3501 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3502 pPool->iPhysExtFreeHead = iPhysExtStart;
3503}
3504
3505
3506/**
3507 * Insert a reference into a list of physical cross reference extents.
3508 *
3509 * @returns The new tracking data for PGMPAGE.
3510 *
3511 * @param pVM The VM handle.
3512 * @param iPhysExt The physical extent index of the list head.
3513 * @param iShwPT The shadow page table index.
3514 *
3515 */
3516static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3517{
3518 Assert(PGMIsLockOwner(pVM));
3519 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3520 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3521
3522 /* special common case. */
3523 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3524 {
3525 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3526 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3527 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d}\n", iPhysExt, iShwPT));
3528 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3529 }
3530
3531 /* general treatment. */
3532 const uint16_t iPhysExtStart = iPhysExt;
3533 unsigned cMax = 15;
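    /* Walk at most 15 extents; a longer chain is treated as an overflow and the whole
       list is discarded in favour of the 'overflowed' marker. */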
3534 for (;;)
3535 {
3536 Assert(iPhysExt < pPool->cMaxPhysExts);
3537 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3538 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3539 {
3540 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3541 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3542 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3543 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3544 }
3545 if (!--cMax)
3546 {
3547 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3548 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3549 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3550 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3551 }
3552 }
3553
3554 /* add another extent to the list. */
3555 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3556 if (!pNew)
3557 {
3558 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3559 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3560 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3561 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3562 }
3563 pNew->iNext = iPhysExtStart;
3564 pNew->aidx[0] = iShwPT;
3565 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3566 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3567}
3568
3569
3570/**
3571 * Adds a reference to a guest physical page where extents are in use.
3572 *
3573 * @returns The new tracking data for PGMPAGE.
3574 *
3575 * @param pVM The VM handle.
3576 * @param u16         The current 16-bit tracking data of the page.
3577 * @param iShwPT The shadow page table index.
3578 */
3579uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3580{
3581 pgmLock(pVM);
3582 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3583 {
3584 /*
3585 * Convert to extent list.
3586 */
3587 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3588 uint16_t iPhysExt;
3589 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3590 if (pPhysExt)
3591 {
3592 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3593 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3594 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3595 pPhysExt->aidx[1] = iShwPT;
3596 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3597 }
3598 else
3599 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3600 }
3601 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3602 {
3603 /*
3604 * Insert into the extent list.
3605 */
3606 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3607 }
3608 else
3609 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3610 pgmUnlock(pVM);
3611 return u16;
3612}
3613
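/*
 * Illustrative only (symbolic values, not taken from a real run): how the 16-bit
 * tracking word of a PGMPAGE evolves as more shadow page tables reference the same
 * guest page:
 *
 *   no references:    0
 *   first reference:  PGMPOOL_TD_MAKE(1, idxShwPT1)
 *   second reference: PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt) with aidx = {idxShwPT1, idxShwPT2, NIL}
 *   further:          remaining aidx slots, then extents chained via iNext
 *   too many:         PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED)
 */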
3614
3615/**
3616 * Clear references to guest physical memory.
3617 *
3618 * @param pPool The pool.
3619 * @param pPage The page.
3620 * @param pPhysPage Pointer to the aPages entry in the ram range.
3621 */
3622void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3623{
3624 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3625 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3626
3627 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3628 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3629 {
3630 PVM pVM = pPool->CTX_SUFF(pVM);
3631 pgmLock(pVM);
3632
3633 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3634 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
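        /* Walk the extent chain looking for this page's shadow index; when an extent
           becomes completely empty it is unlinked (lonely / head / middle cases below)
           and returned to the free list. */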
3635 do
3636 {
3637 Assert(iPhysExt < pPool->cMaxPhysExts);
3638
3639 /*
3640 * Look for the shadow page and check if it's all freed.
3641 */
3642 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3643 {
3644 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3645 {
3646 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3647
3648 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3649 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3650 {
3651 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3652 pgmUnlock(pVM);
3653 return;
3654 }
3655
3656 /* we can free the node. */
3657 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3658 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3659 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3660 {
3661 /* lonely node */
3662 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3663 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3664 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3665 }
3666 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3667 {
3668 /* head */
3669 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3670 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3671 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3672 }
3673 else
3674 {
3675 /* in list */
3676 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3677 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3678 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3679 }
3680 iPhysExt = iPhysExtNext;
3681 pgmUnlock(pVM);
3682 return;
3683 }
3684 }
3685
3686 /* next */
3687 iPhysExtPrev = iPhysExt;
3688 iPhysExt = paPhysExts[iPhysExt].iNext;
3689 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3690
3691 pgmUnlock(pVM);
3692 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3693 }
3694 else /* nothing to do */
3695 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3696}
3697
3698
3699/**
3700 * Clear references to guest physical memory.
3701 *
3702 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3703 * is assumed to be correct, so the linear search can be skipped and we can assert
3704 * at an earlier point.
3705 *
3706 * @param pPool The pool.
3707 * @param pPage The page.
3708 * @param HCPhys The host physical address corresponding to the guest page.
3709 * @param GCPhys The guest physical address corresponding to HCPhys.
3710 */
3711static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3712{
3713 /*
3714 * Walk range list.
3715 */
3716 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3717 while (pRam)
3718 {
3719 RTGCPHYS off = GCPhys - pRam->GCPhys;
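        /* Unsigned arithmetic: addresses below the range start wrap around and fail the
           bounds check below as well. */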
3720 if (off < pRam->cb)
3721 {
3722 /* does it match? */
3723 const unsigned iPage = off >> PAGE_SHIFT;
3724 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3725#ifdef LOG_ENABLED
3726            RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3727            Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3728#endif
3729 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3730 {
3731 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3732 return;
3733 }
3734 break;
3735 }
3736 pRam = pRam->CTX_SUFF(pNext);
3737 }
3738 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3739}
3740
3741
3742/**
3743 * Clear references to guest physical memory.
3744 *
3745 * @param pPool The pool.
3746 * @param pPage The page.
3747 * @param HCPhys The host physical address corresponding to the guest page.
3748 * @param GCPhysHint  The guest physical address which may correspond to HCPhys.
3749 */
3750void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3751{
3752 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
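    /* GCPhysHint is only a hint (typically taken from the guest PTE); if the guest has
       since remapped the page, the fallback further down scans every ram range for the
       HCPhys. */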
3753
3754 /*
3755 * Walk range list.
3756 */
3757 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3758 while (pRam)
3759 {
3760 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3761 if (off < pRam->cb)
3762 {
3763 /* does it match? */
3764 const unsigned iPage = off >> PAGE_SHIFT;
3765 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3766 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3767 {
3768 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3769 return;
3770 }
3771 break;
3772 }
3773 pRam = pRam->CTX_SUFF(pNext);
3774 }
3775
3776 /*
3777 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3778 */
3779 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3780 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3781 while (pRam)
3782 {
3783 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3784 while (iPage-- > 0)
3785 {
3786 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3787 {
3788 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3789 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3790 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3791 return;
3792 }
3793 }
3794 pRam = pRam->CTX_SUFF(pNext);
3795 }
3796
3797 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3798}
3799
3800
3801/**
3802 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3803 *
3804 * @param pPool The pool.
3805 * @param pPage The page.
3806 * @param pShwPT The shadow page table (mapping of the page).
3807 * @param pGstPT The guest page table.
3808 */
3809DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3810{
3811 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3812 if (pShwPT->a[i].n.u1Present)
3813 {
3814 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3815 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3816 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3817 if (!--pPage->cPresent)
3818 break;
3819 }
3820}
3821
3822
3823/**
3824 * Clear references to guest physical memory in a PAE / 32-bit page table.
3825 *
3826 * @param pPool The pool.
3827 * @param pPage The page.
3828 * @param pShwPT The shadow page table (mapping of the page).
3829 * @param pGstPT The guest page table (just a half one).
3830 */
3831DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3832{
3833 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3834 if (pShwPT->a[i].n.u1Present)
3835 {
3836 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3837 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3838 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3839 }
3840}
3841
3842
3843/**
3844 * Clear references to guest physical memory in a PAE / PAE page table.
3845 *
3846 * @param pPool The pool.
3847 * @param pPage The page.
3848 * @param pShwPT The shadow page table (mapping of the page).
3849 * @param pGstPT The guest page table.
3850 */
3851DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3852{
3853 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3854 if (pShwPT->a[i].n.u1Present)
3855 {
3856            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3857 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3858 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3859 }
3860}
3861
3862
3863/**
3864 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3865 *
3866 * @param pPool The pool.
3867 * @param pPage The page.
3868 * @param pShwPT The shadow page table (mapping of the page).
3869 */
3870DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3871{
3872 RTGCPHYS GCPhys = pPage->GCPhys;
3873 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3874 if (pShwPT->a[i].n.u1Present)
3875 {
3876 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3877 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3878 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3879 }
3880}
3881
3882
3883/**
3884 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3885 *
3886 * @param pPool The pool.
3887 * @param pPage The page.
3888 * @param pShwPT The shadow page table (mapping of the page).
3889 */
3890DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3891{
3892 RTGCPHYS GCPhys = pPage->GCPhys;
3893 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3894 if (pShwPT->a[i].n.u1Present)
3895 {
3896 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3897 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3898 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3899 }
3900}
3901
3902#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3903
3904
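/*
 * The helpers that follow walk each kind of shadow paging structure and hand every
 * present child page back to the pool's user tracking via pgmPoolTrackFreeUser.
 */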
3905/**
3906 * Clear references to shadowed pages in a 32-bit page directory.
3907 *
3908 * @param pPool The pool.
3909 * @param pPage The page.
3910 * @param pShwPD The shadow page directory (mapping of the page).
3911 */
3912DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
3913{
3914 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3915 {
3916 if ( pShwPD->a[i].n.u1Present
3917 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3918 )
3919 {
3920 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
3921 if (pSubPage)
3922 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3923 else
3924 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
3925 }
3926 }
3927}
3928
3929/**
3930 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
3931 *
3932 * @param pPool The pool.
3933 * @param pPage The page.
3934 * @param pShwPD The shadow page directory (mapping of the page).
3935 */
3936DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3937{
3938 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3939 {
3940 if ( pShwPD->a[i].n.u1Present
3941 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3942 )
3943 {
3944 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3945 if (pSubPage)
3946 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3947 else
3948 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3949 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3950 }
3951 }
3952}
3953
3954/**
3955 * Clear references to shadowed pages in a PAE page directory pointer table.
3956 *
3957 * @param pPool The pool.
3958 * @param pPage The page.
3959 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3960 */
3961DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3962{
3963 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
3964 {
3965 if ( pShwPDPT->a[i].n.u1Present
3966 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
3967 )
3968 {
3969 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3970 if (pSubPage)
3971 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3972 else
3973 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3974 }
3975 }
3976}
3977
3978
3979/**
3980 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3981 *
3982 * @param pPool The pool.
3983 * @param pPage The page.
3984 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3985 */
3986DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3987{
3988 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3989 {
3990 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
3991 if (pShwPDPT->a[i].n.u1Present)
3992 {
3993 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3994 if (pSubPage)
3995 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3996 else
3997 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3998 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3999 }
4000 }
4001}
4002
4003
4004/**
4005 * Clear references to shadowed pages in a 64-bit level 4 page table.
4006 *
4007 * @param pPool The pool.
4008 * @param pPage The page.
4009 * @param pShwPML4    The shadow PML4 table (mapping of the page).
4010 */
4011DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4012{
4013 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4014 {
4015 if (pShwPML4->a[i].n.u1Present)
4016 {
4017 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4018 if (pSubPage)
4019 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4020 else
4021 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4022 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4023 }
4024 }
4025}
4026
4027
4028/**
4029 * Clear references to guest physical memory in an EPT page table.
4030 *
4031 * @param pPool The pool.
4032 * @param pPage The page.
4033 * @param pShwPT      The shadow page table (mapping of the page).
4034 */
4035DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4036{
4037 RTGCPHYS GCPhys = pPage->GCPhys;
4038 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4039 if (pShwPT->a[i].n.u1Present)
4040 {
4041 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4042                  i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
4043 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
4044 }
4045}
4046
4047
4048/**
4049 * Clear references to shadowed pages in an EPT page directory.
4050 *
4051 * @param pPool The pool.
4052 * @param pPage The page.
4053 * @param pShwPD The shadow page directory (mapping of the page).
4054 */
4055DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4056{
4057 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4058 {
4059 if (pShwPD->a[i].n.u1Present)
4060 {
4061 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4062 if (pSubPage)
4063 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4064 else
4065 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4066 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4067 }
4068 }
4069}
4070
4071
4072/**
4073 * Clear references to shadowed pages in an EPT page directory pointer table.
4074 *
4075 * @param pPool The pool.
4076 * @param pPage The page.
4077 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4078 */
4079DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4080{
4081 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4082 {
4083 if (pShwPDPT->a[i].n.u1Present)
4084 {
4085 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4086 if (pSubPage)
4087 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4088 else
4089 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4090 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4091 }
4092 }
4093}
4094
4095
4096/**
4097 * Clears all references made by this page.
4098 *
4099 * This includes other shadow pages and GC physical addresses.
4100 *
4101 * @param pPool The pool.
4102 * @param pPage The page.
4103 */
4104static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4105{
4106 /*
4107 * Map the shadow page and take action according to the page kind.
4108 */
4109 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
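    /* Page table kinds deref the guest physical pages they map; directory and higher
       level kinds instead release their references to the child shadow pages. */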
4110 switch (pPage->enmKind)
4111 {
4112#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4113 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4114 {
4115 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4116 void *pvGst;
4117 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4118 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4119 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4120 break;
4121 }
4122
4123 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4124 {
4125 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4126 void *pvGst;
4127 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4128 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
4129 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4130 break;
4131 }
4132
4133 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4134 {
4135 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4136 void *pvGst;
4137 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4138 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
4139 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4140 break;
4141 }
4142
4143 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4144 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4145 {
4146 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4147 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4148 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4149 break;
4150 }
4151
4152 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4153 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4154 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4155 {
4156 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4157 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4158 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4159 break;
4160 }
4161
4162#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4163 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4164 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4165 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4166 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4167 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4168 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4169 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4170 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4171 break;
4172#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4173
4174 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4175 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4176 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4177 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4178 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4179 case PGMPOOLKIND_PAE_PD_PHYS:
4180 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4181 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4182 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4183 break;
4184
4185 case PGMPOOLKIND_32BIT_PD_PHYS:
4186 case PGMPOOLKIND_32BIT_PD:
4187 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4188 break;
4189
4190 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4191 case PGMPOOLKIND_PAE_PDPT:
4192 case PGMPOOLKIND_PAE_PDPT_PHYS:
4193 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4194 break;
4195
4196 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4197 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4198 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4199 break;
4200
4201 case PGMPOOLKIND_64BIT_PML4:
4202 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4203 break;
4204
4205 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4206 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4207 break;
4208
4209 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4210 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4211 break;
4212
4213 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4214 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4215 break;
4216
4217 default:
4218 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4219 }
4220
4221    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4222 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4223 ASMMemZeroPage(pvShw);
4224 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4225 pPage->fZeroed = true;
4226 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4227}
4228#endif /* PGMPOOL_WITH_USER_TRACKING */
4229
4230/**
4231 * Flushes a pool page.
4232 *
4233 * This moves the page to the free list after removing all user references to it.
4234 *
4235 * @returns VBox status code.
4236 * @retval VINF_SUCCESS on success.
4237 * @param pPool The pool.
4238 * @param pPage     The shadow page to flush.
4239 */
4240int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4241{
4242 PVM pVM = pPool->CTX_SUFF(pVM);
4243
4244 int rc = VINF_SUCCESS;
4245 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4246 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4247 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
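    /* Flush sequence: refuse special/locked root pages, clear all parent (user) table
       entries, drop guest physical references, remove the page from the cache and from
       monitoring, then push it back onto the free list. */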
4248
4249 /*
4250 * Quietly reject any attempts at flushing any of the special root pages.
4251 */
4252 if (pPage->idx < PGMPOOL_IDX_FIRST)
4253 {
4254 AssertFailed(); /* can no longer happen */
4255 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4256 return VINF_SUCCESS;
4257 }
4258
4259 pgmLock(pVM);
4260
4261 /*
4262 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4263 */
4264 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4265 {
4266 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4267 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4268 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4269 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4270 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4271 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4272 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4273 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4274 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4275 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4276 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4277 pgmUnlock(pVM);
4278 return VINF_SUCCESS;
4279 }
4280
4281#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4282 /* Start a subset so we won't run out of mapping space. */
4283 PVMCPU pVCpu = VMMGetCpu(pVM);
4284 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4285#endif
4286
4287 /*
4288     * Mark the page as being in need of an ASMMemZeroPage().
4289 */
4290 pPage->fZeroed = false;
4291
4292#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4293 if (pPage->fDirty)
4294 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, true /* force removal */);
4295#endif
4296
4297#ifdef PGMPOOL_WITH_USER_TRACKING
4298 /*
4299 * Clear the page.
4300 */
4301 pgmPoolTrackClearPageUsers(pPool, pPage);
4302 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4303 pgmPoolTrackDeref(pPool, pPage);
4304 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4305#endif
4306
4307#ifdef PGMPOOL_WITH_CACHE
4308 /*
4309 * Flush it from the cache.
4310 */
4311 pgmPoolCacheFlushPage(pPool, pPage);
4312#endif /* PGMPOOL_WITH_CACHE */
4313
4314#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4315 /* Heavy stuff done. */
4316 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4317#endif
4318
4319#ifdef PGMPOOL_WITH_MONITORING
4320 /*
4321     * Deregister the monitoring.
4322 */
4323 if (pPage->fMonitored)
4324 rc = pgmPoolMonitorFlush(pPool, pPage);
4325#endif
4326
4327 /*
4328 * Free the page.
4329 */
4330 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4331 pPage->iNext = pPool->iFreeHead;
4332 pPool->iFreeHead = pPage->idx;
4333 pPage->enmKind = PGMPOOLKIND_FREE;
4334 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4335 pPage->GCPhys = NIL_RTGCPHYS;
4336 pPage->fReusedFlushPending = false;
4337
4338 pPool->cUsedPages--;
4339 pgmUnlock(pVM);
4340 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4341 return rc;
4342}
4343
4344
4345/**
4346 * Frees a usage of a pool page.
4347 *
4348 * The caller is responsible for updating the user table so that it no longer
4349 * references the shadow page.
4350 *
4351 * @param pPool The pool.
4352 * @param pPage     The shadow page.
4353 * @param iUser The shadow page pool index of the user table.
4354 * @param iUserTable The index into the user table (shadowed).
4355 */
4356void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4357{
4358 PVM pVM = pPool->CTX_SUFF(pVM);
4359
4360 STAM_PROFILE_START(&pPool->StatFree, a);
4361 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4362 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4363 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4364 pgmLock(pVM);
4365#ifdef PGMPOOL_WITH_USER_TRACKING
4366 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4367#endif
4368#ifdef PGMPOOL_WITH_CACHE
4369 if (!pPage->fCached)
4370#endif
4371 pgmPoolFlushPage(pPool, pPage);
4372 pgmUnlock(pVM);
4373 STAM_PROFILE_STOP(&pPool->StatFree, a);
4374}
4375
4376
4377/**
4378 * Makes one or more pages available on the free list, either by growing the pool or by freeing a cached page.
4379 *
4380 * @returns VBox status code.
4381 * @retval VINF_SUCCESS on success.
4382 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4383 *
4384 * @param pPool The pool.
4385 * @param enmKind Page table kind
4386 * @param iUser The user of the page.
4387 */
4388static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4389{
4390 PVM pVM = pPool->CTX_SUFF(pVM);
4391
4392 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4393
4394 /*
4395     * If the pool isn't fully grown yet, expand it.
4396 */
4397 if ( pPool->cCurPages < pPool->cMaxPages
4398#if defined(IN_RC)
4399 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4400 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4401 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4402#endif
4403 )
4404 {
4405 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4406#ifdef IN_RING3
4407 int rc = PGMR3PoolGrow(pVM);
4408#else
4409 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4410#endif
4411 if (RT_FAILURE(rc))
4412 return rc;
4413 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4414 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4415 return VINF_SUCCESS;
4416 }
4417
4418#ifdef PGMPOOL_WITH_CACHE
4419 /*
4420 * Free one cached page.
4421 */
4422 return pgmPoolCacheFreeOne(pPool, iUser);
4423#else
4424 /*
4425 * Flush the pool.
4426 *
4427 * If we have tracking enabled, it should be possible to come up with
4428 * a cheap replacement strategy...
4429 */
4430 /* @todo This path no longer works (CR3 root pages will be flushed)!! */
4431 AssertCompileFailed();
4432 Assert(!CPUMIsGuestInLongMode(pVM));
4433 pgmPoolFlushAllInt(pPool);
4434 return VERR_PGM_POOL_FLUSHED;
4435#endif
4436}
4437
4438/**
4439 * Allocates a page from the pool.
4440 *
4441 * This page may actually be a cached page and not in need of any processing
4442 * on the caller's part.
4443 *
4444 * @returns VBox status code.
4445 * @retval VINF_SUCCESS if a NEW page was allocated.
4446 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4447 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4448 * @param pVM The VM handle.
4449 * @param GCPhys    The GC physical address of the page we're going to shadow.
4450 * For 4MB and 2MB PD entries, it's the first address the
4451 * shadow PT is covering.
4452 * @param enmKind The kind of mapping.
4453 * @param enmAccess Access type for the mapping (only relevant for big pages)
4454 * @param iUser The shadow page pool index of the user table.
4455 * @param iUserTable The index into the user table (shadowed).
4456 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4457 * @param fLockPage Lock the page
4458 */
4459int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4460{
4461 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4462 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4463 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4464 *ppPage = NULL;
4465 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4466 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4467 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4468
4469 pgmLock(pVM);
4470
4471#ifdef PGMPOOL_WITH_CACHE
4472 if (pPool->fCacheEnabled)
4473 {
4474 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4475 if (RT_SUCCESS(rc2))
4476 {
4477 if (fLockPage)
4478 pgmPoolLockPage(pPool, *ppPage);
4479 pgmUnlock(pVM);
4480 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4481 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4482 return rc2;
4483 }
4484 }
4485#endif
4486
4487 /*
4488 * Allocate a new one.
4489 */
4490 int rc = VINF_SUCCESS;
4491 uint16_t iNew = pPool->iFreeHead;
4492 if (iNew == NIL_PGMPOOL_IDX)
4493 {
4494 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4495 if (RT_FAILURE(rc))
4496 {
4497 pgmUnlock(pVM);
4498 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4499 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4500 return rc;
4501 }
4502 iNew = pPool->iFreeHead;
4503 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4504 }
4505
4506 /* unlink the free head */
4507 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4508 pPool->iFreeHead = pPage->iNext;
4509 pPage->iNext = NIL_PGMPOOL_IDX;
4510
4511 /*
4512 * Initialize it.
4513 */
4514 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4515 pPage->enmKind = enmKind;
4516 pPage->enmAccess = enmAccess;
4517 pPage->GCPhys = GCPhys;
4518 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4519 pPage->fMonitored = false;
4520 pPage->fCached = false;
4521#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4522 pPage->fDirty = false;
4523#endif
4524 pPage->fReusedFlushPending = false;
4525#ifdef PGMPOOL_WITH_MONITORING
4526 pPage->cModifications = 0;
4527 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4528 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4529#else
4530 pPage->fCR3Mix = false;
4531#endif
4532#ifdef PGMPOOL_WITH_USER_TRACKING
4533 pPage->cPresent = 0;
4534 pPage->iFirstPresent = ~0;
4535 pPage->pvLastAccessHandlerFault = 0;
4536 pPage->cLastAccessHandlerCount = 0;
4537 pPage->pvLastAccessHandlerRip = 0;
4538
4539 /*
4540 * Insert into the tracking and cache. If this fails, free the page.
4541 */
4542 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4543 if (RT_FAILURE(rc3))
4544 {
4545 pPool->cUsedPages--;
4546 pPage->enmKind = PGMPOOLKIND_FREE;
4547 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4548 pPage->GCPhys = NIL_RTGCPHYS;
4549 pPage->iNext = pPool->iFreeHead;
4550 pPool->iFreeHead = pPage->idx;
4551 pgmUnlock(pVM);
4552 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4553 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4554 return rc3;
4555 }
4556#endif /* PGMPOOL_WITH_USER_TRACKING */
4557
4558 /*
4559 * Commit the allocation, clear the page and return.
4560 */
4561#ifdef VBOX_WITH_STATISTICS
4562 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4563 pPool->cUsedPagesHigh = pPool->cUsedPages;
4564#endif
4565
4566 if (!pPage->fZeroed)
4567 {
4568 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4569 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4570 ASMMemZeroPage(pv);
4571 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4572 }
4573
4574 *ppPage = pPage;
4575 if (fLockPage)
4576 pgmPoolLockPage(pPool, pPage);
4577 pgmUnlock(pVM);
4578 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4579 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4580 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4581 return rc;
4582}
4583
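/*
 * Illustrative only (not part of this file): a caller shadowing a guest PAE page table
 * would typically pair pgmPoolAllocEx with pgmPoolFreeByPage roughly like this, where
 * pShwPde, pPde, iPde, GCPhysPT and fFlags are placeholders for the parent shadow PD,
 * the PDE being filled, its index, the guest PT address and the PDE flags:
 *
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAllocEx(pVM, GCPhysPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
 *                             pShwPde->idx, iPde, &pShwPage, false);
 *     if (RT_SUCCESS(rc))
 *         pPde->u = pShwPage->Core.Key | fFlags;    (point the shadow PDE at the new PT)
 *     ...
 *     pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPde);   (when the PDE is torn down)
 */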
4584
4585/**
4586 * Frees a usage of a pool page.
4587 *
4588 * @param pVM The VM handle.
4589 * @param HCPhys The HC physical address of the shadow page.
4590 * @param iUser The shadow page pool index of the user table.
4591 * @param iUserTable The index into the user table (shadowed).
4592 */
4593void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4594{
4595 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4596 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4597 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4598}
4599
4600/**
4601 * Internal worker for finding an 'in-use' shadow page given by its physical address.
4602 *
4603 * @returns Pointer to the shadow page structure.
4604 * @param pPool The pool.
4605 * @param HCPhys The HC physical address of the shadow page.
4606 */
4607PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4608{
4609 PVM pVM = pPool->CTX_SUFF(pVM);
4610
4611 Assert(PGMIsLockOwner(pVM));
4612
4613 /*
4614 * Look up the page.
4615 */
4616 pgmLock(pVM);
4617 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4618 pgmUnlock(pVM);
4619
4620 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4621 return pPage;
4622}
4623
4624
4625#ifdef IN_RING3
4626/**
4627 * Resets the shadow page pool (flushes the entire cache).
4628 *
4629 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4630 * and will execute this CR3 flush.
4631 *
4632 * @param pVM       The VM handle.
4633 */
4634void pgmR3PoolReset(PVM pVM)
4635{
4636 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4637
4638 Assert(PGMIsLockOwner(pVM));
4639 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
4640    LogFlow(("pgmR3PoolReset:\n"));
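    /* Reset sequence: take every VCPU out of shadow mode, rebuild the free list and the
       user / phys-ext tracking arrays from scratch, re-register monitoring for the
       special root pages, then re-enter shadow mode and force a full CR3 resync. */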
4641
4642 /*
4643 * If there are no pages in the pool, there is nothing to do.
4644 */
4645 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4646 {
4647 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4648 return;
4649 }
4650
4651 /*
4652 * Exit the shadow mode since we're going to clear everything,
4653 * including the root page.
4654 */
4655    for (unsigned i = 0; i < pVM->cCPUs; i++)
4656 {
4657 PVMCPU pVCpu = &pVM->aCpus[i];
4658 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
4659 }
4660
4661 /*
4662 * Nuke the free list and reinsert all pages into it.
4663 */
4664 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4665 {
4666 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4667
4668 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
4669#ifdef PGMPOOL_WITH_MONITORING
4670 if (pPage->fMonitored)
4671 pgmPoolMonitorFlush(pPool, pPage);
4672 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4673 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4674 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4675 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4676 pPage->cModifications = 0;
4677#endif
4678 pPage->GCPhys = NIL_RTGCPHYS;
4679 pPage->enmKind = PGMPOOLKIND_FREE;
4680 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4681 Assert(pPage->idx == i);
4682 pPage->iNext = i + 1;
4683 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4684 pPage->fSeenNonGlobal = false;
4685 pPage->fMonitored = false;
4686#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4687 pPage->fDirty = false;
4688#endif
4689 pPage->fCached = false;
4690 pPage->fReusedFlushPending = false;
4691#ifdef PGMPOOL_WITH_USER_TRACKING
4692 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4693#else
4694 pPage->fCR3Mix = false;
4695#endif
4696#ifdef PGMPOOL_WITH_CACHE
4697 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4698 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4699#endif
4700 pPage->cLocked = 0;
4701 }
4702 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4703 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4704 pPool->cUsedPages = 0;
4705
4706#ifdef PGMPOOL_WITH_USER_TRACKING
4707 /*
4708 * Zap and reinitialize the user records.
4709 */
4710 pPool->cPresent = 0;
4711 pPool->iUserFreeHead = 0;
4712 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4713 const unsigned cMaxUsers = pPool->cMaxUsers;
4714 for (unsigned i = 0; i < cMaxUsers; i++)
4715 {
4716 paUsers[i].iNext = i + 1;
4717 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4718 paUsers[i].iUserTable = 0xfffffffe;
4719 }
4720 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4721#endif
4722
4723#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4724 /*
4725 * Clear all the GCPhys links and rebuild the phys ext free list.
4726 */
4727 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
4728 pRam;
4729 pRam = pRam->CTX_SUFF(pNext))
4730 {
4731 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4732 while (iPage-- > 0)
4733 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
4734 }
4735
4736 pPool->iPhysExtFreeHead = 0;
4737 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4738 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4739 for (unsigned i = 0; i < cMaxPhysExts; i++)
4740 {
4741 paPhysExts[i].iNext = i + 1;
4742 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4743 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4744 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4745 }
4746 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4747#endif
4748
4749#ifdef PGMPOOL_WITH_MONITORING
4750 /*
4751 * Just zap the modified list.
4752 */
4753 pPool->cModifiedPages = 0;
4754 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4755#endif
4756
4757#ifdef PGMPOOL_WITH_CACHE
4758 /*
4759 * Clear the GCPhys hash and the age list.
4760 */
4761 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4762 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4763 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4764 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4765#endif
4766
4767#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4768 /* Clear all dirty pages. */
4769 pPool->idxFreeDirtyPage = 0;
4770 pPool->cDirtyPages = 0;
4771 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
4772 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
4773#endif
4774
4775 /*
4776 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4777 */
4778 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4779 {
4780 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4781 pPage->iNext = NIL_PGMPOOL_IDX;
4782#ifdef PGMPOOL_WITH_MONITORING
4783 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4784 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4785 pPage->cModifications = 0;
4786 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4787 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4788 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4789 if (pPage->fMonitored)
4790 {
4791 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4792 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4793 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4794 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4795 pPool->pszAccessHandler);
4796 AssertFatalRCSuccess(rc);
4797# ifdef PGMPOOL_WITH_CACHE
4798 pgmPoolHashInsert(pPool, pPage);
4799# endif
4800 }
4801#endif
4802#ifdef PGMPOOL_WITH_USER_TRACKING
4803 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4804#endif
4805#ifdef PGMPOOL_WITH_CACHE
4806 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4807 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4808#endif
4809 }
4810
4811    for (unsigned i = 0; i < pVM->cCPUs; i++)
4812 {
4813 PVMCPU pVCpu = &pVM->aCpus[i];
4814 /*
4815 * Re-enter the shadowing mode and assert Sync CR3 FF.
4816 */
4817 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
4818 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
4819 }
4820
4821 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4822}
4823#endif /* IN_RING3 */
4824
4825#ifdef LOG_ENABLED
4826static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
4827{
4828 switch(enmKind)
4829 {
4830 case PGMPOOLKIND_INVALID:
4831 return "PGMPOOLKIND_INVALID";
4832 case PGMPOOLKIND_FREE:
4833 return "PGMPOOLKIND_FREE";
4834 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4835 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4836 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4837 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4838 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4839 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4840 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4841 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4842 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4843 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4844 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4845 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
4846 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4847 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
4848 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4849 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
4850 case PGMPOOLKIND_32BIT_PD:
4851 return "PGMPOOLKIND_32BIT_PD";
4852 case PGMPOOLKIND_32BIT_PD_PHYS:
4853 return "PGMPOOLKIND_32BIT_PD_PHYS";
4854 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4855 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
4856 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4857 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
4858 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4859 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
4860 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4861 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
4862 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4863 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
4864 case PGMPOOLKIND_PAE_PD_PHYS:
4865 return "PGMPOOLKIND_PAE_PD_PHYS";
4866 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4867 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
4868 case PGMPOOLKIND_PAE_PDPT:
4869 return "PGMPOOLKIND_PAE_PDPT";
4870 case PGMPOOLKIND_PAE_PDPT_PHYS:
4871 return "PGMPOOLKIND_PAE_PDPT_PHYS";
4872 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4873 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
4874 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4875 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
4876 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4877 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
4878 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4879 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
4880 case PGMPOOLKIND_64BIT_PML4:
4881 return "PGMPOOLKIND_64BIT_PML4";
4882 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4883 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
4884 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4885 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
4886 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4887 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
4888 case PGMPOOLKIND_ROOT_NESTED:
4889 return "PGMPOOLKIND_ROOT_NESTED";
4890 }
4891 return "Unknown kind!";
4892}
4893#endif /* LOG_ENABLED */