VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp @ 27744

Last change on this file since 27744 was 27704, checked in by vboxsync, 15 years ago

More informative assertions

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 197.3 KB
1/* $Id: PGMAllPool.cpp 27704 2010-03-25 13:18:31Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "../PGMInternal.h"
35#include <VBox/vm.h>
36#include "../PGMInline.h"
37#include <VBox/disopcode.h>
38#include <VBox/hwacc_vmx.h>
39
40#include <VBox/log.h>
41#include <VBox/err.h>
42#include <iprt/asm.h>
43#include <iprt/string.h>
44
45
46/*******************************************************************************
47* Internal Functions *
48*******************************************************************************/
49RT_C_DECLS_BEGIN
50static void pgmPoolFlushAllInt(PPGMPOOL pPool);
51DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
52DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
53static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
54static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
55static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
56#ifndef IN_RING3
57DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
58#endif
59#ifdef LOG_ENABLED
60static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
61#endif
62#if defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)
63static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT);
64#endif
65
66int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
67PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
68void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
69void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
70
71RT_C_DECLS_END
72
73
74/**
75 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
76 *
77 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
78 * @param enmKind The page kind.
79 */
80DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
81{
82 switch (enmKind)
83 {
84 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
85 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
86 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
87 return true;
88 default:
89 return false;
90 }
91}
92
93/** @def PGMPOOL_PAGE_2_LOCKED_PTR
94 * Maps a pool page into the current context and locks it (RC only).
95 *
96 * @returns Pointer to the pool page mapped into the current context.
97 * @param pVM The VM handle.
98 * @param pPage The pool page.
99 *
100 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume one of the
101 * small page window entries employed by that function. Be careful.
102 * @remark There is no need to assert on the result.
103 */
104#if defined(IN_RC)
105DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
106{
107 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
108
109 /* Make sure the dynamic mapping will not be reused. */
110 if (pv)
111 PGMDynLockHCPage(pVM, (uint8_t *)pv);
112
113 return pv;
114}
115#else
116# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
117#endif
118
119/** @def PGMPOOL_UNLOCK_PTR
120 * Unlocks a page previously locked by PGMPOOL_PAGE_2_LOCKED_PTR (RC only).
121 *
122 * @returns VBox status code.
123 * @param pVM The VM handle.
124 * @param pPage The pool page.
125 *
126 * @remark In RC this uses PGMDynUnlockHCPage(), releasing the dynamic mapping
127 * window entry locked by PGMPOOL_PAGE_2_LOCKED_PTR(). Be careful.
128 * @remark There is no need to assert on the result.
129 */
130#if defined(IN_RC)
131DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
132{
133 if (pvPage)
134 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
135}
136#else
137# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
138#endif
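/* Typical usage pattern (see pgmPoolMonitorChainChanging below):
 *      uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
 *      ... read/modify the shadow entries ...
 *      PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
 * In ring-3 and ring-0 the lock/unlock pair is free (plain mapping / no-op); it
 * only matters in RC where the dynamic mapping window is a scarce resource. */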
139
140
141/**
142 * Flushes a chain of pages sharing the same access monitor.
143 *
144 * @returns VBox status code suitable for scheduling.
145 * @param pPool The pool.
146 * @param pPage A page in the chain.
147 */
148int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
149{
150 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
151
152 /*
153 * Find the list head.
154 */
155 uint16_t idx = pPage->idx;
156 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
157 {
158 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
159 {
160 idx = pPage->iMonitoredPrev;
161 Assert(idx != pPage->idx);
162 pPage = &pPool->aPages[idx];
163 }
164 }
165
166 /*
167 * Iterate the list flushing each shadow page.
168 */
169 int rc = VINF_SUCCESS;
170 for (;;)
171 {
172 idx = pPage->iMonitoredNext;
173 Assert(idx != pPage->idx);
174 if (pPage->idx >= PGMPOOL_IDX_FIRST)
175 {
176 int rc2 = pgmPoolFlushPage(pPool, pPage);
177 AssertRC(rc2);
178 }
179 /* next */
180 if (idx == NIL_PGMPOOL_IDX)
181 break;
182 pPage = &pPool->aPages[idx];
183 }
184 return rc;
185}
186
187
188/**
189 * Wrapper for getting the current context pointer to the entry being modified.
190 *
191 * @returns VBox status code suitable for scheduling.
192 * @param pVM VM Handle.
193 * @param pvDst Destination address
194 * @param pvSrc Source guest virtual address.
195 * @param GCPhysSrc The source guest physical address.
196 * @param cb Size of data to read
197 */
198DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
199{
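    /* Note: masking with ~(cb - 1) aligns the source down to an entry boundary, so a
     * write that faults in the middle of a 4/8 byte PTE/PDE still reads the whole,
     * aligned entry (e.g. with cb=8 an address ending in ...5 is read from ...0).
     * This assumes cb is a power of two, which holds for the sizeof() values passed in. */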
200#if defined(IN_RING3)
201 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
202 return VINF_SUCCESS;
203#else
204 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
205 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
206#endif
207}
208
209/**
210 * Process shadow entries before they are changed by the guest.
211 *
212 * For PT entries we will clear them. For PD entries, we'll simply check
213 * for mapping conflicts and set the SyncCR3 FF if found.
214 *
215 * @param pVCpu VMCPU handle
216 * @param pPool The pool.
217 * @param pPage The head page.
218 * @param GCPhysFault The guest physical fault address.
219 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
220 * In R3 this is the host context 'fault' address.
221 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
222 */
223void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
224{
225 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
226 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
227 PVM pVM = pPool->CTX_SUFF(pVM);
228
229 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
230
231 for (;;)
232 {
233 union
234 {
235 void *pv;
236 PX86PT pPT;
237 PX86PTPAE pPTPae;
238 PX86PD pPD;
239 PX86PDPAE pPDPae;
240 PX86PDPT pPDPT;
241 PX86PML4 pPML4;
242 } uShw;
243
244 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s cbWrite=%d\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
245
246 uShw.pv = NULL;
247 switch (pPage->enmKind)
248 {
249 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
250 {
251 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
252 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
253 const unsigned iShw = off / sizeof(X86PTE);
254 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
255 if (uShw.pPT->a[iShw].n.u1Present)
256 {
257 X86PTE GstPte;
258
259 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
260 AssertRC(rc);
261 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
262 pgmPoolTracDerefGCPhysHint(pPool, pPage,
263 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
264 GstPte.u & X86_PTE_PG_MASK);
265 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
266 }
267 break;
268 }
269
270 /* page/2 sized */
271 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
272 {
273 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
274 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
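            /* A shadow PAE PT covers only half (2 KB) of the guest's 4 KB 32-bit PT, so
             * only act if the write hit the half this page shadows; the XOR below is
             * non-zero exactly when off and pPage->GCPhys disagree on the PAGE_SIZE/2 bit
             * (pPage->GCPhys presumably carries that bit for the half being shadowed). */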
275 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
276 {
277 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
278 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
279 if (uShw.pPTPae->a[iShw].n.u1Present)
280 {
281 X86PTE GstPte;
282 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
283 AssertRC(rc);
284
285 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
286 pgmPoolTracDerefGCPhysHint(pPool, pPage,
287 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
288 GstPte.u & X86_PTE_PG_MASK);
289 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
290 }
291 }
292 break;
293 }
294
295 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
296 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
297 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
298 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
299 {
300 unsigned iGst = off / sizeof(X86PDE);
301 unsigned iShwPdpt = iGst / 256;
302 unsigned iShw = (iGst % 256) * 2;
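            /* A 32-bit guest PD has 1024 entries covering 4 GB, shadowed by four PAE PDs
             * (PD0..PD3) of 512 entries each: every block of 256 guest PDEs maps to one
             * shadow PD, and each guest PDE expands to two PAE PDEs (4 MB -> 2 x 2 MB).
             * E.g. guest PDE 300 gives iShwPdpt=1 and iShw=88. */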
303 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
304
305 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
306 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
307 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
308 {
309 for (unsigned i = 0; i < 2; i++)
310 {
311# ifndef IN_RING0
312 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
313 {
314 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
315 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
316 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
317 break;
318 }
319 else
320# endif /* !IN_RING0 */
321 if (uShw.pPDPae->a[iShw+i].n.u1Present)
322 {
323 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
324 pgmPoolFree(pVM,
325 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
326 pPage->idx,
327 iShw + i);
328 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
329 }
330
331 /* paranoia / a bit assumptive. */
332 if ( (off & 3)
333 && (off & 3) + cbWrite > 4)
334 {
335 const unsigned iShw2 = iShw + 2 + i;
336 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
337 {
338# ifndef IN_RING0
339 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
340 {
341 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
342 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
343 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
344 break;
345 }
346 else
347# endif /* !IN_RING0 */
348 if (uShw.pPDPae->a[iShw2].n.u1Present)
349 {
350 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
351 pgmPoolFree(pVM,
352 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
353 pPage->idx,
354 iShw2);
355 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
356 }
357 }
358 }
359 }
360 }
361 break;
362 }
363
364 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
365 {
366 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
367 const unsigned iShw = off / sizeof(X86PTEPAE);
368 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
369 if (uShw.pPTPae->a[iShw].n.u1Present)
370 {
371 X86PTEPAE GstPte;
372 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
373 AssertRC(rc);
374
375 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
376 pgmPoolTracDerefGCPhysHint(pPool, pPage,
377 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
378 GstPte.u & X86_PTE_PAE_PG_MASK);
379 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
380 }
381
382 /* paranoia / a bit assumptive. */
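            /* E.g. a 4-byte write at an offset ending in ...6 straddles two 8-byte PAE
             * PTEs; iShw2 below indexes the second entry touched by such a write. */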
383 if ( (off & 7)
384 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
385 {
386 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
387 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
388
389 if (uShw.pPTPae->a[iShw2].n.u1Present)
390 {
391 X86PTEPAE GstPte;
392# ifdef IN_RING3
393 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
394# else
395 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
396# endif
397 AssertRC(rc);
398 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
399 pgmPoolTracDerefGCPhysHint(pPool, pPage,
400 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
401 GstPte.u & X86_PTE_PAE_PG_MASK);
402 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u ,0);
403 }
404 }
405 break;
406 }
407
408 case PGMPOOLKIND_32BIT_PD:
409 {
410 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
411 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
412
413 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
414 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
415# ifndef IN_RING0
416 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
417 {
418 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
419 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
420 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
421 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
422 break;
423 }
424# endif /* !IN_RING0 */
425# ifndef IN_RING0
426 else
427# endif /* !IN_RING0 */
428 {
429 if (uShw.pPD->a[iShw].n.u1Present)
430 {
431 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
432 pgmPoolFree(pVM,
433 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
434 pPage->idx,
435 iShw);
436 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
437 }
438 }
439 /* paranoia / a bit assumptive. */
440 if ( (off & 3)
441 && (off & 3) + cbWrite > sizeof(X86PTE))
442 {
443 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
444 if ( iShw2 != iShw
445 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
446 {
447# ifndef IN_RING0
448 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
449 {
450 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
451 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
452 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
453 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
454 break;
455 }
456# endif /* !IN_RING0 */
457# ifndef IN_RING0
458 else
459# endif /* !IN_RING0 */
460 {
461 if (uShw.pPD->a[iShw2].n.u1Present)
462 {
463 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
464 pgmPoolFree(pVM,
465 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
466 pPage->idx,
467 iShw2);
468 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
469 }
470 }
471 }
472 }
473#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
474 if ( uShw.pPD->a[iShw].n.u1Present
475 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
476 {
477 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
478# ifdef IN_RC /* TLB load - we're pushing things a bit... */
479 ASMProbeReadByte(pvAddress);
480# endif
481 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
482 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
483 }
484#endif
485 break;
486 }
487
488 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
489 {
490 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
491 const unsigned iShw = off / sizeof(X86PDEPAE);
492 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
493#ifndef IN_RING0
494 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
495 {
496 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
497 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
498 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
499 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
500 break;
501 }
502#endif /* !IN_RING0 */
503 /*
504 * Causes trouble when the guest uses a PDE to refer to the whole page table level
505 * structure. (Invalidate here; faults later on when it tries to change the page
506 * table entries -> recheck; probably only applies to the RC case.)
507 */
508# ifndef IN_RING0
509 else
510# endif /* !IN_RING0 */
511 {
512 if (uShw.pPDPae->a[iShw].n.u1Present)
513 {
514 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
515 pgmPoolFree(pVM,
516 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
517 pPage->idx,
518 iShw);
519 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
520 }
521 }
522 /* paranoia / a bit assumptive. */
523 if ( (off & 7)
524 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
525 {
526 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
527 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
528
529#ifndef IN_RING0
530 if ( iShw2 != iShw
531 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
532 {
533 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
534 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
535 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
536 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
537 break;
538 }
539#endif /* !IN_RING0 */
540# ifndef IN_RING0
541 else
542# endif /* !IN_RING0 */
543 if (uShw.pPDPae->a[iShw2].n.u1Present)
544 {
545 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
546 pgmPoolFree(pVM,
547 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
548 pPage->idx,
549 iShw2);
550 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
551 }
552 }
553 break;
554 }
555
556 case PGMPOOLKIND_PAE_PDPT:
557 {
558 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
559 /*
560 * Hopefully this doesn't happen very often:
561 * - touching unused parts of the page
562 * - messing with the bits of pd pointers without changing the physical address
563 */
564 /* PDPT roots are not page aligned; 32 byte only! */
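            /* The PAE PDPT has just 4 entries of 8 bytes (32 bytes total), so the offset
             * is taken from the GCPhys delta rather than from the page offset used by
             * the page-aligned cases above. */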
565 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
566
567 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
568 const unsigned iShw = offPdpt / sizeof(X86PDPE);
569 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
570 {
571# ifndef IN_RING0
572 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
573 {
574 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
575 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
576 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
577 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
578 break;
579 }
580# endif /* !IN_RING0 */
581# ifndef IN_RING0
582 else
583# endif /* !IN_RING0 */
584 if (uShw.pPDPT->a[iShw].n.u1Present)
585 {
586 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
587 pgmPoolFree(pVM,
588 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
589 pPage->idx,
590 iShw);
591 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
592 }
593
594 /* paranoia / a bit assumptive. */
595 if ( (offPdpt & 7)
596 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
597 {
598 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
599 if ( iShw2 != iShw
600 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
601 {
602# ifndef IN_RING0
603 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
604 {
605 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
606 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
607 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
608 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
609 break;
610 }
611# endif /* !IN_RING0 */
612# ifndef IN_RING0
613 else
614# endif /* !IN_RING0 */
615 if (uShw.pPDPT->a[iShw2].n.u1Present)
616 {
617 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
618 pgmPoolFree(pVM,
619 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
620 pPage->idx,
621 iShw2);
622 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
623 }
624 }
625 }
626 }
627 break;
628 }
629
630#ifndef IN_RC
631 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
632 {
633 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
634 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
635 const unsigned iShw = off / sizeof(X86PDEPAE);
636 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
637 if (uShw.pPDPae->a[iShw].n.u1Present)
638 {
639 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
640 pgmPoolFree(pVM,
641 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
642 pPage->idx,
643 iShw);
644 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
645 }
646 /* paranoia / a bit assumptive. */
647 if ( (off & 7)
648 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
649 {
650 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
651 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
652
653 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
654 if (uShw.pPDPae->a[iShw2].n.u1Present)
655 {
656 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
657 pgmPoolFree(pVM,
658 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
659 pPage->idx,
660 iShw2);
661 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
662 }
663 }
664 break;
665 }
666
667 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
668 {
669 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
670 /*
671 * Hopefully this doesn't happen very often:
672 * - messing with the bits of pd pointers without changing the physical address
673 */
674 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
675 const unsigned iShw = off / sizeof(X86PDPE);
676 if (uShw.pPDPT->a[iShw].n.u1Present)
677 {
678 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
679 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
680 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
681 }
682 /* paranoia / a bit assumptive. */
683 if ( (off & 7)
684 && (off & 7) + cbWrite > sizeof(X86PDPE))
685 {
686 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
687 if (uShw.pPDPT->a[iShw2].n.u1Present)
688 {
689 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
690 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
691 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
692 }
693 }
694 break;
695 }
696
697 case PGMPOOLKIND_64BIT_PML4:
698 {
699 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
700 /*
701 * Hopefully this doesn't happen very often:
702 * - messing with the bits of pd pointers without changing the physical address
703 */
704 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
705 const unsigned iShw = off / sizeof(X86PDPE);
706 if (uShw.pPML4->a[iShw].n.u1Present)
707 {
708 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
709 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
710 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
711 }
712 /* paranoia / a bit assumptive. */
713 if ( (off & 7)
714 && (off & 7) + cbWrite > sizeof(X86PDPE))
715 {
716 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
717 if (uShw.pPML4->a[iShw2].n.u1Present)
718 {
719 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
720 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
721 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
722 }
723 }
724 break;
725 }
726#endif /* !IN_RC */
727
728 default:
729 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
730 }
731 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
732
733 /* next */
734 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
735 return;
736 pPage = &pPool->aPages[pPage->iMonitoredNext];
737 }
738}
739
740# ifndef IN_RING3
741/**
742 * Checks if an access could be part of a fork operation in progress.
743 *
744 * Meaning that the guest is setting up the parent process for copy-on-write.
745 *
746 * @returns true if it's likely that we're forking, otherwise false.
747 * @param pPool The pool.
748 * @param pDis The disassembled instruction.
749 * @param offFault The access offset.
750 */
751DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
752{
753 /*
754 * i386 Linux uses btr to clear X86_PTE_RW.
755 * The functions involved are (2.6.16 source inspection):
756 * clear_bit
757 * ptep_set_wrprotect
758 * copy_one_pte
759 * copy_pte_range
760 * copy_pmd_range
761 * copy_pud_range
762 * copy_page_range
763 * dup_mmap
764 * dup_mm
765 * copy_mm
766 * copy_process
767 * do_fork
768 */
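    /* X86_PTE_RW is bit 1, so the btr should hit the low dword of the (PAE) PTE;
     * the (offFault & 4) test below is a cheap approximation of that. */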
769 if ( pDis->pCurInstr->opcode == OP_BTR
770 && !(offFault & 4)
771 /** @todo Validate that the bit index is X86_PTE_RW. */
772 )
773 {
774 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
775 return true;
776 }
777 return false;
778}
779
780
781/**
782 * Determine whether the page is likely to have been reused.
783 *
784 * @returns true if we consider the page as being reused for a different purpose.
785 * @returns false if we consider it to still be a paging page.
786 * @param pVM VM Handle.
787 * @param pVCpu VMCPU Handle.
788 * @param pRegFrame Trap register frame.
789 * @param pDis The disassembly info for the faulting instruction.
790 * @param pvFault The fault address.
791 *
792 * @remark The REP prefix check is left to the caller because of STOSD/W.
793 */
794DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
795{
796#ifndef IN_RC
797 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
798 if ( HWACCMHasPendingIrq(pVM)
799 && (pRegFrame->rsp - pvFault) < 32)
800 {
801 /* Fault caused by stack writes while trying to inject an interrupt event. */
802 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
803 return true;
804 }
805#else
806 NOREF(pVM); NOREF(pvFault);
807#endif
808
809 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
810
811 /* Non-supervisor mode write means it's used for something else. */
812 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
813 return true;
814
815 switch (pDis->pCurInstr->opcode)
816 {
817 /* call implies the actual push of the return address faulted */
818 case OP_CALL:
819 Log4(("pgmPoolMonitorIsReused: CALL\n"));
820 return true;
821 case OP_PUSH:
822 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
823 return true;
824 case OP_PUSHF:
825 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
826 return true;
827 case OP_PUSHA:
828 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
829 return true;
830 case OP_FXSAVE:
831 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
832 return true;
833 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
834 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
835 return true;
836 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
837 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
838 return true;
839 case OP_MOVSWD:
840 case OP_STOSWD:
841 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
842 && pRegFrame->rcx >= 0x40
843 )
844 {
845 Assert(pDis->mode == CPUMODE_64BIT);
846
847 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
848 return true;
849 }
850 return false;
851 }
852 if ( ( (pDis->param1.flags & USE_REG_GEN32)
853 || (pDis->param1.flags & USE_REG_GEN64))
854 && (pDis->param1.base.reg_gen == USE_REG_ESP))
855 {
856 Log4(("pgmPoolMonitorIsReused: ESP\n"));
857 return true;
858 }
859
860 return false;
861}
862
863/**
864 * Flushes the page being accessed.
865 *
866 * @returns VBox status code suitable for scheduling.
867 * @param pVM The VM handle.
868 * @param pVCpu The VMCPU handle.
869 * @param pPool The pool.
870 * @param pPage The pool page (head).
871 * @param pDis The disassembly of the write instruction.
872 * @param pRegFrame The trap register frame.
873 * @param GCPhysFault The fault address as guest physical address.
874 * @param pvFault The fault address.
875 */
876static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
877 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
878{
879 /*
880 * First, do the flushing.
881 */
882 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
883
884 /*
885 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection). Must do this in raw mode (!); XP boot will fail otherwise
886 */
887 uint32_t cbWritten;
888 int rc2 = EMInterpretInstructionCPUEx(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten, EMCODETYPE_ALL);
889 if (RT_SUCCESS(rc2))
890 pRegFrame->rip += pDis->opsize;
891 else if (rc2 == VERR_EM_INTERPRETER)
892 {
893#ifdef IN_RC
894 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
895 {
896 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
897 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
898 rc = VINF_SUCCESS;
899 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
900 }
901 else
902#endif
903 {
904 rc = VINF_EM_RAW_EMULATE_INSTR;
905 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
906 }
907 }
908 else
909 rc = rc2;
910
911 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
912 return rc;
913}
914
915/**
916 * Handles the STOSD write accesses.
917 *
918 * @returns VBox status code suitable for scheduling.
919 * @param pVM The VM handle.
920 * @param pPool The pool.
921 * @param pPage The pool page (head).
922 * @param pDis The disassembly of the write instruction.
923 * @param pRegFrame The trap register frame.
924 * @param GCPhysFault The fault address as guest physical address.
925 * @param pvFault The fault address.
926 */
927DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
928 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
929{
930 unsigned uIncrement = pDis->param1.size;
931
932 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
933 Assert(pRegFrame->rcx <= 0x20);
934
935#ifdef VBOX_STRICT
936 if (pDis->opmode == CPUMODE_32BIT)
937 Assert(uIncrement == 4);
938 else
939 Assert(uIncrement == 8);
940#endif
941
942 Log3(("pgmPoolAccessHandlerSTOSD\n"));
943
944 /*
945 * Increment the modification counter and insert it into the list
946 * of modified pages the first time.
947 */
948 if (!pPage->cModifications++)
949 pgmPoolMonitorModifiedInsert(pPool, pPage);
950
951 /*
952 * Execute REP STOSD.
953 *
954 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
955 * write situation, meaning that it's safe to write here.
956 */
957 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
958 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
959 while (pRegFrame->rcx)
960 {
961#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
962 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
963 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
964 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
965#else
966 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
967#endif
968#ifdef IN_RC
969 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
970#else
971 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
972#endif
973 pu32 += uIncrement;
974 GCPhysFault += uIncrement;
975 pRegFrame->rdi += uIncrement;
976 pRegFrame->rcx--;
977 }
978 pRegFrame->rip += pDis->opsize;
979
980 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
981 return VINF_SUCCESS;
982}
983
984
985/**
986 * Handles the simple write accesses.
987 *
988 * @returns VBox status code suitable for scheduling.
989 * @param pVM The VM handle.
990 * @param pVCpu The VMCPU handle.
991 * @param pPool The pool.
992 * @param pPage The pool page (head).
993 * @param pDis The disassembly of the write instruction.
994 * @param pRegFrame The trap register frame.
995 * @param GCPhysFault The fault address as guest physical address.
996 * @param pvFault The fault address.
997 * @param pfReused Reused state (out)
998 */
999DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1000 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
1001{
1002 Log3(("pgmPoolAccessHandlerSimple\n"));
1003 /*
1004 * Increment the modification counter and insert it into the list
1005 * of modified pages the first time.
1006 */
1007 if (!pPage->cModifications++)
1008 pgmPoolMonitorModifiedInsert(pPool, pPage);
1009
1010 /*
1011 * Clear all the pages. ASSUMES that pvFault is readable.
1012 */
1013#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1014 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1015 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1016 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1017#else
1018 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1019#endif
1020
1021 /*
1022 * Interpret the instruction.
1023 */
1024 uint32_t cb;
1025 int rc = EMInterpretInstructionCPUEx(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb, EMCODETYPE_ALL);
1026 if (RT_SUCCESS(rc))
1027 pRegFrame->rip += pDis->opsize;
1028 else if (rc == VERR_EM_INTERPRETER)
1029 {
1030 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1031 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1032 rc = VINF_EM_RAW_EMULATE_INSTR;
1033 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1034 }
1035
1036#if 0 /* experimental code */
1037 if (rc == VINF_SUCCESS)
1038 {
1039 switch (pPage->enmKind)
1040 {
1041 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1042 {
1043 X86PTEPAE GstPte;
1044 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1045 AssertRC(rc);
1046
1047 /* Check the new value written by the guest. If present and with a bogus physical address, then
1048 * it's fairly safe to assume the guest is reusing the PT.
1049 */
1050 if (GstPte.n.u1Present)
1051 {
1052 RTHCPHYS HCPhys = -1;
1053 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1054 if (rc != VINF_SUCCESS)
1055 {
1056 *pfReused = true;
1057 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1058 }
1059 }
1060 break;
1061 }
1062 }
1063 }
1064#endif
1065
1066 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1067 return rc;
1068}
1069
1070/**
1071 * \#PF Handler callback for PT write accesses.
1072 *
1073 * @returns VBox status code (appropriate for GC return).
1074 * @param pVM VM Handle.
1075 * @param uErrorCode CPU Error code.
1076 * @param pRegFrame Trap register frame.
1077 * NULL on DMA and other non CPU access.
1078 * @param pvFault The fault address (cr2).
1079 * @param GCPhysFault The GC physical address corresponding to pvFault.
1080 * @param pvUser User argument.
1081 */
1082DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1083{
1084 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1085 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1086 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1087 PVMCPU pVCpu = VMMGetCpu(pVM);
1088 unsigned cMaxModifications;
1089 bool fForcedFlush = false;
1090
1091 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1092
1093 pgmLock(pVM);
1094 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1095 {
1096 /* Pool page changed while we were waiting for the lock; ignore. */
1097 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1098 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1099 pgmUnlock(pVM);
1100 return VINF_SUCCESS;
1101 }
1102#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1103 if (pPage->fDirty)
1104 {
1105 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1106 pgmUnlock(pVM);
1107 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1108 }
1109#endif
1110
1111#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1112 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1113 {
1114 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1115 void *pvGst;
1116 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1117 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1118 }
1119#endif
1120
1121 /*
1122 * Disassemble the faulting instruction.
1123 */
1124 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1125 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1126 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1127 {
1128 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1129 pgmUnlock(pVM);
1130 return rc;
1131 }
1132
1133 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1134
1135 /*
1136 * We should ALWAYS have the list head as user parameter. This
1137 * is because we use that page to record the changes.
1138 */
1139 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1140
1141#ifdef IN_RING0
1142 /* Maximum nr of modifications depends on the page type. */
1143 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1144 cMaxModifications = 4;
1145 else
1146 cMaxModifications = 24;
1147#else
1148 cMaxModifications = 48;
1149#endif
1150
1151 /*
1152 * Incremental page table updates should weigh more than random ones.
1153 * (Only applies when started from offset 0)
1154 */
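    /* E.g. a guest populating or zeroing a page table with consecutive MOVs faults on
     * entry 0, then 1, 2, ... from nearly the same RIP; each such hit doubles
     * cModifications below, so a sequentially rewritten page reaches the flush/dirty
     * threshold much sooner than one receiving scattered writes. */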
1155 pVCpu->pgm.s.cPoolAccessHandler++;
1156 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1157 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1158 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1159 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1160 {
1161 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1162 pPage->cModifications = pPage->cModifications * 2;
1163 pPage->pvLastAccessHandlerFault = pvFault;
1164 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1165 if (pPage->cModifications >= cMaxModifications)
1166 {
1167 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1168 fForcedFlush = true;
1169 }
1170 }
1171
1172 if (pPage->cModifications >= cMaxModifications)
1173 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1174
1175 /*
1176 * Check if it's worth dealing with.
1177 */
1178 bool fReused = false;
1179 bool fNotReusedNotForking = false;
1180 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1181 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1182 )
1183 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1184 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1185 {
1186 /*
1187 * Simple instructions, no REP prefix.
1188 */
1189 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1190 {
1191 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1192 if (fReused)
1193 goto flushPage;
1194
1195 /* A mov instruction to change the first page table entry will be remembered so we can detect
1196 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1197 */
1198 if ( rc == VINF_SUCCESS
1199 && pDis->pCurInstr->opcode == OP_MOV
1200 && (pvFault & PAGE_OFFSET_MASK) == 0)
1201 {
1202 pPage->pvLastAccessHandlerFault = pvFault;
1203 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1204 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1205 /* Make sure we don't kick out a page too quickly. */
1206 if (pPage->cModifications > 8)
1207 pPage->cModifications = 2;
1208 }
1209 else
1210 if (pPage->pvLastAccessHandlerFault == pvFault)
1211 {
1212 /* ignore the 2nd write to this page table entry. */
1213 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1214 }
1215 else
1216 {
1217 pPage->pvLastAccessHandlerFault = 0;
1218 pPage->pvLastAccessHandlerRip = 0;
1219 }
1220
1221 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1222 pgmUnlock(pVM);
1223 return rc;
1224 }
1225
1226 /*
1227 * Windows is frequently doing small memset() operations (netio test 4k+).
1228 * We have to deal with these or we'll kill the cache and performance.
1229 */
1230 if ( pDis->pCurInstr->opcode == OP_STOSWD
1231 && !pRegFrame->eflags.Bits.u1DF
1232 && pDis->opmode == pDis->mode
1233 && pDis->addrmode == pDis->mode)
1234 {
1235 bool fValidStosd = false;
1236
1237 if ( pDis->mode == CPUMODE_32BIT
1238 && pDis->prefix == PREFIX_REP
1239 && pRegFrame->ecx <= 0x20
1240 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1241 && !((uintptr_t)pvFault & 3)
1242 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1243 )
1244 {
1245 fValidStosd = true;
1246 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1247 }
1248 else
1249 if ( pDis->mode == CPUMODE_64BIT
1250 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1251 && pRegFrame->rcx <= 0x20
1252 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1253 && !((uintptr_t)pvFault & 7)
1254 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1255 )
1256 {
1257 fValidStosd = true;
1258 }
1259
1260 if (fValidStosd)
1261 {
1262 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1263 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1264 pgmUnlock(pVM);
1265 return rc;
1266 }
1267 }
1268
1269 /* REP prefix, don't bother. */
1270 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1271 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1272 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1273 fNotReusedNotForking = true;
1274 }
1275
1276#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1277 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1278 * leads to pgm pool thrashing and an excessive amount of write faults due to page monitoring.
1279 */
1280 if ( pPage->cModifications >= cMaxModifications
1281 && !fForcedFlush
1282 && pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1283 && ( fNotReusedNotForking
1284 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1285 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1286 )
1287 )
1288 {
1289 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1290 Assert(pPage->fDirty == false);
1291
1292 /* Flush any monitored duplicates as we will disable write protection. */
1293 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1294 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1295 {
1296 PPGMPOOLPAGE pPageHead = pPage;
1297
1298 /* Find the monitor head. */
1299 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1300 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1301
1302 while (pPageHead)
1303 {
1304 unsigned idxNext = pPageHead->iMonitoredNext;
1305
1306 if (pPageHead != pPage)
1307 {
1308 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1309 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1310 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1311 AssertRC(rc2);
1312 }
1313
1314 if (idxNext == NIL_PGMPOOL_IDX)
1315 break;
1316
1317 pPageHead = &pPool->aPages[idxNext];
1318 }
1319 }
1320
1321 /* The flushing above might fail for locked pages, so double check. */
1322 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1323 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1324 {
1325 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1326
1327 /* Temporarily allow write access to the page table again. */
1328 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
1329 if (rc == VINF_SUCCESS)
1330 {
1331 rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
1332 AssertMsg(rc == VINF_SUCCESS
1333 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1334 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1335 || rc == VERR_PAGE_NOT_PRESENT,
1336 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1337
1338 pPage->pvDirtyFault = pvFault;
1339
1340 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1341 pgmUnlock(pVM);
1342 return rc;
1343 }
1344 }
1345 }
1346#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1347
1348 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1349flushPage:
1350 /*
1351 * Not worth it, so flush it.
1352 *
1353 * If we considered it to be reused, don't go back to ring-3
1354 * to emulate failed instructions since we usually cannot
1355 * interpret them. This may be a bit risky, in which case
1356 * the reuse detection must be fixed.
1357 */
1358 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1359 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1360 && fReused)
1361 {
1362 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1363 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1364 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1365 }
1366 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1367 pgmUnlock(pVM);
1368 return rc;
1369}
1370
1371# endif /* !IN_RING3 */
1372
1373# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1374
1375# ifdef VBOX_STRICT
1376/**
1377 * Check references to guest physical memory in a PAE / PAE page table.
1378 *
1379 * @param pPool The pool.
1380 * @param pPage The page.
1381 * @param pShwPT The shadow page table (mapping of the page).
1382 * @param pGstPT The guest page table.
1383 */
1384static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
1385{
1386 unsigned cErrors = 0;
1387 int LastRc = -1; /* initialized to shut up gcc */
1388 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1389 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1390
1391#ifdef VBOX_STRICT
1392 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1393 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1394#endif
1395 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1396 {
1397 if (pShwPT->a[i].n.u1Present)
1398 {
1399 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1400 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1401 if ( rc != VINF_SUCCESS
1402 || (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) != HCPhys)
1403 {
1404 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1405 LastPTE = i;
1406 LastRc = rc;
1407 LastHCPhys = HCPhys;
1408 cErrors++;
1409
1410 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1411 rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pPage->GCPhys, &HCPhysPT);
1412 AssertRC(rc);
1413
1414 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1415 {
1416 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1417
1418 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1419 {
1420 PX86PTPAE pShwPT2 = (PX86PTPAE)PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pTempPage);
1421
1422 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1423 {
1424 if ( pShwPT2->a[j].n.u1Present
1425 && pShwPT2->a[j].n.u1Write
1426 && ((pShwPT2->a[j].u & X86_PTE_PAE_PG_MASK) == HCPhysPT))
1427 {
1428 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, pShwPT->a[j].u, pShwPT2->a[j].u));
1429 }
1430 }
1431 }
1432 }
1433 }
1434 }
1435 }
1436 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, pShwPT->a[LastPTE].u, LastHCPhys));
1437}
1438# endif /* VBOX_STRICT */
1439
1440/**
1441 * Clear references to guest physical memory in a PAE / PAE page table.
1442 *
1443 * @returns Number of changed PTEs.
1444 * @param pPool The pool.
1445 * @param pPage The page.
1446 * @param pShwPT The shadow page table (mapping of the page).
1447 * @param pGstPT The guest page table.
1448 * @param pOldGstPT The old cached guest page table.
1449 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1450 * @param pfFlush Flush reused page table (out)
1451 */
1452DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT, PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1453{
1454 unsigned cChanged = 0;
1455
1456#ifdef VBOX_STRICT
1457 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1458 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1459#endif
1460 *pfFlush = false;
1461
1462 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1463 {
1464 /* Check the new value written by the guest. If present and with a bogus physical address, then
1465 * it's fairly safe to assume the guest is reusing the PT.
1466 */
1467 if ( fAllowRemoval
1468 && pGstPT->a[i].n.u1Present)
1469 {
1470 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1471 {
1472 *pfFlush = true;
1473 return ++cChanged;
1474 }
1475 }
1476 if (pShwPT->a[i].n.u1Present)
1477 {
1478 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1479 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1480 {
1481#ifdef VBOX_STRICT
1482 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1483 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1484 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1485#endif
1486 uint64_t uHostAttr = pShwPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1487 bool fHostRW = !!(pShwPT->a[i].u & X86_PTE_RW);
1488 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1489 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1490
1491 if ( uHostAttr == uGuestAttr
1492 && fHostRW <= fGuestRW)
1493 continue;
1494 }
1495 cChanged++;
1496 /* Something was changed, so flush it. */
1497 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1498 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1499 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
1500 ASMAtomicWriteSize(&pShwPT->a[i].u, 0);
1501 }
1502 }
1503 return cChanged;
1504}
1505
1506
1507/**
1508 * Flushes a dirty page.
1509 *
1510 * @param pVM VM Handle.
1511 * @param pPool The pool.
1512 * @param idxSlot Dirty array slot index
1513 * @param fAllowRemoval Allow a reused page table to be removed
1514 */
1515static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1516{
1517 PPGMPOOLPAGE pPage;
1518 unsigned idxPage;
1519
1520 Assert(idxSlot < RT_ELEMENTS(pPool->aIdxDirtyPages));
1521 if (pPool->aIdxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
1522 return;
1523
1524 idxPage = pPool->aIdxDirtyPages[idxSlot];
1525 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1526 pPage = &pPool->aPages[idxPage];
1527 Assert(pPage->idx == idxPage);
1528 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1529
1530 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1531 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1532
1533 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1534 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys);
1535 Assert(rc == VINF_SUCCESS);
1536 pPage->fDirty = false;
1537
1538#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1539 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(VMMGetCpu(pVM));
1540#endif
1541
1542#ifdef VBOX_STRICT
1543 uint64_t fFlags = 0;
1544 RTHCPHYS HCPhys;
1545 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1546 AssertMsg( ( rc == VINF_SUCCESS
1547 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1548 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1549 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1550 || rc == VERR_PAGE_NOT_PRESENT,
1551 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1552#endif
1553
1554 /* Flush those PTEs that have changed. */
1555 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1556 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1557 void *pvGst;
1558 bool fFlush;
1559 rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1560 unsigned cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst, (PCX86PTPAE)&pPool->aDirtyPages[idxSlot][0], fAllowRemoval, &fFlush);
1561 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1562 /** Note: we might want to consider keeping the dirty page active in case there were many changes. */
1563
1564 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1565 Assert(pPage->cModifications);
1566 if (cChanges < 4)
1567 pPage->cModifications = 1; /* must use > 0 here */
1568 else
1569 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1570
1571 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1572 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages))
1573 pPool->idxFreeDirtyPage = idxSlot;
1574
1575 pPool->cDirtyPages--;
1576 pPool->aIdxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1577 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1578 if (fFlush)
1579 {
1580 Assert(fAllowRemoval);
1581 Log(("Flush reused page table!\n"));
1582 pgmPoolFlushPage(pPool, pPage);
1583 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1584 }
1585 else
1586 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1587
1588#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1589 PGMDynMapPopAutoSubset(VMMGetCpu(pVM), iPrevSubset);
1590#endif
1591}
1592
1593# ifndef IN_RING3
1594/**
1595 * Adds a new dirty page.
1596 *
1597 * @param pVM VM Handle.
1598 * @param pPool The pool.
1599 * @param pPage The page.
1600 */
1601void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1602{
1603 unsigned idxFree;
1604
1605 Assert(PGMIsLocked(pVM));
1606 AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
1607 Assert(!pPage->fDirty);
1608
1609 idxFree = pPool->idxFreeDirtyPage;
1610 Assert(idxFree < RT_ELEMENTS(pPool->aIdxDirtyPages));
1611 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1612
1613 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aIdxDirtyPages))
1614 {
1615 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1616 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1617 }
1618 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages));
1619 AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1620
1621 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1622
1623 /* Make a copy of the guest page table as we require valid GCPhys addresses when removing
1624 * references to physical pages. (the HCPhys linear lookup is *extremely* expensive!)
1625 */
1626 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1627 void *pvGst;
1628 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1629 memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
1630#ifdef VBOX_STRICT
1631 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1632#endif
1633
1634 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1635 pPage->fDirty = true;
1636 pPage->idxDirty = idxFree;
1637 pPool->aIdxDirtyPages[idxFree] = pPage->idx;
1638 pPool->cDirtyPages++;
1639
1640 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1641 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages)
1642 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1643 {
1644 unsigned i;
1645 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1646 {
1647 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1648 if (pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1649 {
1650 pPool->idxFreeDirtyPage = idxFree;
1651 break;
1652 }
1653 }
1654 Assert(i != RT_ELEMENTS(pPool->aIdxDirtyPages));
1655 }
1656
1657 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages) || pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1658 return;
1659}
1660# endif /* !IN_RING3 */
1661
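/*
 * A rough model of the dirty-page bookkeeping above (simplified sketch with
 * hypothetical names, not the actual pool code): aIdxDirtyPages[] behaves like a
 * small power-of-two ring of slots, a NIL value marks a free slot, and the free
 * index advances with a masked increment plus a linear probe once slots start
 * filling up.
 *
 *     #define DIRTY_SLOTS 16                      // must be a power of two (cf. the AssertCompile above)
 *     #define NIL_SLOT    UINT16_MAX              // free slot marker
 *     static uint16_t s_aSlots[DIRTY_SLOTS];      // page indices; assume initialised to NIL_SLOT
 *
 *     static unsigned nextFreeSlot(unsigned idxFree)
 *     {
 *         idxFree = (idxFree + 1) & (DIRTY_SLOTS - 1);                     // masked wrap-around
 *         for (unsigned i = 1; i < DIRTY_SLOTS && s_aSlots[idxFree] != NIL_SLOT; i++)
 *             idxFree = (idxFree + 1) & (DIRTY_SLOTS - 1);                 // probe for a free slot
 *         return idxFree;
 *     }
 */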
1662/**
1663 * Checks if the specified page is dirty (not write monitored).
1664 *
1665 * @returns true if the page is dirty, false if not.
1666 * @param pVM VM Handle.
1667 * @param GCPhys Guest physical address.
1668 */
1669bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1670{
1671 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1672 Assert(PGMIsLocked(pVM));
1673 if (!pPool->cDirtyPages)
1674 return false;
1675
1676 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1677
1678 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1679 {
1680 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1681 {
1682 PPGMPOOLPAGE pPage;
1683 unsigned idxPage = pPool->aIdxDirtyPages[i];
1684
1685 pPage = &pPool->aPages[idxPage];
1686 if (pPage->GCPhys == GCPhys)
1687 return true;
1688 }
1689 }
1690 return false;
1691}
1692
1693/**
1694 * Reset all dirty pages by reinstating page monitoring.
1695 *
1696 * @param pVM VM Handle.
1697 */
1698void pgmPoolResetDirtyPages(PVM pVM)
1699{
1700 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1701 Assert(PGMIsLocked(pVM));
1702 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1703
1704 if (!pPool->cDirtyPages)
1705 return;
1706
1707 Log(("pgmPoolResetDirtyPages\n"));
1708 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1709 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1710
1711 pPool->idxFreeDirtyPage = 0;
1712 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1713 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1714 {
1715 unsigned i;
1716 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1717 {
1718 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1719 {
1720 pPool->idxFreeDirtyPage = i;
1721 break;
1722 }
1723 }
1724 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1725 }
1726
1727 Assert(pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages));
1728 return;
1729}
1730
1731/**
1732 * Invalidates dirty page tracking for the specified page table, flushing it if dirty.
1733 *
1734 * @param pVM VM Handle.
1735 * @param GCPhysPT Physical address of the page table.
1736 */
1737void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1738{
1739 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1740 Assert(PGMIsLocked(pVM));
1741 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1742 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aIdxDirtyPages);
1743
1744 if (!pPool->cDirtyPages)
1745 return;
1746
1747 GCPhysPT = GCPhysPT & ~(RTGCPHYS)(PAGE_SIZE - 1);
1748
1749 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1750 {
1751 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1752 {
1753 unsigned idxPage = pPool->aIdxDirtyPages[i];
1754
1755 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1756 if (pPage->GCPhys == GCPhysPT)
1757 {
1758 idxDirtyPage = i;
1759 break;
1760 }
1761 }
1762 }
1763
1764 if (idxDirtyPage != RT_ELEMENTS(pPool->aIdxDirtyPages))
1765 {
1766 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1767 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1768 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1769 {
1770 unsigned i;
1771 for (i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1772 {
1773 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1774 {
1775 pPool->idxFreeDirtyPage = i;
1776 break;
1777 }
1778 }
1779 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1780 }
1781 }
1782}
1783
1784# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1785
1786/**
1787 * Inserts a page into the GCPhys hash table.
1788 *
1789 * @param pPool The pool.
1790 * @param pPage The page.
1791 */
1792DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1793{
1794 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1795 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1796 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1797 pPage->iNext = pPool->aiHash[iHash];
1798 pPool->aiHash[iHash] = pPage->idx;
1799}
1800
1801
1802/**
1803 * Removes a page from the GCPhys hash table.
1804 *
1805 * @param pPool The pool.
1806 * @param pPage The page.
1807 */
1808DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1809{
1810 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1811 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1812 if (pPool->aiHash[iHash] == pPage->idx)
1813 pPool->aiHash[iHash] = pPage->iNext;
1814 else
1815 {
1816 uint16_t iPrev = pPool->aiHash[iHash];
1817 for (;;)
1818 {
1819 const int16_t i = pPool->aPages[iPrev].iNext;
1820 if (i == pPage->idx)
1821 {
1822 pPool->aPages[iPrev].iNext = pPage->iNext;
1823 break;
1824 }
1825 if (i == NIL_PGMPOOL_IDX)
1826 {
1827 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1828 break;
1829 }
1830 iPrev = i;
1831 }
1832 }
1833 pPage->iNext = NIL_PGMPOOL_IDX;
1834}
1835
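/*
 * The two helpers above maintain open hash chains keyed on GCPhys: aiHash[] holds
 * the head page index for each bucket and every page's iNext field links the
 * chain. A lookup is therefore just a walk of one bucket (this is essentially
 * what pgmPoolCacheAlloc and pgmPoolMonitorGetPageByGCPhys do further down);
 * illustrative fragment only, not a function from this file:
 *
 *     unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
 *     while (i != NIL_PGMPOOL_IDX)
 *     {
 *         PPGMPOOLPAGE pPage = &pPool->aPages[i];
 *         if (pPage->GCPhys == GCPhys)
 *             return pPage;                           // hit
 *         i = pPage->iNext;                           // follow the bucket chain
 *     }
 *     return NULL;                                    // miss
 */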
1836
1837/**
1838 * Frees up one cache page.
1839 *
1840 * @returns VBox status code.
1841 * @retval VINF_SUCCESS on success.
1842 * @param pPool The pool.
1843 * @param iUser The user index.
1844 */
1845static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1846{
1847#ifndef IN_RC
1848 const PVM pVM = pPool->CTX_SUFF(pVM);
1849#endif
1850 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1851 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1852
1853 /*
1854 * Select one page from the tail of the age list.
1855 */
1856 PPGMPOOLPAGE pPage;
1857 for (unsigned iLoop = 0; ; iLoop++)
1858 {
1859 uint16_t iToFree = pPool->iAgeTail;
1860 if (iToFree == iUser)
1861 iToFree = pPool->aPages[iToFree].iAgePrev;
1862/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1863 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1864 {
1865 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1866 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1867 {
1868 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1869 continue;
1870 iToFree = i;
1871 break;
1872 }
1873 }
1874*/
1875 Assert(iToFree != iUser);
1876 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1877 pPage = &pPool->aPages[iToFree];
1878
1879 /*
1880 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1881 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1882 */
1883 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1884 break;
1885 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1886 pgmPoolCacheUsed(pPool, pPage);
1887 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1888 }
1889
1890 /*
1891 * Found a usable page, flush it and return.
1892 */
1893 int rc = pgmPoolFlushPage(pPool, pPage);
1894 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1895 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
1896 if (rc == VINF_SUCCESS)
1897 PGM_INVL_ALL_VCPU_TLBS(pVM);
1898 return rc;
1899}
1900
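/*
 * Note on the age list used above: it is kept as an LRU, so eviction starts at
 * iAgeTail (the least recently used page); pages that may not be flushed, such as
 * the locked shadow CR3 roots, are refreshed to the head via pgmPoolCacheUsed and
 * skipped. pgmPoolTrackFreeOneUser further down simply loops on this helper until
 * a free user record becomes available.
 */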
1901
1902/**
1903 * Checks if a kind mismatch is really a page being reused
1904 * or if it's just a normal remapping.
1905 *
1906 * @returns true if reused and the cached page (enmKind1) should be flushed
1907 * @returns false if not reused.
1908 * @param enmKind1 The kind of the cached page.
1909 * @param enmKind2 The kind of the requested page.
1910 */
1911static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1912{
1913 switch (enmKind1)
1914 {
1915 /*
1916 * Never reuse them. There is no remapping in non-paging mode.
1917 */
1918 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1919 case PGMPOOLKIND_32BIT_PD_PHYS:
1920 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1921 case PGMPOOLKIND_PAE_PD_PHYS:
1922 case PGMPOOLKIND_PAE_PDPT_PHYS:
1923 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1924 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1925 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1926 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1927 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1928 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1929 return false;
1930
1931 /*
1932 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1933 */
1934 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1935 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1936 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1937 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1938 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1939 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1940 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1941 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1942 case PGMPOOLKIND_32BIT_PD:
1943 case PGMPOOLKIND_PAE_PDPT:
1944 switch (enmKind2)
1945 {
1946 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1947 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1948 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1949 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1950 case PGMPOOLKIND_64BIT_PML4:
1951 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1952 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1953 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1954 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1955 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1956 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1957 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1958 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1959 return true;
1960 default:
1961 return false;
1962 }
1963
1964 /*
1965 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1966 */
1967 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1968 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1969 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1970 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1971 case PGMPOOLKIND_64BIT_PML4:
1972 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1973 switch (enmKind2)
1974 {
1975 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1976 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1977 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1978 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1979 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1980 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1981 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1982 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1983 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1984 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1985 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1986 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1987 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1988 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1989 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1990 return true;
1991 default:
1992 return false;
1993 }
1994
1995 /*
1996 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1997 */
1998 case PGMPOOLKIND_ROOT_NESTED:
1999 return false;
2000
2001 default:
2002 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2003 }
2004}
2005
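/*
 * A concrete reading of the decision table above (illustrative calls, not code
 * from this file): a cached 32-bit PT shadow requested again as a PAE-on-PAE
 * kind means the guest switched paging mode and reused the page, so the cached
 * copy must be flushed; the same cached page requested as another 32-bit kind is
 * just a normal remapping and is kept.
 *
 *     pgmPoolCacheReusedByKind(PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT,
 *                              PGMPOOLKIND_PAE_PT_FOR_PAE_PT);       // -> true, flush the cached page
 *     pgmPoolCacheReusedByKind(PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT,
 *                              PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB);  // -> false, normal remapping
 */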
2006
2007/**
2008 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2009 *
2010 * @returns VBox status code.
2011 * @retval VINF_PGM_CACHED_PAGE on success.
2012 * @retval VERR_FILE_NOT_FOUND if not found.
2013 * @param pPool The pool.
2014 * @param GCPhys The GC physical address of the page we're going to shadow.
2015 * @param enmKind The kind of mapping.
2016 * @param enmAccess Access type for the mapping (only relevant for big pages)
2017 * @param iUser The shadow page pool index of the user table.
2018 * @param iUserTable The index into the user table (shadowed).
2019 * @param ppPage Where to store the pointer to the page.
2020 */
2021static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2022{
2023#ifndef IN_RC
2024 const PVM pVM = pPool->CTX_SUFF(pVM);
2025#endif
2026 /*
2027 * Look up the GCPhys in the hash.
2028 */
2029 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2030 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2031 if (i != NIL_PGMPOOL_IDX)
2032 {
2033 do
2034 {
2035 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2036 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2037 if (pPage->GCPhys == GCPhys)
2038 {
2039 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2040 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2041 {
2042 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2043 * doesn't flush it in case there are no more free use records.
2044 */
2045 pgmPoolCacheUsed(pPool, pPage);
2046
2047 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2048 if (RT_SUCCESS(rc))
2049 {
2050 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2051 *ppPage = pPage;
2052 if (pPage->cModifications)
2053 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2054 STAM_COUNTER_INC(&pPool->StatCacheHits);
2055 return VINF_PGM_CACHED_PAGE;
2056 }
2057 return rc;
2058 }
2059
2060 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2061 {
2062 /*
2063 * The kind is different. In some cases we should now flush the page
2064 * as it has been reused, but in most cases this is normal remapping
2065 * of PDs as PT or big pages using the GCPhys field in a slightly
2066 * different way than the other kinds.
2067 */
2068 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2069 {
2070 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2071 pgmPoolFlushPage(pPool, pPage);
2072 break;
2073 }
2074 }
2075 }
2076
2077 /* next */
2078 i = pPage->iNext;
2079 } while (i != NIL_PGMPOOL_IDX);
2080 }
2081
2082 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2083 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2084 return VERR_FILE_NOT_FOUND;
2085}
2086
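/*
 * Sketch of how a caller is expected to use the lookup above (hypothetical
 * fragment; the real caller, pgmPoolAlloc, lives elsewhere in this file): try the
 * cache first, treat VERR_FILE_NOT_FOUND as "allocate a fresh page" and pass any
 * other failure (e.g. from the user tracking) up.
 *
 *     int rc = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, &pPage);
 *     if (rc == VINF_PGM_CACHED_PAGE)
 *         return rc;                          // reuse the existing shadow page
 *     if (rc != VERR_FILE_NOT_FOUND)
 *         return rc;                          // user tracking failed; give up
 *     // ... fall through and allocate + initialize a new pool page ...
 */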
2087
2088/**
2089 * Inserts a page into the cache.
2090 *
2091 * @param pPool The pool.
2092 * @param pPage The cached page.
2093 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2094 */
2095static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2096{
2097 /*
2098 * Insert into the GCPhys hash if the page is fit for that.
2099 */
2100 Assert(!pPage->fCached);
2101 if (fCanBeCached)
2102 {
2103 pPage->fCached = true;
2104 pgmPoolHashInsert(pPool, pPage);
2105 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2106 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2107 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2108 }
2109 else
2110 {
2111 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2112 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2113 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2114 }
2115
2116 /*
2117 * Insert at the head of the age list.
2118 */
2119 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2120 pPage->iAgeNext = pPool->iAgeHead;
2121 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2122 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2123 else
2124 pPool->iAgeTail = pPage->idx;
2125 pPool->iAgeHead = pPage->idx;
2126}
2127
2128
2129/**
2130 * Flushes a cached page.
2131 *
2132 * @param pPool The pool.
2133 * @param pPage The cached page.
2134 */
2135static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2136{
2137 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2138
2139 /*
2140 * Remove the page from the hash.
2141 */
2142 if (pPage->fCached)
2143 {
2144 pPage->fCached = false;
2145 pgmPoolHashRemove(pPool, pPage);
2146 }
2147 else
2148 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2149
2150 /*
2151 * Remove it from the age list.
2152 */
2153 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2154 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2155 else
2156 pPool->iAgeTail = pPage->iAgePrev;
2157 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2158 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2159 else
2160 pPool->iAgeHead = pPage->iAgeNext;
2161 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2162 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2163}
2164
2165
2166/**
2167 * Looks for pages sharing the monitor.
2168 *
2169 * @returns Pointer to the head page.
2170 * @returns NULL if not found.
2171 * @param pPool The pool.
2172 * @param pNewPage The page which is going to be monitored.
2173 */
2174static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2175{
2176 /*
2177 * Look up the GCPhys in the hash.
2178 */
2179 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2180 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2181 if (i == NIL_PGMPOOL_IDX)
2182 return NULL;
2183 do
2184 {
2185 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2186 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2187 && pPage != pNewPage)
2188 {
2189 switch (pPage->enmKind)
2190 {
2191 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2192 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2193 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2194 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2195 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2196 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2197 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2198 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2199 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2200 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2201 case PGMPOOLKIND_64BIT_PML4:
2202 case PGMPOOLKIND_32BIT_PD:
2203 case PGMPOOLKIND_PAE_PDPT:
2204 {
2205 /* find the head */
2206 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2207 {
2208 Assert(pPage->iMonitoredPrev != pPage->idx);
2209 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2210 }
2211 return pPage;
2212 }
2213
2214 /* ignore, no monitoring. */
2215 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2216 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2217 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2218 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2219 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2220 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2221 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2222 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2223 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2224 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2225 case PGMPOOLKIND_ROOT_NESTED:
2226 case PGMPOOLKIND_PAE_PD_PHYS:
2227 case PGMPOOLKIND_PAE_PDPT_PHYS:
2228 case PGMPOOLKIND_32BIT_PD_PHYS:
2229 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2230 break;
2231 default:
2232 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2233 }
2234 }
2235
2236 /* next */
2237 i = pPage->iNext;
2238 } while (i != NIL_PGMPOOL_IDX);
2239 return NULL;
2240}
2241
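/*
 * Note on the monitoring chain located above: all pool pages shadowing (parts of)
 * the same guest page share a single physical write handler. The pages are linked
 * through iMonitoredNext/iMonitoredPrev with the handler registered on the head
 * of the chain, so pgmPoolMonitorInsert below can simply join an existing chain
 * instead of registering a second handler for the same GCPhys range.
 */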
2242
2243/**
2244 * Enables write monitoring of a guest page.
2245 *
2246 * @returns VBox status code.
2247 * @retval VINF_SUCCESS on success.
2248 * @param pPool The pool.
2249 * @param pPage The cached page.
2250 */
2251static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2252{
2253 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
2254
2255 /*
2256 * Filter out the relevant kinds.
2257 */
2258 switch (pPage->enmKind)
2259 {
2260 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2261 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2262 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2263 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2264 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2265 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2266 case PGMPOOLKIND_64BIT_PML4:
2267 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2268 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2269 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2270 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2271 case PGMPOOLKIND_32BIT_PD:
2272 case PGMPOOLKIND_PAE_PDPT:
2273 break;
2274
2275 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2276 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2277 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2278 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2279 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2280 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2281 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2282 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2283 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2284 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2285 case PGMPOOLKIND_ROOT_NESTED:
2286 /* Nothing to monitor here. */
2287 return VINF_SUCCESS;
2288
2289 case PGMPOOLKIND_32BIT_PD_PHYS:
2290 case PGMPOOLKIND_PAE_PDPT_PHYS:
2291 case PGMPOOLKIND_PAE_PD_PHYS:
2292 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2293 /* Nothing to monitor here. */
2294 return VINF_SUCCESS;
2295 default:
2296 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2297 }
2298
2299 /*
2300 * Install handler.
2301 */
2302 int rc;
2303 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2304 if (pPageHead)
2305 {
2306 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2307 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2308
2309#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2310 if (pPageHead->fDirty)
2311 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2312#endif
2313
2314 pPage->iMonitoredPrev = pPageHead->idx;
2315 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2316 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2317 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2318 pPageHead->iMonitoredNext = pPage->idx;
2319 rc = VINF_SUCCESS;
2320 }
2321 else
2322 {
2323 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2324 PVM pVM = pPool->CTX_SUFF(pVM);
2325 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2326 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2327 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
2328 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2329 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2330 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2331 pPool->pszAccessHandler);
2332 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2333 * the heap size should suffice. */
2334 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2335 AssertMsg(!(VMMGetCpu(pVM)->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(VMMGetCpu(pVM), VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", VMMGetCpu(pVM)->pgm.s.fSyncFlags, VMCPU_FF_ISSET(VMMGetCpu(pVM), VMCPU_FF_PGM_SYNC_CR3)));
2336 }
2337 pPage->fMonitored = true;
2338 return rc;
2339}
2340
2341
2342/**
2343 * Disables write monitoring of a guest page.
2344 *
2345 * @returns VBox status code.
2346 * @retval VINF_SUCCESS on success.
2347 * @param pPool The pool.
2348 * @param pPage The cached page.
2349 */
2350static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2351{
2352 /*
2353 * Filter out the relevant kinds.
2354 */
2355 switch (pPage->enmKind)
2356 {
2357 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2358 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2359 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2360 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2361 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2362 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2363 case PGMPOOLKIND_64BIT_PML4:
2364 case PGMPOOLKIND_32BIT_PD:
2365 case PGMPOOLKIND_PAE_PDPT:
2366 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2367 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2368 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2369 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2370 break;
2371
2372 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2373 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2374 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2375 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2376 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2377 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2378 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2379 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2380 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2381 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2382 case PGMPOOLKIND_ROOT_NESTED:
2383 case PGMPOOLKIND_PAE_PD_PHYS:
2384 case PGMPOOLKIND_PAE_PDPT_PHYS:
2385 case PGMPOOLKIND_32BIT_PD_PHYS:
2386 /* Nothing to monitor here. */
2387 return VINF_SUCCESS;
2388
2389 default:
2390 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2391 }
2392
2393 /*
2394 * Remove the page from the monitored list or uninstall it if last.
2395 */
2396 const PVM pVM = pPool->CTX_SUFF(pVM);
2397 int rc;
2398 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2399 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2400 {
2401 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2402 {
2403 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2404 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2405 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2406 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2407 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2408 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2409 pPool->pszAccessHandler);
2410 AssertFatalRCSuccess(rc);
2411 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2412 }
2413 else
2414 {
2415 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2416 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2417 {
2418 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2419 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2420 }
2421 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2422 rc = VINF_SUCCESS;
2423 }
2424 }
2425 else
2426 {
2427 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2428 AssertFatalRC(rc);
2429#ifdef VBOX_STRICT
2430 PVMCPU pVCpu = VMMGetCpu(pVM);
2431#endif
2432 AssertMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2433 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2434 }
2435 pPage->fMonitored = false;
2436
2437 /*
2438 * Remove it from the list of modified pages (if in it).
2439 */
2440 pgmPoolMonitorModifiedRemove(pPool, pPage);
2441
2442 return rc;
2443}
2444
2445
2446/**
2447 * Inserts the page into the list of modified pages.
2448 *
2449 * @param pPool The pool.
2450 * @param pPage The page.
2451 */
2452void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2453{
2454 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2455 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2456 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2457 && pPool->iModifiedHead != pPage->idx,
2458 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2459 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2460 pPool->iModifiedHead, pPool->cModifiedPages));
2461
2462 pPage->iModifiedNext = pPool->iModifiedHead;
2463 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2464 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2465 pPool->iModifiedHead = pPage->idx;
2466 pPool->cModifiedPages++;
2467#ifdef VBOX_WITH_STATISTICS
2468 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2469 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2470#endif
2471}
2472
2473
2474/**
2475 * Removes the page from the list of modified pages and resets the
2476 * modification counter.
2477 *
2478 * @param pPool The pool.
2479 * @param pPage The page which is believed to be in the list of modified pages.
2480 */
2481static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2482{
2483 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2484 if (pPool->iModifiedHead == pPage->idx)
2485 {
2486 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2487 pPool->iModifiedHead = pPage->iModifiedNext;
2488 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2489 {
2490 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2491 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2492 }
2493 pPool->cModifiedPages--;
2494 }
2495 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2496 {
2497 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2498 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2499 {
2500 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2501 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2502 }
2503 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2504 pPool->cModifiedPages--;
2505 }
2506 else
2507 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2508 pPage->cModifications = 0;
2509}
2510
2511
2512/**
2513 * Zaps the list of modified pages, resetting their modification counters in the process.
2514 *
2515 * @param pVM The VM handle.
2516 */
2517static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2518{
2519 pgmLock(pVM);
2520 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2521 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2522
2523 unsigned cPages = 0; NOREF(cPages);
2524
2525#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2526 pgmPoolResetDirtyPages(pVM);
2527#endif
2528
2529 uint16_t idx = pPool->iModifiedHead;
2530 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2531 while (idx != NIL_PGMPOOL_IDX)
2532 {
2533 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2534 idx = pPage->iModifiedNext;
2535 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2536 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2537 pPage->cModifications = 0;
2538 Assert(++cPages);
2539 }
2540 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2541 pPool->cModifiedPages = 0;
2542 pgmUnlock(pVM);
2543}
2544
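/*
 * Note on the modified-page list handled by the three functions above: it is an
 * intrusive doubly linked list threaded through iModifiedNext/iModifiedPrev and
 * headed by pPool->iModifiedHead, with cModifiedPages tracking its length. Every
 * page's cModifications counter is reset when it leaves the list; the clear-all
 * variant is what the CR3 sync path below uses.
 */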
2545
2546/**
2547 * Handles SyncCR3 pool tasks.
2548 *
2549 * @returns VBox status code.
2550 * @retval VINF_SUCCESS if successfully handled.
2551 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2552 * @param pVCpu The VMCPU handle.
2553 * @remark Should only be used when monitoring is available, thus placed in
2554 * the PGMPOOL_WITH_MONITORING #ifdef.
2555 */
2556int pgmPoolSyncCR3(PVMCPU pVCpu)
2557{
2558 PVM pVM = pVCpu->CTX_SUFF(pVM);
2559 LogFlow(("pgmPoolSyncCR3\n"));
2560
2561 /*
2562 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2563 * Occasionally we will have to clear all the shadow page tables because we wanted
2564 * to monitor a page which was mapped by too many shadowed page tables. This operation
2565 * is sometimes referred to as a 'lightweight flush'.
2566 */
2567# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2568 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2569 pgmR3PoolClearAll(pVM);
2570# else /* !IN_RING3 */
2571 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2572 {
2573 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2574 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2575
2576 /* Make sure all other VCPUs return to ring 3. */
2577 if (pVM->cCpus > 1)
2578 {
2579 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2580 PGM_INVL_ALL_VCPU_TLBS(pVM);
2581 }
2582 return VINF_PGM_SYNC_CR3;
2583 }
2584# endif /* !IN_RING3 */
2585 else
2586 {
2587 pgmPoolMonitorModifiedClearAll(pVM);
2588
2589 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2590 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2591 {
2592 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2593 return pgmPoolSyncCR3(pVCpu);
2594 }
2595 }
2596 return VINF_SUCCESS;
2597}
2598
2599
2600/**
2601 * Frees up at least one user entry.
2602 *
2603 * @returns VBox status code.
2604 * @retval VINF_SUCCESS if at least one user entry was successfully freed.
2605 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2606 * @param pPool The pool.
2607 * @param iUser The user index.
2608 */
2609static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2610{
2611 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2612 /*
2613 * Just free cached pages in a braindead fashion.
2614 */
2615 /** @todo walk the age list backwards and free the first with usage. */
2616 int rc = VINF_SUCCESS;
2617 do
2618 {
2619 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2620 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2621 rc = rc2;
2622 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2623 return rc;
2624}
2625
2626
2627/**
2628 * Inserts a page into the cache.
2629 *
2630 * This will create a user node for the page, insert it into the GCPhys
2631 * hash, and insert it into the age list.
2632 *
2633 * @returns VBox status code.
2634 * @retval VINF_SUCCESS if successfully added.
2635 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2636 * @param pPool The pool.
2637 * @param pPage The cached page.
2638 * @param GCPhys The GC physical address of the page we're going to shadow.
2639 * @param iUser The user index.
2640 * @param iUserTable The user table index.
2641 */
2642DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2643{
2644 int rc = VINF_SUCCESS;
2645 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2646
2647 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2648
2649#ifdef VBOX_STRICT
2650 /*
2651 * Check that the entry doesn't already exist.
2652 */
2653 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2654 {
2655 uint16_t i = pPage->iUserHead;
2656 do
2657 {
2658 Assert(i < pPool->cMaxUsers);
2659 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2660 i = paUsers[i].iNext;
2661 } while (i != NIL_PGMPOOL_USER_INDEX);
2662 }
2663#endif
2664
2665 /*
2666 * Find a free user node.
2667 */
2668 uint16_t i = pPool->iUserFreeHead;
2669 if (i == NIL_PGMPOOL_USER_INDEX)
2670 {
2671 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2672 if (RT_FAILURE(rc))
2673 return rc;
2674 i = pPool->iUserFreeHead;
2675 }
2676
2677 /*
2678 * Unlink the user node from the free list,
2679 * initialize and insert it into the user list.
2680 */
2681 pPool->iUserFreeHead = paUsers[i].iNext;
2682 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2683 paUsers[i].iUser = iUser;
2684 paUsers[i].iUserTable = iUserTable;
2685 pPage->iUserHead = i;
2686
2687 /*
2688 * Insert into cache and enable monitoring of the guest page if enabled.
2689 *
2690 * Until we implement caching of all levels, including the CR3 one, we'll
2691 * have to make sure we don't try to monitor & cache any recursive reuse of
2692 * a monitored CR3 page. Because all Windows versions are doing this we'll
2693 * have to be able to do combined access monitoring, CR3 + PT and
2694 * PD + PT (guest PAE).
2695 *
2696 * Update:
2697 * We're now cooperating with the CR3 monitor if an uncacheable page is found.
2698 */
2699 const bool fCanBeMonitored = true;
2700 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2701 if (fCanBeMonitored)
2702 {
2703 rc = pgmPoolMonitorInsert(pPool, pPage);
2704 AssertRC(rc);
2705 }
2706 return rc;
2707}
2708
2709
2710/**
2711 * Adds a user reference to a page.
2712 *
2713 * This will move the page to the head of the age list.
2714 *
2715 * @returns VBox status code.
2716 * @retval VINF_SUCCESS if successfully added.
2717 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2718 * @param pPool The pool.
2719 * @param pPage The cached page.
2720 * @param iUser The user index.
2721 * @param iUserTable The user table.
2722 */
2723static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2724{
2725 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2726
2727 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2728
2729# ifdef VBOX_STRICT
2730 /*
2731 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2732 */
2733 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2734 {
2735 uint16_t i = pPage->iUserHead;
2736 do
2737 {
2738 Assert(i < pPool->cMaxUsers);
2739 AssertMsg( iUser == PGMPOOL_IDX_PD || iUser == PGMPOOL_IDX_PDPT || iUser == PGMPOOL_IDX_NESTED_ROOT || iUser == PGMPOOL_IDX_AMD64_CR3 ||
2740 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2741 i = paUsers[i].iNext;
2742 } while (i != NIL_PGMPOOL_USER_INDEX);
2743 }
2744# endif
2745
2746 /*
2747 * Allocate a user node.
2748 */
2749 uint16_t i = pPool->iUserFreeHead;
2750 if (i == NIL_PGMPOOL_USER_INDEX)
2751 {
2752 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2753 if (RT_FAILURE(rc))
2754 return rc;
2755 i = pPool->iUserFreeHead;
2756 }
2757 pPool->iUserFreeHead = paUsers[i].iNext;
2758
2759 /*
2760 * Initialize the user node and insert it.
2761 */
2762 paUsers[i].iNext = pPage->iUserHead;
2763 paUsers[i].iUser = iUser;
2764 paUsers[i].iUserTable = iUserTable;
2765 pPage->iUserHead = i;
2766
2767# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2768 if (pPage->fDirty)
2769 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2770# endif
2771
2772 /*
2773 * Tell the cache to update its replacement stats for this page.
2774 */
2775 pgmPoolCacheUsed(pPool, pPage);
2776 return VINF_SUCCESS;
2777}
2778
2779
2780/**
2781 * Frees a user record associated with a page.
2782 *
2783 * This does not clear the entry in the user table, it simply returns the
2784 * user record to the chain of free records.
2785 *
2786 * @param pPool The pool.
2787 * @param pPage The shadow page.
2788 * @param iUser The shadow page pool index of the user table.
2789 * @param iUserTable The index into the user table (shadowed).
2790 */
2791static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2792{
2793 /*
2794 * Unlink and free the specified user entry.
2795 */
2796 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2797
2798 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2799 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2800 uint16_t i = pPage->iUserHead;
2801 if ( i != NIL_PGMPOOL_USER_INDEX
2802 && paUsers[i].iUser == iUser
2803 && paUsers[i].iUserTable == iUserTable)
2804 {
2805 pPage->iUserHead = paUsers[i].iNext;
2806
2807 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2808 paUsers[i].iNext = pPool->iUserFreeHead;
2809 pPool->iUserFreeHead = i;
2810 return;
2811 }
2812
2813 /* General: Linear search. */
2814 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2815 while (i != NIL_PGMPOOL_USER_INDEX)
2816 {
2817 if ( paUsers[i].iUser == iUser
2818 && paUsers[i].iUserTable == iUserTable)
2819 {
2820 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2821 paUsers[iPrev].iNext = paUsers[i].iNext;
2822 else
2823 pPage->iUserHead = paUsers[i].iNext;
2824
2825 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2826 paUsers[i].iNext = pPool->iUserFreeHead;
2827 pPool->iUserFreeHead = i;
2828 return;
2829 }
2830 iPrev = i;
2831 i = paUsers[i].iNext;
2832 }
2833
2834 /* Fatal: didn't find it */
2835 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2836 iUser, iUserTable, pPage->GCPhys));
2837}
2838
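/*
 * Note on the user tracking manipulated by the functions above: paUsers is one
 * big array of records; each pool page keeps a singly linked list of its users
 * starting at pPage->iUserHead and chained through iNext, while unused records
 * sit on a global free list headed by pPool->iUserFreeHead. A (iUser, iUserTable)
 * pair identifies the shadow table and the entry within it that references the
 * page, so enumerating all referencing entries is a simple list walk
 * (illustrative fragment):
 *
 *     for (uint16_t i = pPage->iUserHead; i != NIL_PGMPOOL_USER_INDEX; i = paUsers[i].iNext)
 *         Log(("user: table=%x entry=%x\n", paUsers[i].iUser, paUsers[i].iUserTable));
 */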
2839
2840/**
2841 * Gets the entry size of a shadow table.
2842 *
2843 * @param enmKind The kind of page.
2844 *
2845 * @returns The size of the entry in bytes. That is, 4 or 8.
2846 * @returns If the kind is not for a table, an assertion is raised and 0 is
2847 * returned.
2848 */
2849DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2850{
2851 switch (enmKind)
2852 {
2853 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2854 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2855 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2856 case PGMPOOLKIND_32BIT_PD:
2857 case PGMPOOLKIND_32BIT_PD_PHYS:
2858 return 4;
2859
2860 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2861 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2862 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2863 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2864 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2865 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2866 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2867 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2868 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2869 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2870 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2871 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2872 case PGMPOOLKIND_64BIT_PML4:
2873 case PGMPOOLKIND_PAE_PDPT:
2874 case PGMPOOLKIND_ROOT_NESTED:
2875 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2876 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2877 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2878 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2879 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2880 case PGMPOOLKIND_PAE_PD_PHYS:
2881 case PGMPOOLKIND_PAE_PDPT_PHYS:
2882 return 8;
2883
2884 default:
2885 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2886 }
2887}
2888
2889
2890/**
2891 * Gets the entry size of a guest table.
2892 *
2893 * @param enmKind The kind of page.
2894 *
2895 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2896 * @returns If the kind is not for a table, an assertion is raised and 0 is
2897 * returned.
2898 */
2899DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2900{
2901 switch (enmKind)
2902 {
2903 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2904 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2905 case PGMPOOLKIND_32BIT_PD:
2906 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2907 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2908 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2909 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2910 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2911 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2912 return 4;
2913
2914 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2915 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2916 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2917 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2918 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2919 case PGMPOOLKIND_64BIT_PML4:
2920 case PGMPOOLKIND_PAE_PDPT:
2921 return 8;
2922
2923 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2924 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2925 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2926 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2927 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2928 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2929 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2930 case PGMPOOLKIND_ROOT_NESTED:
2931 case PGMPOOLKIND_PAE_PD_PHYS:
2932 case PGMPOOLKIND_PAE_PDPT_PHYS:
2933 case PGMPOOLKIND_32BIT_PD_PHYS:
2934 /** @todo can we return 0? (nobody is calling this...) */
2935 AssertFailed();
2936 return 0;
2937
2938 default:
2939 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2940 }
2941}
2942
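/*
 * Illustrative use of the two size helpers above (hypothetical fragment): generic
 * tracking code can address an entry in a shadow table without knowing the exact
 * table layout, since only the entry size (4 or 8 bytes) differs.
 *
 *     unsigned const cbEntry  = pgmPoolTrackGetShadowEntrySize((PGMPOOLKIND)pPage->enmKind);
 *     unsigned const offEntry = iUserTable * cbEntry;   // byte offset of entry iUserTable in the table
 */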
2943
2944/**
2945 * Scans one shadow page table for mappings of a physical page.
2946 *
2947 * @returns true if some PTE(s) were kept (possibly modified), false if all relevant PTEs were removed.
2948 * @param pVM The VM handle.
2949 * @param pPhysPage The guest page in question.
2950 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
2951 * @param iShw The shadow page table.
2952 * @param cRefs The number of references made in that PT.
2953 */
2954static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
2955{
2956 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
2957 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2958 bool bRet = false;
2959
2960 /*
2961 * Assert sanity.
2962 */
2963 Assert(cRefs == 1);
2964 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2965 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2966
2967 /*
2968 * Then, clear the actual mappings to the page in the shadow PT.
2969 */
2970 switch (pPage->enmKind)
2971 {
2972 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2973 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2974 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2975 {
2976 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2977 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2978 uint32_t u32AndMask, u32OrMask;
2979
2980 u32AndMask = 0;
2981 u32OrMask = 0;
2982
2983 if (!fFlushPTEs)
2984 {
2985 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
2986 {
2987 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
2988 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
2989 u32OrMask = X86_PTE_RW;
2990 u32AndMask = UINT32_MAX;
2991 bRet = true;
2992 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
2993 break;
2994
2995 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
2996 u32OrMask = 0;
2997 u32AndMask = ~X86_PTE_RW;
2998 bRet = true;
2999 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3000 break;
3001 default:
3002 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3003 break;
3004 }
3005 }
3006 else
3007 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3008
3009 /* Update the counter if we're removing references. */
3010 if (!u32AndMask)
3011 {
3012 Assert(pPage->cPresent >= cRefs);
3013 Assert(pPool->cPresent >= cRefs);
3014 pPage->cPresent -= cRefs;
3015 pPool->cPresent -= cRefs;
3016 }
3017
3018 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3019 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3020 {
3021 X86PTE Pte;
3022
3023 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
3024 Pte.u = (pPT->a[i].u & u32AndMask) | u32OrMask;
3025 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3026 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3027
3028 ASMAtomicWriteSize(&pPT->a[i].u, Pte.u);
3029 cRefs--;
3030 if (!cRefs)
3031 return bRet;
3032 }
3033#ifdef LOG_ENABLED
3034 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3035 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3036 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3037 {
3038 Log(("i=%d cRefs=%d\n", i, cRefs--));
3039 }
3040#endif
3041 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3042 break;
3043 }
3044
3045 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3046 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3047 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3048 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3049 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3050 {
3051 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3052 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3053 uint64_t u64AndMask, u64OrMask;
3054
3055 u64OrMask = 0;
3056 u64AndMask = 0;
3057 if (!fFlushPTEs)
3058 {
3059 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3060 {
3061 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3062 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3063 u64OrMask = X86_PTE_RW;
3064 u64AndMask = UINT64_MAX;
3065 bRet = true;
3066 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3067 break;
3068
3069 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3070 u64OrMask = 0;
3071 u64AndMask = ~((uint64_t)X86_PTE_RW);
3072 bRet = true;
3073 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3074 break;
3075
3076 default:
3077 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3078 break;
3079 }
3080 }
3081 else
3082 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3083
3084 /* Update the counter if we're removing references. */
3085 if (!u64AndMask)
3086 {
3087 Assert(pPage->cPresent >= cRefs);
3088 Assert(pPool->cPresent >= cRefs);
3089 pPage->cPresent -= cRefs;
3090 pPool->cPresent -= cRefs;
3091 }
3092
3093 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3094 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3095 {
3096 X86PTEPAE Pte;
3097
3098 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3099 Pte.u = (pPT->a[i].u & u64AndMask) | u64OrMask;
3100 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3101 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3102
3103 ASMAtomicWriteSize(&pPT->a[i].u, Pte.u);
3104 cRefs--;
3105 if (!cRefs)
3106 return bRet;
3107 }
3108#ifdef LOG_ENABLED
3109 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3110 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3111 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3112 {
3113 Log(("i=%d cRefs=%d\n", i, cRefs--));
3114 }
3115#endif
3116 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind));
3117 break;
3118 }
3119
3120 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3121 {
3122 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3123 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3124 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3125 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3126 {
3127 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3128 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3129 pPT->a[i].u = 0;
3130 cRefs--;
3131
3132 /* Update the counter as we're removing references. */
3133 Assert(pPage->cPresent);
3134 Assert(pPool->cPresent);
3135 pPage->cPresent--;
3136 pPool->cPresent--;
3137
3138 if (!cRefs)
3139 return bRet;
3140 }
3141#ifdef LOG_ENABLED
3142 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3143 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3144 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3145 {
3146 Log(("i=%d cRefs=%d\n", i, cRefs--));
3147 }
3148#endif
3149 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3150 break;
3151 }
3152
3153#ifdef PGM_WITH_LARGE_PAGES
3154 /* Large page case only. */
3155 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3156 {
3157 Assert(HWACCMIsNestedPagingActive(pVM));
3158 Assert(cRefs == 1);
3159
3160 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3161 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3162 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPD->a); i++)
3163 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3164 {
3165 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64 cRefs=%#x\n", i, pPD->a[i], cRefs));
3166 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3167 pPD->a[i].u = 0;
3168 cRefs--;
3169
3170 /* Update the counter as we're removing references. */
3171 Assert(pPage->cPresent);
3172 Assert(pPool->cPresent);
3173 pPage->cPresent--;
3174 pPool->cPresent--;
3175
3176 if (!cRefs)
3177 return bRet;
3178 }
3179# ifdef LOG_ENABLED
3180 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3181 for (unsigned i = 0; i < RT_ELEMENTS(pPD->a); i++)
3182 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3183 {
3184 Log(("i=%d cRefs=%d\n", i, cRefs--));
3185 }
3186# endif
3187 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3188 break;
3189 }
3190
3191 /* AMD-V nested paging - @todo merge with EPT as we only check the parts that are identical. */
3192 case PGMPOOLKIND_PAE_PD_PHYS:
3193 {
3194 Assert(HWACCMIsNestedPagingActive(pVM));
3195 Assert(cRefs == 1);
3196
3197 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3198 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3199 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPD->a); i++)
3200 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3201 {
3202 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64 cRefs=%#x\n", i, pPD->a[i], cRefs));
3203 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3204 pPD->a[i].u = 0;
3205 cRefs--;
3206
3207 /* Update the counter as we're removing references. */
3208 Assert(pPage->cPresent);
3209 Assert(pPool->cPresent);
3210 pPage->cPresent--;
3211 pPool->cPresent--;
3212
3213 if (!cRefs)
3214 return bRet;
3215 }
3216# ifdef LOG_ENABLED
3217 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3218 for (unsigned i = 0; i < RT_ELEMENTS(pPD->a); i++)
3219 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3220 {
3221 Log(("i=%d cRefs=%d\n", i, cRefs--));
3222 }
3223# endif
3224 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3225 break;
3226 }
3227#endif /* PGM_WITH_LARGE_PAGES */
3228
3229 default:
3230 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3231 }
3232 return bRet;
3233}
3234
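/*
 * Recap of the PTE masking policy applied above when fFlushPTEs is false (see the
 * switch on PGM_PAGE_GET_HNDL_PHYS_STATE):
 *   - NONE / DISABLED: OR in X86_PTE_RW, AND with all ones -> the entry is kept and made writable again.
 *   - WRITE:           AND with ~X86_PTE_RW                -> the entry is kept but write protected.
 *   - anything else:   both masks zero                     -> the entry is cleared and the present
 *                       counters are dropped, just like the fFlushPTEs == true case.
 */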
3235
3236/**
3237 * Scans one shadow page table for mappings of a physical page.
3238 *
3239 * @param pVM The VM handle.
3240 * @param pPhysPage The guest page in question.
3241 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3242 * @param iShw The shadow page table.
3243 * @param cRefs The number of references made in that PT.
3244 */
3245static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
3246{
3247 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3248
3249 /* We should only come here when there's only one reference to this physical page. */
3250 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3251 Assert(cRefs == 1);
3252
3253 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
3254 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3255 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, cRefs);
3256 if (!fKeptPTEs)
3257 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3258 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3259}
3260
3261
3262/**
3263 * Flushes a list of shadow page tables mapping the same physical page.
3264 *
3265 * @param pVM The VM handle.
3266 * @param pPhysPage The guest page in question.
3267 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3268 * @param iPhysExt The physical cross reference extent list to flush.
3269 */
3270static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3271{
3272 Assert(PGMIsLockOwner(pVM));
3273 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3274 bool fKeepList = false;
3275
3276 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3277    Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3278
3279 const uint16_t iPhysExtStart = iPhysExt;
3280 PPGMPOOLPHYSEXT pPhysExt;
3281 do
3282 {
3283 Assert(iPhysExt < pPool->cMaxPhysExts);
3284 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3285 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3286 {
3287 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3288 {
3289 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], 1);
3290 if (!fKeptPTEs)
3291 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3292 else
3293 fKeepList = true;
3294 }
3295 }
3296 /* next */
3297 iPhysExt = pPhysExt->iNext;
3298 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3299
3300 if (!fKeepList)
3301 {
3302 /* insert the list into the free list and clear the ram range entry. */
3303 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3304 pPool->iPhysExtFreeHead = iPhysExtStart;
3305 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3306 }
3307
3308 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3309}
3310
3311
3312/**
3313 * Flushes all shadow page table mappings of the given guest page.
3314 *
3315 * This is typically called when the host page backing the guest one has been
3316 * replaced or when the page protection was changed due to an access handler.
3317 *
3318 * @returns VBox status code.
3319 * @retval  VINF_SUCCESS if all references have been successfully cleared.
3320 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3321 * pool cleaning. FF and sync flags are set.
3322 *
3323 * @param pVM The VM handle.
3324 * @param GCPhysPage GC physical address of the page in question
3325 * @param pPhysPage The guest page in question.
3326 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3327 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3328 *                      flushed; it is NOT touched if this isn't necessary.
3329 *                      The caller MUST initialize this to @a false.
3330 */
3331int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3332{
3333 PVMCPU pVCpu = VMMGetCpu(pVM);
3334 pgmLock(pVM);
3335 int rc = VINF_SUCCESS;
3336
3337#ifdef PGM_WITH_LARGE_PAGES
3338 /* Is this page part of a large page? */
3339 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3340 {
3341 PPGMPAGE pPhysBase;
3342 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3343
3344 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3345
3346 /* Fetch the large page base. */
3347 if (GCPhysBase != GCPhysPage)
3348 {
3349 pPhysBase = pgmPhysGetPage(&pVM->pgm.s, GCPhysBase);
3350 AssertFatal(pPhysBase);
3351 }
3352 else
3353 pPhysBase = pPhysPage;
3354
3355 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3356
3357 if (PGM_PAGE_GET_PDE_TYPE(pPhysBase) == PGM_PAGE_PDE_TYPE_PDE)
3358 {
3359 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3360 PGM_PAGE_SET_PDE_TYPE(pPhysBase, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3361
3362            /* Update the base; *only* that page has a reference and there's only one PDE to clear. */
3363 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pPhysBase, fFlushPTEs, pfFlushTLBs);
3364
3365 *pfFlushTLBs = true;
3366 pgmUnlock(pVM);
3367 return rc;
3368 }
3369 }
3370#else
3371 NOREF(GCPhysPage);
3372#endif /* PGM_WITH_LARGE_PAGES */
3373
3374 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3375 if (u16)
3376 {
3377 /*
3378 * The zero page is currently screwing up the tracking and we'll
3379 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3380 * is defined, zero pages won't normally be mapped. Some kind of solution
3381 * will be needed for this problem of course, but it will have to wait...
3382 */
3383 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3384 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3385 rc = VINF_PGM_GCPHYS_ALIASED;
3386 else
3387 {
3388# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3389 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3390 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3391 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3392# endif
3393
3394 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3395 pgmPoolTrackFlushGCPhysPT(pVM,
3396 pPhysPage,
3397 fFlushPTEs,
3398 PGMPOOL_TD_GET_IDX(u16),
3399 PGMPOOL_TD_GET_CREFS(u16));
3400 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3401 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3402 else
3403 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3404 *pfFlushTLBs = true;
3405
3406# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3407 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3408# endif
3409 }
3410 }
3411
3412 if (rc == VINF_PGM_GCPHYS_ALIASED)
3413 {
3414 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3415 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3416 rc = VINF_PGM_SYNC_CR3;
3417 }
3418 pgmUnlock(pVM);
3419 return rc;
3420}
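
/*
 * Illustrative caller sketch for pgmPoolTrackUpdateGCPhys() above (not part of the
 * original flow; pVM, GCPhys and pPage are placeholder names for the caller's own
 * variables). It shows the documented contract: *pfFlushTLBs must start out false,
 * and VINF_PGM_SYNC_CR3 means the FF and sync flags have already been set.
 */
#if 0
    bool fFlushTLBs = false;                    /* MUST be initialized to false, see the doc comment. */
    int  rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhys, pPage, true /*fFlushPTEs*/, &fFlushTLBs);
    if (fFlushTLBs)
        PGM_INVL_ALL_VCPU_TLBS(pVM);            /* shadow TLBs were touched, flush them on all VCPUs. */
    if (rc == VINF_PGM_SYNC_CR3)
        { /* a CR3 sync and pool clearing have been scheduled; nothing more to do here. */ }
#endif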
3421
3422
3423/**
3424 * Scans all shadow page tables for mappings of a physical page.
3425 *
3426 * This may be slow, but it's most likely more efficient than cleaning
3427 * out the entire page pool / cache.
3428 *
3429 * @returns VBox status code.
3430 * @retval  VINF_SUCCESS if all references have been successfully cleared.
3431 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3432 * a page pool cleaning.
3433 *
3434 * @param pVM The VM handle.
3435 * @param pPhysPage The guest page in question.
3436 */
3437int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3438{
3439 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3440 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3441 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3442 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3443
3444#if 1
3445 /*
3446 * There is a limit to what makes sense.
3447 */
3448 if (pPool->cPresent > 1024)
3449 {
3450 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3451 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3452 return VINF_PGM_GCPHYS_ALIASED;
3453 }
3454#endif
3455
3456 /*
3457     * Iterate all the pages until we've encountered all that are in use.
3458     * This is a simple but not quite optimal solution.
3459 */
3460 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3461 const uint32_t u32 = u64;
3462 unsigned cLeft = pPool->cUsedPages;
3463 unsigned iPage = pPool->cCurPages;
3464 while (--iPage >= PGMPOOL_IDX_FIRST)
3465 {
3466 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3467 if ( pPage->GCPhys != NIL_RTGCPHYS
3468 && pPage->cPresent)
3469 {
3470 switch (pPage->enmKind)
3471 {
3472 /*
3473 * We only care about shadow page tables.
3474 */
3475 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3476 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3477 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3478 {
3479 unsigned cPresent = pPage->cPresent;
3480 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3481 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3482 if (pPT->a[i].n.u1Present)
3483 {
3484 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3485 {
3486 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3487 pPT->a[i].u = 0;
3488
3489 /* Update the counter as we're removing references. */
3490 Assert(pPage->cPresent);
3491 Assert(pPool->cPresent);
3492 pPage->cPresent--;
3493 pPool->cPresent--;
3494 }
3495 if (!--cPresent)
3496 break;
3497 }
3498 break;
3499 }
3500
3501 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3502 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3503 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3504 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3505 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3506 {
3507 unsigned cPresent = pPage->cPresent;
3508 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3509 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3510 if (pPT->a[i].n.u1Present)
3511 {
3512 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3513 {
3514 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3515 pPT->a[i].u = 0;
3516
3517 /* Update the counter as we're removing references. */
3518 Assert(pPage->cPresent);
3519 Assert(pPool->cPresent);
3520 pPage->cPresent--;
3521 pPool->cPresent--;
3522 }
3523 if (!--cPresent)
3524 break;
3525 }
3526 break;
3527 }
3528#ifndef IN_RC
3529 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3530 {
3531 unsigned cPresent = pPage->cPresent;
3532 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3533 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3534 if (pPT->a[i].n.u1Present)
3535 {
3536 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3537 {
3538 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3539 pPT->a[i].u = 0;
3540
3541 /* Update the counter as we're removing references. */
3542 Assert(pPage->cPresent);
3543 Assert(pPool->cPresent);
3544 pPage->cPresent--;
3545 pPool->cPresent--;
3546 }
3547 if (!--cPresent)
3548 break;
3549 }
3550 break;
3551 }
3552#endif
3553 }
3554 if (!--cLeft)
3555 break;
3556 }
3557 }
3558
3559 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3560 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3561 return VINF_SUCCESS;
3562}
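
/*
 * Worked example of the match values used in the scan above (the HCPhys value is chosen
 * purely for illustration): with PGM_PAGE_GET_HCPHYS(pPhysPage) == 0x0000000012345000,
 * u64 = 0x0000000012345001 (page address | X86_PTE_P), and u32 = 0x12345001 is its
 * 32-bit truncation used when comparing against legacy 32-bit PTEs.
 */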
3563
3564
3565/**
3566 * Clears the user entry in a user table.
3567 *
3568 * This is used to remove all references to a page when flushing it.
3569 */
3570static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3571{
3572 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3573 Assert(pUser->iUser < pPool->cCurPages);
3574 uint32_t iUserTable = pUser->iUserTable;
3575
3576 /*
3577 * Map the user page.
3578 */
3579 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3580 union
3581 {
3582 uint64_t *pau64;
3583 uint32_t *pau32;
3584 } u;
3585 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3586
3587 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3588
3589 /* Safety precaution in case we change the paging for other modes too in the future. */
3590 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3591
3592#ifdef VBOX_STRICT
3593 /*
3594 * Some sanity checks.
3595 */
3596 switch (pUserPage->enmKind)
3597 {
3598 case PGMPOOLKIND_32BIT_PD:
3599 case PGMPOOLKIND_32BIT_PD_PHYS:
3600 Assert(iUserTable < X86_PG_ENTRIES);
3601 break;
3602 case PGMPOOLKIND_PAE_PDPT:
3603 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3604 case PGMPOOLKIND_PAE_PDPT_PHYS:
3605 Assert(iUserTable < 4);
3606 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3607 break;
3608 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3609 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3610 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3611 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3612 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3613 case PGMPOOLKIND_PAE_PD_PHYS:
3614 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3615 break;
3616 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3617 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3618 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3619 break;
3620 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3621 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3622 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3623 break;
3624 case PGMPOOLKIND_64BIT_PML4:
3625 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3626 /* GCPhys >> PAGE_SHIFT is the index here */
3627 break;
3628 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3629 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3630 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3631 break;
3632
3633 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3634 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3635 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3636 break;
3637
3638 case PGMPOOLKIND_ROOT_NESTED:
3639 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3640 break;
3641
3642 default:
3643 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3644 break;
3645 }
3646#endif /* VBOX_STRICT */
3647
3648 /*
3649 * Clear the entry in the user page.
3650 */
3651 switch (pUserPage->enmKind)
3652 {
3653 /* 32-bit entries */
3654 case PGMPOOLKIND_32BIT_PD:
3655 case PGMPOOLKIND_32BIT_PD_PHYS:
3656 ASMAtomicWriteSize(&u.pau32[iUserTable], 0);
3657 break;
3658
3659 /* 64-bit entries */
3660 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3661 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3662 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3663 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3664 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3665#if defined(IN_RC)
3666        /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during CR3 load, so any
3667         * non-present PDPT will continue to cause page faults.
3668 */
3669 ASMReloadCR3();
3670#endif
3671 /* no break */
3672 case PGMPOOLKIND_PAE_PD_PHYS:
3673 case PGMPOOLKIND_PAE_PDPT_PHYS:
3674 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3675 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3676 case PGMPOOLKIND_64BIT_PML4:
3677 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3678 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3679 case PGMPOOLKIND_PAE_PDPT:
3680 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3681 case PGMPOOLKIND_ROOT_NESTED:
3682 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3683 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3684 ASMAtomicWriteSize(&u.pau64[iUserTable], 0);
3685 break;
3686
3687 default:
3688 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3689 }
3690}
3691
3692
3693/**
3694 * Clears all users of a page.
3695 */
3696static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3697{
3698 /*
3699 * Free all the user records.
3700 */
3701 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3702
3703 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3704 uint16_t i = pPage->iUserHead;
3705 while (i != NIL_PGMPOOL_USER_INDEX)
3706 {
3707        /* Clear entry in user table. */
3708 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3709
3710 /* Free it. */
3711 const uint16_t iNext = paUsers[i].iNext;
3712 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3713 paUsers[i].iNext = pPool->iUserFreeHead;
3714 pPool->iUserFreeHead = i;
3715
3716 /* Next. */
3717 i = iNext;
3718 }
3719 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3720}
3721
3722
3723/**
3724 * Allocates a new physical cross reference extent.
3725 *
3726 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3727 * @param pVM The VM handle.
3728 * @param piPhysExt Where to store the phys ext index.
3729 */
3730PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3731{
3732 Assert(PGMIsLockOwner(pVM));
3733 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3734 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3735 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3736 {
3737 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3738 return NULL;
3739 }
3740 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3741 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3742 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3743 *piPhysExt = iPhysExt;
3744 return pPhysExt;
3745}
3746
3747
3748/**
3749 * Frees a physical cross reference extent.
3750 *
3751 * @param pVM The VM handle.
3752 * @param iPhysExt The extent to free.
3753 */
3754void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3755{
3756 Assert(PGMIsLockOwner(pVM));
3757 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3758 Assert(iPhysExt < pPool->cMaxPhysExts);
3759 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3760 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3761 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3762 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3763 pPool->iPhysExtFreeHead = iPhysExt;
3764}
3765
3766
3767/**
3768 * Frees a list of physical cross reference extents.
3769 *
3770 * @param   pVM         The VM handle.
3771 * @param   iPhysExt    The index of the head of the extent list to free.
3772 */
3773void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3774{
3775 Assert(PGMIsLockOwner(pVM));
3776 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3777
3778 const uint16_t iPhysExtStart = iPhysExt;
3779 PPGMPOOLPHYSEXT pPhysExt;
3780 do
3781 {
3782 Assert(iPhysExt < pPool->cMaxPhysExts);
3783 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3784 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3785 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3786
3787 /* next */
3788 iPhysExt = pPhysExt->iNext;
3789 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3790
3791 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3792 pPool->iPhysExtFreeHead = iPhysExtStart;
3793}
3794
3795
3796/**
3797 * Insert a reference into a list of physical cross reference extents.
3798 *
3799 * @returns The new tracking data for PGMPAGE.
3800 *
3801 * @param pVM The VM handle.
3802 * @param iPhysExt The physical extent index of the list head.
3803 * @param iShwPT The shadow page table index.
3804 *
3805 */
3806static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3807{
3808 Assert(PGMIsLockOwner(pVM));
3809 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3810 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3811
3812 /* special common case. */
3813 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3814 {
3815 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3816 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3817 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d}\n", iPhysExt, iShwPT));
3818 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3819 }
3820
3821 /* general treatment. */
3822 const uint16_t iPhysExtStart = iPhysExt;
3823 unsigned cMax = 15;
3824 for (;;)
3825 {
3826 Assert(iPhysExt < pPool->cMaxPhysExts);
3827 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3828 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3829 {
3830 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3831 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3832 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3833 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3834 }
3835 if (!--cMax)
3836 {
3837 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3838 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3839 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3840 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3841 }
        /* Advance to the next extent in the chain; when the end of the chain is reached,
           fall out of the loop so a new extent can be allocated and linked in below. */
        iPhysExt = paPhysExts[iPhysExt].iNext;
        if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
            break;
3842    }
3843
3844 /* add another extent to the list. */
3845 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3846 if (!pNew)
3847 {
3848 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackNoExtentsLeft);
3849 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3850 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3851 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3852 }
3853 pNew->iNext = iPhysExtStart;
3854 pNew->aidx[0] = iShwPT;
3855 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3856 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3857}
3858
3859
3860/**
3861 * Add a reference to a guest physical page where extents are in use.
3862 *
3863 * @returns The new tracking data for PGMPAGE.
3864 *
3865 * @param pVM The VM handle.
3866 * @param u16 The ram range flags (top 16-bits).
3867 * @param iShwPT The shadow page table index.
3868 */
3869uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3870{
3871 pgmLock(pVM);
3872 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3873 {
3874 /*
3875 * Convert to extent list.
3876 */
3877 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3878 uint16_t iPhysExt;
3879 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3880 if (pPhysExt)
3881 {
3882 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3883 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3884 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3885 pPhysExt->aidx[1] = iShwPT;
3886 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3887 }
3888 else
3889 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3890 }
3891 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3892 {
3893 /*
3894 * Insert into the extent list.
3895 */
3896 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3897 }
3898 else
3899 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3900 pgmUnlock(pVM);
3901 return u16;
3902}
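
/*
 * Illustrative, hypothetical caller sketch for pgmPoolTrackPhysExtAddref() above
 * (pPhysPage and pShwPage are placeholder names): when a guest page that already has
 * tracking data gains another shadow page table reference, the tracking word is read,
 * extended and written back.
 */
#if 0
    uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
    u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
    PGM_PAGE_SET_TRACKING(pPhysPage, u16);
#endif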
3903
3904
3905/**
3906 * Clear references to guest physical memory.
3907 *
3908 * @param pPool The pool.
3909 * @param pPage The page.
3910 * @param pPhysPage Pointer to the aPages entry in the ram range.
3911 */
3912void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3913{
3914 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3915 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3916
3917 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3918 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3919 {
3920 PVM pVM = pPool->CTX_SUFF(pVM);
3921 pgmLock(pVM);
3922
3923 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3924 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3925 do
3926 {
3927 Assert(iPhysExt < pPool->cMaxPhysExts);
3928
3929 /*
3930 * Look for the shadow page and check if it's all freed.
3931 */
3932 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3933 {
3934 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3935 {
3936 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3937
3938 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3939 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3940 {
3941 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3942 pgmUnlock(pVM);
3943 return;
3944 }
3945
3946 /* we can free the node. */
3947 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3948 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3949 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3950 {
3951 /* lonely node */
3952 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3953 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3954 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3955 }
3956 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3957 {
3958 /* head */
3959 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3960 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3961 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3962 }
3963 else
3964 {
3965 /* in list */
3966 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3967 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3968 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3969 }
3970 iPhysExt = iPhysExtNext;
3971 pgmUnlock(pVM);
3972 return;
3973 }
3974 }
3975
3976 /* next */
3977 iPhysExtPrev = iPhysExt;
3978 iPhysExt = paPhysExts[iPhysExt].iNext;
3979 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3980
3981 pgmUnlock(pVM);
3982 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3983 }
3984 else /* nothing to do */
3985 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3986}
3987
3988
3989/**
3990 * Clear references to guest physical memory.
3991 *
3992 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3993 * is assumed to be correct, so the linear search can be skipped and we can assert
3994 * at an earlier point.
3995 *
3996 * @param pPool The pool.
3997 * @param pPage The page.
3998 * @param HCPhys The host physical address corresponding to the guest page.
3999 * @param GCPhys The guest physical address corresponding to HCPhys.
4000 */
4001static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
4002{
4003 /*
4004 * Walk range list.
4005 */
4006 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4007 while (pRam)
4008 {
4009 RTGCPHYS off = GCPhys - pRam->GCPhys;
4010 if (off < pRam->cb)
4011 {
4012 /* does it match? */
4013 const unsigned iPage = off >> PAGE_SHIFT;
4014 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4015#ifdef LOG_ENABLED
4016 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
4017 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4018#endif
4019 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4020 {
4021 Assert(pPage->cPresent);
4022 Assert(pPool->cPresent);
4023 pPage->cPresent--;
4024 pPool->cPresent--;
4025 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
4026 return;
4027 }
4028 break;
4029 }
4030 pRam = pRam->CTX_SUFF(pNext);
4031 }
4032 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4033}
4034
4035
4036/**
4037 * Clear references to guest physical memory.
4038 *
4039 * @param pPool The pool.
4040 * @param pPage The page.
4041 * @param HCPhys The host physical address corresponding to the guest page.
4042 * @param   GCPhysHint  The guest physical address which may correspond to HCPhys.
4043 */
4044void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
4045{
4046 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4047
4048 /*
4049 * Walk range list.
4050 */
4051 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4052 while (pRam)
4053 {
4054 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
4055 if (off < pRam->cb)
4056 {
4057 /* does it match? */
4058 const unsigned iPage = off >> PAGE_SHIFT;
4059 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4060 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4061 {
4062 Assert(pPage->cPresent);
4063 Assert(pPool->cPresent);
4064 pPage->cPresent--;
4065 pPool->cPresent--;
4066 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
4067 return;
4068 }
4069 break;
4070 }
4071 pRam = pRam->CTX_SUFF(pNext);
4072 }
4073
4074 /*
4075 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4076 */
4077 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4078 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4079 while (pRam)
4080 {
4081 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4082 while (iPage-- > 0)
4083 {
4084 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4085 {
4086 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4087 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4088 Assert(pPage->cPresent);
4089 Assert(pPool->cPresent);
4090 pPage->cPresent--;
4091 pPool->cPresent--;
4092 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
4093 return;
4094 }
4095 }
4096 pRam = pRam->CTX_SUFF(pNext);
4097 }
4098
4099 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
4100}
4101
4102
4103/**
4104 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4105 *
4106 * @param pPool The pool.
4107 * @param pPage The page.
4108 * @param pShwPT The shadow page table (mapping of the page).
4109 * @param pGstPT The guest page table.
4110 */
4111DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4112{
4113 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4114 if (pShwPT->a[i].n.u1Present)
4115 {
4116 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4117 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4118 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
4119 if (!pPage->cPresent)
4120 break;
4121 }
4122}
4123
4124
4125/**
4126 * Clear references to guest physical memory in a PAE / 32-bit page table.
4127 *
4128 * @param pPool The pool.
4129 * @param pPage The page.
4130 * @param pShwPT The shadow page table (mapping of the page).
4131 * @param pGstPT The guest page table (just a half one).
4132 */
4133DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
4134{
4135 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4136 if (pShwPT->a[i].n.u1Present)
4137 {
4138 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4139 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4140 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
4141 if (!pPage->cPresent)
4142 break;
4143 }
4144}
4145
4146
4147/**
4148 * Clear references to guest physical memory in a PAE / PAE page table.
4149 *
4150 * @param pPool The pool.
4151 * @param pPage The page.
4152 * @param pShwPT The shadow page table (mapping of the page).
4153 * @param pGstPT The guest page table.
4154 */
4155DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
4156{
4157 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4158 if (pShwPT->a[i].n.u1Present)
4159 {
4160            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4161 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4162 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
4163 if (!pPage->cPresent)
4164 break;
4165 }
4166}
4167
4168
4169/**
4170 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4171 *
4172 * @param pPool The pool.
4173 * @param pPage The page.
4174 * @param pShwPT The shadow page table (mapping of the page).
4175 */
4176DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4177{
4178 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4179 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4180 if (pShwPT->a[i].n.u1Present)
4181 {
4182 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4183 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4184 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
4185 if (!pPage->cPresent)
4186 break;
4187 }
4188}
4189
4190
4191/**
4192 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4193 *
4194 * @param pPool The pool.
4195 * @param pPage The page.
4196 * @param pShwPT The shadow page table (mapping of the page).
4197 */
4198DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
4199{
4200 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4201 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4202 if (pShwPT->a[i].n.u1Present)
4203 {
4204 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4205 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
4206 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
4207 if (!pPage->cPresent)
4208 break;
4209 }
4210}
4211
4212
4213/**
4214 * Clear references to shadowed pages in an EPT page table.
4215 *
4216 * @param pPool The pool.
4217 * @param pPage The page.
4218 * @param   pShwPT      The shadow page table (mapping of the page).
4219 */
4220DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4221{
4222 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4223 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4224 if (pShwPT->a[i].n.u1Present)
4225 {
4226 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4227 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4228 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
4229 if (!pPage->cPresent)
4230 break;
4231 }
4232}
4233
4234
4235
4236/**
4237 * Clear references to shadowed pages in a 32-bit page directory.
4238 *
4239 * @param pPool The pool.
4240 * @param pPage The page.
4241 * @param pShwPD The shadow page directory (mapping of the page).
4242 */
4243DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4244{
4245 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4246 {
4247 if ( pShwPD->a[i].n.u1Present
4248 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4249 )
4250 {
4251 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4252 if (pSubPage)
4253 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4254 else
4255 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4256 }
4257 }
4258}
4259
4260/**
4261 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4262 *
4263 * @param pPool The pool.
4264 * @param pPage The page.
4265 * @param pShwPD The shadow page directory (mapping of the page).
4266 */
4267DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4268{
4269 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4270 {
4271 if ( pShwPD->a[i].n.u1Present
4272 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4273 )
4274 {
4275#ifdef PGM_WITH_LARGE_PAGES
4276 if (pShwPD->a[i].b.u1Size)
4277 {
4278 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4279 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4280 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */);
4281 }
4282 else
4283#endif
4284 {
4285 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4286 if (pSubPage)
4287 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4288 else
4289 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4290 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4291 }
4292 }
4293 }
4294}
4295
4296/**
4297 * Clear references to shadowed pages in a PAE page directory pointer table.
4298 *
4299 * @param pPool The pool.
4300 * @param pPage The page.
4301 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4302 */
4303DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4304{
4305 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4306 {
4307 if ( pShwPDPT->a[i].n.u1Present
4308 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4309 )
4310 {
4311 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4312 if (pSubPage)
4313 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4314 else
4315 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4316 }
4317 }
4318}
4319
4320
4321/**
4322 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4323 *
4324 * @param pPool The pool.
4325 * @param pPage The page.
4326 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4327 */
4328DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4329{
4330 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4331 {
4332 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
4333 if (pShwPDPT->a[i].n.u1Present)
4334 {
4335 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4336 if (pSubPage)
4337 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4338 else
4339 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4340 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4341 }
4342 }
4343}
4344
4345
4346/**
4347 * Clear references to shadowed pages in a 64-bit level 4 page table.
4348 *
4349 * @param pPool The pool.
4350 * @param pPage The page.
4351 * @param   pShwPML4    The shadow level 4 page table (mapping of the page).
4352 */
4353DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4354{
4355 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4356 {
4357 if (pShwPML4->a[i].n.u1Present)
4358 {
4359 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4360 if (pSubPage)
4361 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4362 else
4363 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4364 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4365 }
4366 }
4367}
4368
4369
4370/**
4371 * Clear references to shadowed pages in an EPT page directory.
4372 *
4373 * @param pPool The pool.
4374 * @param pPage The page.
4375 * @param pShwPD The shadow page directory (mapping of the page).
4376 */
4377DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4378{
4379 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4380 {
4381 if (pShwPD->a[i].n.u1Present)
4382 {
4383#ifdef PGM_WITH_LARGE_PAGES
4384 if (pShwPD->a[i].b.u1Size)
4385 {
4386 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4387 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4388 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */);
4389 }
4390 else
4391#endif
4392 {
4393 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4394 if (pSubPage)
4395 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4396 else
4397 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4398 }
4399 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4400 }
4401 }
4402}
4403
4404
4405/**
4406 * Clear references to shadowed pages in an EPT page directory pointer table.
4407 *
4408 * @param pPool The pool.
4409 * @param pPage The page.
4410 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4411 */
4412DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4413{
4414 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4415 {
4416 if (pShwPDPT->a[i].n.u1Present)
4417 {
4418 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4419 if (pSubPage)
4420 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4421 else
4422 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4423 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4424 }
4425 }
4426}
4427
4428
4429/**
4430 * Clears all references made by this page.
4431 *
4432 * This includes other shadow pages and GC physical addresses.
4433 *
4434 * @param pPool The pool.
4435 * @param pPage The page.
4436 */
4437static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4438{
4439 /*
4440 * Map the shadow page and take action according to the page kind.
4441 */
4442 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
4443 switch (pPage->enmKind)
4444 {
4445 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4446 {
4447 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4448 void *pvGst;
4449 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4450 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4451 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4452 break;
4453 }
4454
4455 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4456 {
4457 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4458 void *pvGst;
4459 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4460 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
4461 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4462 break;
4463 }
4464
4465 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4466 {
4467 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4468 void *pvGst;
4469 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4470 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
4471 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4472 break;
4473 }
4474
4475 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4476 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4477 {
4478 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4479 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4480 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4481 break;
4482 }
4483
4484 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4485 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4486 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4487 {
4488 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4489 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4490 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4491 break;
4492 }
4493
4494 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4495 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4496 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4497 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4498 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4499 case PGMPOOLKIND_PAE_PD_PHYS:
4500 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4501 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4502 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4503 break;
4504
4505 case PGMPOOLKIND_32BIT_PD_PHYS:
4506 case PGMPOOLKIND_32BIT_PD:
4507 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4508 break;
4509
4510 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4511 case PGMPOOLKIND_PAE_PDPT:
4512 case PGMPOOLKIND_PAE_PDPT_PHYS:
4513 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4514 break;
4515
4516 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4517 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4518 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4519 break;
4520
4521 case PGMPOOLKIND_64BIT_PML4:
4522 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4523 break;
4524
4525 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4526 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4527 break;
4528
4529 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4530 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4531 break;
4532
4533 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4534 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4535 break;
4536
4537 default:
4538 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4539 }
4540
4541    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4542 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4543 ASMMemZeroPage(pvShw);
4544 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4545 pPage->fZeroed = true;
4546 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4547 Assert(!pPage->cPresent);
4548}
4549
4550/**
4551 * Flushes a pool page.
4552 *
4553 * This moves the page to the free list after removing all user references to it.
4554 *
4555 * @returns VBox status code.
4556 * @retval VINF_SUCCESS on success.
4557 * @param pPool The pool.
4558 * @param   pPage       The shadow page.
4559 * @param   fFlush      Flush the TLBs when required (should only be false in very specific use cases!!)
4560 */
4561int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4562{
4563 PVM pVM = pPool->CTX_SUFF(pVM);
4564 bool fFlushRequired = false;
4565
4566 int rc = VINF_SUCCESS;
4567 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4568 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4569 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4570
4571 /*
4572 * Quietly reject any attempts at flushing any of the special root pages.
4573 */
4574 if (pPage->idx < PGMPOOL_IDX_FIRST)
4575 {
4576 AssertFailed(); /* can no longer happen */
4577 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4578 return VINF_SUCCESS;
4579 }
4580
4581 pgmLock(pVM);
4582
4583 /*
4584 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4585 */
4586 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4587 {
4588 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4589 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4590 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4591 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4592 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4593 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4594 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4595 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4596 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4597 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4598 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4599 pgmUnlock(pVM);
4600 return VINF_SUCCESS;
4601 }
4602
4603#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4604 /* Start a subset so we won't run out of mapping space. */
4605 PVMCPU pVCpu = VMMGetCpu(pVM);
4606 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4607#endif
4608
4609 /*
4610 * Mark the page as being in need of an ASMMemZeroPage().
4611 */
4612 pPage->fZeroed = false;
4613
4614#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4615 if (pPage->fDirty)
4616 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4617#endif
4618
4619 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4620 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4621 fFlushRequired = true;
4622
4623 /*
4624 * Clear the page.
4625 */
4626 pgmPoolTrackClearPageUsers(pPool, pPage);
4627 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4628 pgmPoolTrackDeref(pPool, pPage);
4629 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4630
4631 /*
4632 * Flush it from the cache.
4633 */
4634 pgmPoolCacheFlushPage(pPool, pPage);
4635
4636#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4637 /* Heavy stuff done. */
4638 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4639#endif
4640
4641 /*
4642     * Deregister the monitoring.
4643 */
4644 if (pPage->fMonitored)
4645 rc = pgmPoolMonitorFlush(pPool, pPage);
4646
4647 /*
4648 * Free the page.
4649 */
4650 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4651 pPage->iNext = pPool->iFreeHead;
4652 pPool->iFreeHead = pPage->idx;
4653 pPage->enmKind = PGMPOOLKIND_FREE;
4654 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4655 pPage->GCPhys = NIL_RTGCPHYS;
4656 pPage->fReusedFlushPending = false;
4657
4658 pPool->cUsedPages--;
4659
4660 /* Flush the TLBs of all VCPUs if required. */
4661 if ( fFlushRequired
4662 && fFlush)
4663 {
4664 PGM_INVL_ALL_VCPU_TLBS(pVM);
4665 }
4666
4667 pgmUnlock(pVM);
4668 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4669 return rc;
4670}
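
/*
 * Illustrative sketch (hypothetical caller; pPage1 and pPage2 are placeholders) of the
 * fFlush contract of pgmPoolFlushPage() above: when several pages are flushed back to
 * back, a caller may pass false and issue one combined TLB flush afterwards.
 */
#if 0
    pgmPoolFlushPage(pPool, pPage1, false /*fFlush*/);
    pgmPoolFlushPage(pPool, pPage2, false /*fFlush*/);
    PGM_INVL_ALL_VCPU_TLBS(pVM);                /* the deferred flush the calls above skipped. */
#endif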
4671
4672
4673/**
4674 * Frees a usage of a pool page.
4675 *
4676 * The caller is responsible for updating the user table so that it no longer
4677 * references the shadow page.
4678 *
4679 * @param pPool The pool.
4680 * @param   pPage       The shadow page.
4681 * @param iUser The shadow page pool index of the user table.
4682 * @param iUserTable The index into the user table (shadowed).
4683 */
4684void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4685{
4686 PVM pVM = pPool->CTX_SUFF(pVM);
4687
4688 STAM_PROFILE_START(&pPool->StatFree, a);
4689 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4690 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4691 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4692 pgmLock(pVM);
4693 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4694 if (!pPage->fCached)
4695 pgmPoolFlushPage(pPool, pPage);
4696 pgmUnlock(pVM);
4697 STAM_PROFILE_STOP(&pPool->StatFree, a);
4698}
4699
4700
4701/**
4702 * Makes one or more pages free.
4703 *
4704 * @returns VBox status code.
4705 * @retval VINF_SUCCESS on success.
4706 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4707 *
4708 * @param pPool The pool.
4709 * @param enmKind Page table kind
4710 * @param iUser The user of the page.
4711 */
4712static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4713{
4714 PVM pVM = pPool->CTX_SUFF(pVM);
4715
4716 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4717
4718 /*
4719     * If the pool isn't fully grown yet, expand it.
4720 */
4721 if ( pPool->cCurPages < pPool->cMaxPages
4722#if defined(IN_RC)
4723 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4724 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4725 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4726#endif
4727 )
4728 {
4729 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4730#ifdef IN_RING3
4731 int rc = PGMR3PoolGrow(pVM);
4732#else
4733 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4734#endif
4735 if (RT_FAILURE(rc))
4736 return rc;
4737 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4738 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4739 return VINF_SUCCESS;
4740 }
4741
4742 /*
4743 * Free one cached page.
4744 */
4745 return pgmPoolCacheFreeOne(pPool, iUser);
4746}
4747
4748/**
4749 * Allocates a page from the pool.
4750 *
4751 * This page may actually be a cached page and not in need of any processing
4752 * on the caller's part.
4753 *
4754 * @returns VBox status code.
4755 * @retval VINF_SUCCESS if a NEW page was allocated.
4756 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4757 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4758 * @param pVM The VM handle.
4759 * @param   GCPhys      The GC physical address of the page we're going to shadow.
4760 * For 4MB and 2MB PD entries, it's the first address the
4761 * shadow PT is covering.
4762 * @param enmKind The kind of mapping.
4763 * @param enmAccess Access type for the mapping (only relevant for big pages)
4764 * @param iUser The shadow page pool index of the user table.
4765 * @param iUserTable The index into the user table (shadowed).
4766 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4767 * @param fLockPage Lock the page
4768 */
4769int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4770{
4771 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4772 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4773 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4774 *ppPage = NULL;
4775 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4776 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4777 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4778
4779 pgmLock(pVM);
4780
4781 if (pPool->fCacheEnabled)
4782 {
4783 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4784 if (RT_SUCCESS(rc2))
4785 {
4786 if (fLockPage)
4787 pgmPoolLockPage(pPool, *ppPage);
4788 pgmUnlock(pVM);
4789 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4790 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4791 return rc2;
4792 }
4793 }
4794
4795 /*
4796 * Allocate a new one.
4797 */
4798 int rc = VINF_SUCCESS;
4799 uint16_t iNew = pPool->iFreeHead;
4800 if (iNew == NIL_PGMPOOL_IDX)
4801 {
4802 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4803 if (RT_FAILURE(rc))
4804 {
4805 pgmUnlock(pVM);
4806 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4807 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4808 return rc;
4809 }
4810 iNew = pPool->iFreeHead;
4811 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4812 }
4813
4814 /* unlink the free head */
4815 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4816 pPool->iFreeHead = pPage->iNext;
4817 pPage->iNext = NIL_PGMPOOL_IDX;
4818
4819 /*
4820 * Initialize it.
4821 */
4822 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4823 pPage->enmKind = enmKind;
4824 pPage->enmAccess = enmAccess;
4825 pPage->GCPhys = GCPhys;
4826 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4827 pPage->fMonitored = false;
4828 pPage->fCached = false;
4829#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4830 pPage->fDirty = false;
4831#endif
4832 pPage->fReusedFlushPending = false;
4833 pPage->cModifications = 0;
4834 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4835 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4836 pPage->cPresent = 0;
4837 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
4838 pPage->pvLastAccessHandlerFault = 0;
4839 pPage->cLastAccessHandlerCount = 0;
4840 pPage->pvLastAccessHandlerRip = 0;
4841
4842 /*
4843 * Insert into the tracking and cache. If this fails, free the page.
4844 */
4845 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4846 if (RT_FAILURE(rc3))
4847 {
4848 pPool->cUsedPages--;
4849 pPage->enmKind = PGMPOOLKIND_FREE;
4850 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4851 pPage->GCPhys = NIL_RTGCPHYS;
4852 pPage->iNext = pPool->iFreeHead;
4853 pPool->iFreeHead = pPage->idx;
4854 pgmUnlock(pVM);
4855 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4856 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4857 return rc3;
4858 }
4859
4860 /*
4861 * Commit the allocation, clear the page and return.
4862 */
4863#ifdef VBOX_WITH_STATISTICS
4864 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4865 pPool->cUsedPagesHigh = pPool->cUsedPages;
4866#endif
4867
4868 if (!pPage->fZeroed)
4869 {
4870 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4871 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4872 ASMMemZeroPage(pv);
4873 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4874 }
4875
4876 *ppPage = pPage;
4877 if (fLockPage)
4878 pgmPoolLockPage(pPool, pPage);
4879 pgmUnlock(pVM);
4880 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4881 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4882 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4883 return rc;
4884}
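
/*
 * Illustrative, hypothetical caller sketch for pgmPoolAllocEx() above (GCPhysPT, iUserPd
 * and iPdeIndex are placeholder names): allocating a shadow page table for a 32-bit guest
 * page table and distinguishing the cached case from the newly allocated one.
 */
#if 0
    PPGMPOOLPAGE pShwPage;
    int rc = pgmPoolAllocEx(pVM, GCPhysPT, PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT, PGMPOOLACCESS_DONTCARE,
                            iUserPd, iPdeIndex, &pShwPage, false /*fLockPage*/);
    if (rc == VINF_PGM_CACHED_PAGE)
        { /* an existing shadow PT was found in the cache; its entries are already valid. */ }
    else if (RT_SUCCESS(rc))
        { /* a new, zeroed shadow PT; the caller fills it in (e.g. during a page fault sync). */ }
    else
        { /* e.g. VERR_PGM_POOL_FLUSHED: back out and let the scheduled sync rebuild state. */ }
#endif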
4885
4886
4887/**
4888 * Frees a usage of a pool page.
4889 *
4890 * @param pVM The VM handle.
4891 * @param HCPhys The HC physical address of the shadow page.
4892 * @param iUser The shadow page pool index of the user table.
4893 * @param iUserTable The index into the user table (shadowed).
4894 */
4895void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4896{
4897 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4898 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4899 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4900}
4901
4902/**
4903 * Internal worker for finding an 'in-use' shadow page given by its physical address.
4904 *
4905 * @returns Pointer to the shadow page structure.
4906 * @param pPool The pool.
4907 * @param HCPhys The HC physical address of the shadow page.
4908 */
4909PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4910{
4911 PVM pVM = pPool->CTX_SUFF(pVM);
4912
4913 Assert(PGMIsLockOwner(pVM));
4914
4915 /*
4916 * Look up the page.
4917 */
4918 pgmLock(pVM);
4919 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4920 pgmUnlock(pVM);
4921
4922 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4923 return pPage;
4924}
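// Minimal lookup sketch: pgmPoolGetPage masks the address down to a page
// boundary itself (X86_PTE_PAE_PG_MASK above), so attribute bits from a CR3 or
// PDE value do not need to be stripped first. HCPhysShwCR3 is a placeholder.
//
//     PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, HCPhysShwCR3);
//     Log(("shadow root: idx=%d enmKind=%d Key=%RHp\n",
//          pShwPage->idx, pShwPage->enmKind, pShwPage->Core.Key));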
4925
4926#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
4927/**
4928  * Flushes the specified page if present.
4929 *
4930 * @param pVM The VM handle.
4931  * @param GCPhys Guest physical address of the page to flush.
4932 */
4933void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
4934{
4935 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4936
4937 VM_ASSERT_EMT(pVM);
4938
4939 /*
4940 * Look up the GCPhys in the hash.
4941 */
4942 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
4943 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
4944 if (i == NIL_PGMPOOL_IDX)
4945 return;
4946
4947 do
4948 {
4949 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4950 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
4951 {
4952 switch (pPage->enmKind)
4953 {
4954 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4955 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4956 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4957 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4958 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4959 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4960 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4961 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4962 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4963 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4964 case PGMPOOLKIND_64BIT_PML4:
4965 case PGMPOOLKIND_32BIT_PD:
4966 case PGMPOOLKIND_PAE_PDPT:
4967 {
4968 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
4969#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4970 if (pPage->fDirty)
4971 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
4972 else
4973#endif
4974 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
4975 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
4976 pgmPoolMonitorChainFlush(pPool, pPage);
4977 return;
4978 }
4979
4980 /* ignore, no monitoring. */
4981 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4982 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4983 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4984 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4985 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4986 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4987 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4988 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4989 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4990 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4991 case PGMPOOLKIND_ROOT_NESTED:
4992 case PGMPOOLKIND_PAE_PD_PHYS:
4993 case PGMPOOLKIND_PAE_PDPT_PHYS:
4994 case PGMPOOLKIND_32BIT_PD_PHYS:
4995 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4996 break;
4997
4998 default:
4999 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5000 }
5001 }
5002
5003 /* next */
5004 i = pPage->iNext;
5005 } while (i != NIL_PGMPOOL_IDX);
5006 return;
5007}
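// Hedged ring-3 usage sketch: flush the shadow copy of a guest page before its
// contents are changed behind PGM's back (e.g. when a page is freed or
// remapped). GCPhysGuestPT is a placeholder; taking the PGM lock around the
// call is an assumption, mirroring the other pool entry points.
//
//     pgmLock(pVM);
//     pgmPoolFlushPageByGCPhys(pVM, GCPhysGuestPT);
//     pgmUnlock(pVM);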
5008#endif /* IN_RING3 */
5009
5010#ifdef IN_RING3
5011
5012
5013/**
5014 * Reset CPU on hot plugging.
5015 *
5016 * @param pVM The VM handle.
5017 * @param pVCpu The virtual CPU.
5018 */
5019void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5020{
5021 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5022
5023 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5024 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5025 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5026}
5027
5028
5029/**
5030 * Flushes the entire cache.
5031 *
5032 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5033  * this and will execute the CR3 flush.
5034 *
5035  * @param pVM The VM handle.
5036 */
5037void pgmR3PoolReset(PVM pVM)
5038{
5039 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5040
5041 Assert(PGMIsLockOwner(pVM));
5042 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5043 LogFlow(("pgmR3PoolReset:\n"));
5044
5045 /*
5046 * If there are no pages in the pool, there is nothing to do.
5047 */
5048 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5049 {
5050 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5051 return;
5052 }
5053
5054 /*
5055 * Exit the shadow mode since we're going to clear everything,
5056 * including the root page.
5057 */
5058 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5059 {
5060 PVMCPU pVCpu = &pVM->aCpus[i];
5061 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5062 }
5063
5064 /*
5065 * Nuke the free list and reinsert all pages into it.
5066 */
5067 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5068 {
5069 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5070
5071 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5072 if (pPage->fMonitored)
5073 pgmPoolMonitorFlush(pPool, pPage);
5074 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5075 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5076 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5077 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5078 pPage->cModifications = 0;
5079 pPage->GCPhys = NIL_RTGCPHYS;
5080 pPage->enmKind = PGMPOOLKIND_FREE;
5081 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5082 Assert(pPage->idx == i);
5083 pPage->iNext = i + 1;
5084 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5085 pPage->fSeenNonGlobal = false;
5086 pPage->fMonitored = false;
5087#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5088 pPage->fDirty = false;
5089#endif
5090 pPage->fCached = false;
5091 pPage->fReusedFlushPending = false;
5092 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5093 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5094 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5095 pPage->cLocked = 0;
5096 }
5097 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5098 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5099 pPool->cUsedPages = 0;
5100
5101 /*
5102 * Zap and reinitialize the user records.
5103 */
5104 pPool->cPresent = 0;
5105 pPool->iUserFreeHead = 0;
5106 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5107 const unsigned cMaxUsers = pPool->cMaxUsers;
5108 for (unsigned i = 0; i < cMaxUsers; i++)
5109 {
5110 paUsers[i].iNext = i + 1;
5111 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5112 paUsers[i].iUserTable = 0xfffffffe;
5113 }
5114 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5115
5116 /*
5117 * Clear all the GCPhys links and rebuild the phys ext free list.
5118 */
5119 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
5120 pRam;
5121 pRam = pRam->CTX_SUFF(pNext))
5122 {
5123 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5124 while (iPage-- > 0)
5125 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
5126 }
5127
5128 pPool->iPhysExtFreeHead = 0;
5129 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5130 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5131 for (unsigned i = 0; i < cMaxPhysExts; i++)
5132 {
5133 paPhysExts[i].iNext = i + 1;
5134 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5135 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5136 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5137 }
5138 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5139
5140 /*
5141 * Just zap the modified list.
5142 */
5143 pPool->cModifiedPages = 0;
5144 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5145
5146 /*
5147 * Clear the GCPhys hash and the age list.
5148 */
5149 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5150 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5151 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5152 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5153
5154#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5155 /* Clear all dirty pages. */
5156 pPool->idxFreeDirtyPage = 0;
5157 pPool->cDirtyPages = 0;
5158 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
5159 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
5160#endif
5161
5162 /*
5163 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5164 */
5165 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
5166 {
5167 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5168 pPage->iNext = NIL_PGMPOOL_IDX;
5169 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5170 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5171 pPage->cModifications = 0;
5172 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
5173 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5174 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5175 if (pPage->fMonitored)
5176 {
5177 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
5178 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
5179 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
5180 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5181 pPool->pszAccessHandler);
5182 AssertFatalRCSuccess(rc);
5183 pgmPoolHashInsert(pPool, pPage);
5184 }
5185 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5186 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5187 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5188 }
5189
5190 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5191 {
5192 /*
5193 * Re-enter the shadowing mode and assert Sync CR3 FF.
5194 */
5195 PVMCPU pVCpu = &pVM->aCpus[i];
5196 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5197 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5198 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5199 }
5200
5201 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5202}
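// Illustrative sanity sketch of the free-list invariant the loop above
// establishes (all non-special pages chained in index order, nothing in use):
//
//     Assert(pPool->iFreeHead == PGMPOOL_IDX_FIRST);
//     Assert(pPool->cUsedPages == 0);
//     for (unsigned i = PGMPOOL_IDX_FIRST; i + 1 < pPool->cCurPages; i++)
//         Assert(pPool->aPages[i].iNext == i + 1);
//     Assert(pPool->aPages[pPool->cCurPages - 1].iNext == NIL_PGMPOOL_IDX);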
5203#endif /* IN_RING3 */
5204
5205#ifdef LOG_ENABLED
5206static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5207{
5208 switch(enmKind)
5209 {
5210 case PGMPOOLKIND_INVALID:
5211 return "PGMPOOLKIND_INVALID";
5212 case PGMPOOLKIND_FREE:
5213 return "PGMPOOLKIND_FREE";
5214 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5215 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5216 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5217 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5218 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5219 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5220 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5221 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5222 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5223 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5224 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5225 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5226 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5227 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5228 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5229 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5230 case PGMPOOLKIND_32BIT_PD:
5231 return "PGMPOOLKIND_32BIT_PD";
5232 case PGMPOOLKIND_32BIT_PD_PHYS:
5233 return "PGMPOOLKIND_32BIT_PD_PHYS";
5234 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5235 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5236 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5237 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5238 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5239 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5240 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5241 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5242 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5243 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5244 case PGMPOOLKIND_PAE_PD_PHYS:
5245 return "PGMPOOLKIND_PAE_PD_PHYS";
5246 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5247 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5248 case PGMPOOLKIND_PAE_PDPT:
5249 return "PGMPOOLKIND_PAE_PDPT";
5250 case PGMPOOLKIND_PAE_PDPT_PHYS:
5251 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5252 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5253 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5254 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5255 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5256 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5257 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5258 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5259 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5260 case PGMPOOLKIND_64BIT_PML4:
5261 return "PGMPOOLKIND_64BIT_PML4";
5262 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5263 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5264 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5265 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5266 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5267 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5268 case PGMPOOLKIND_ROOT_NESTED:
5269 return "PGMPOOLKIND_ROOT_NESTED";
5270 }
5271 return "Unknown kind!";
5272}
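// Typical (hedged) use of the helper above in logging statements; pPage stands
// for any PPGMPOOLPAGE being flushed or reused:
//
//     Log(("pgmPoolFlushPage: pPage=%p:{.idx=%d, .GCPhys=%RGp, .enmKind=%s}\n",
//          pPage, pPage->idx, pPage->GCPhys, pgmPoolPoolKindToStr(pPage->enmKind)));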
5273#endif /* LOG_ENABLED */