VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 29287

Last change on this file since 29287 was 29250, checked in by vboxsync, 15 years ago

iprt/asm*.h: split out asm-math.h, don't include asm-*.h from asm.h, don't include asm.h from sup.h. Fixed a couple file headers.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 197.4 KB
1/* $Id: PGMAllPool.cpp 29250 2010-05-09 17:53:58Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/pgm.h>
24#include <VBox/mm.h>
25#include <VBox/em.h>
26#include <VBox/cpum.h>
27#ifdef IN_RC
28# include <VBox/patm.h>
29#endif
30#include "../PGMInternal.h"
31#include <VBox/vm.h>
32#include "../PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/hwacc_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46RT_C_DECLS_BEGIN
47static void pgmPoolFlushAllInt(PPGMPOOL pPool);
48DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
49DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
50static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
51static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
52static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#ifndef IN_RING3
54DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
55#endif
56#ifdef LOG_ENABLED
57static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
58#endif
59#if defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)
60static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT);
61#endif
62
63int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
64PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
65void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
66void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
67
68RT_C_DECLS_END
69
70
71/**
72 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
73 *
74 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
75 * @param enmKind The page kind.
76 */
77DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
78{
79 switch (enmKind)
80 {
81 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
82 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
83 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
84 return true;
85 default:
86 return false;
87 }
88}
89
90/** @def PGMPOOL_PAGE_2_LOCKED_PTR
91 * Maps a pool page into the current context and locks it (RC only).
92 *
93 * @returns Pointer to the mapped page.
94 * @param pVM The VM handle.
95 * @param pPage The pool page.
96 *
97 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
98 * small page window employed by that function. Be careful.
99 * @remark There is no need to assert on the result.
100 */
101#if defined(IN_RC)
102DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
103{
104 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
105
106 /* Make sure the dynamic mapping will not be reused. */
107 if (pv)
108 PGMDynLockHCPage(pVM, (uint8_t *)pv);
109
110 return pv;
111}
112#else
113# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
114#endif
115
116/** @def PGMPOOL_UNLOCK_PTR
117 * Unlocks a previously locked dynamic mapping (RC only).
118 *
119 * @returns nothing.
120 * @param pVM The VM handle.
121 * @param pPage The pool page.
122 *
123 * @remark In RC this uses PGMDynUnlockHCPage() to release the small page
124 * window entry taken by PGMGCDynMapHCPage(). Be careful.
125 * @remark There is no need to assert on the result.
126 */
127#if defined(IN_RC)
128DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
129{
130 if (pvPage)
131 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
132}
133#else
134# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
135#endif
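/* Illustrative usage of the two helpers above (a sketch of the pattern used by
 * the monitor code later in this file, not an additional API):
 *
 *     uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);   map it (and lock it in RC)
 *     ... read or modify the shadow entries through uShw ...
 *     PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);                  no-op outside RC
 */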
136
137
138/**
139 * Flushes a chain of pages sharing the same access monitor.
140 *
141 * @returns VBox status code suitable for scheduling.
142 * @param pPool The pool.
143 * @param pPage A page in the chain.
144 */
145int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
146{
147 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
148
149 /*
150 * Find the list head.
151 */
152 uint16_t idx = pPage->idx;
153 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
154 {
155 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
156 {
157 idx = pPage->iMonitoredPrev;
158 Assert(idx != pPage->idx);
159 pPage = &pPool->aPages[idx];
160 }
161 }
162
163 /*
164 * Iterate the list flushing each shadow page.
165 */
166 int rc = VINF_SUCCESS;
167 for (;;)
168 {
169 idx = pPage->iMonitoredNext;
170 Assert(idx != pPage->idx);
171 if (pPage->idx >= PGMPOOL_IDX_FIRST)
172 {
173 int rc2 = pgmPoolFlushPage(pPool, pPage);
174 AssertRC(rc2);
175 }
176 /* next */
177 if (idx == NIL_PGMPOOL_IDX)
178 break;
179 pPage = &pPool->aPages[idx];
180 }
181 return rc;
182}
183
184
185/**
186 * Wrapper for getting the current context pointer to the entry being modified.
187 *
188 * @returns VBox status code suitable for scheduling.
189 * @param pVM VM Handle.
190 * @param pvDst Destination address
191 * @param pvSrc Source guest virtual address.
192 * @param GCPhysSrc The source guest physical address.
193 * @param cb Size of data to read
194 */
195DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
196{
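 /* Note: the source address is aligned down to the entry size (cb) so that the
    complete guest entry is read even when the faulting write starts mid-entry. */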
197#if defined(IN_RING3)
198 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
199 return VINF_SUCCESS;
200#else
201 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
202 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
203#endif
204}
205
206/**
207 * Process shadow entries before they are changed by the guest.
208 *
209 * For PT entries we will clear them. For PD entries, we'll simply check
210 * for mapping conflicts and set the SyncCR3 FF if found.
211 *
212 * @param pVCpu VMCPU handle
213 * @param pPool The pool.
214 * @param pPage The head page.
215 * @param GCPhysFault The guest physical fault address.
216 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
217 * In R3 this is the host context 'fault' address.
218 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
219 */
220void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
221{
222 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
223 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
224 PVM pVM = pPool->CTX_SUFF(pVM);
225
226 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
227
228 for (;;)
229 {
230 union
231 {
232 void *pv;
233 PX86PT pPT;
234 PX86PTPAE pPTPae;
235 PX86PD pPD;
236 PX86PDPAE pPDPae;
237 PX86PDPT pPDPT;
238 PX86PML4 pPML4;
239 } uShw;
240
241 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
242
243 uShw.pv = NULL;
244 switch (pPage->enmKind)
245 {
246 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
247 {
248 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
249 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
250 const unsigned iShw = off / sizeof(X86PTE);
251 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
252 if (uShw.pPT->a[iShw].n.u1Present)
253 {
254 X86PTE GstPte;
255
256 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
257 AssertRC(rc);
258 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
259 pgmPoolTracDerefGCPhysHint(pPool, pPage,
260 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
261 GstPte.u & X86_PTE_PG_MASK,
262 iShw);
263 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
264 }
265 break;
266 }
267
268 /* page/2 sized */
269 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
270 {
271 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
272 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
273 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
274 {
275 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
276 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
277 if (uShw.pPTPae->a[iShw].n.u1Present)
278 {
279 X86PTE GstPte;
280 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
281 AssertRC(rc);
282
283 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
284 pgmPoolTracDerefGCPhysHint(pPool, pPage,
285 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
286 GstPte.u & X86_PTE_PG_MASK,
287 iShw);
288 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
289 }
290 }
291 break;
292 }
293
294 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
295 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
296 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
297 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
298 {
299 unsigned iGst = off / sizeof(X86PDE);
300 unsigned iShwPdpt = iGst / 256;
301 unsigned iShw = (iGst % 256) * 2;
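 /* The 1024-entry 32-bit guest PD is shadowed by four 512-entry PAE PDs (256 guest
    entries each), and every 4MB guest PDE maps onto two 2MB PAE PDEs; hence the
    iGst / 256 split and the doubling of the shadow index. */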
302 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
303
304 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
305 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
306 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
307 {
308 for (unsigned i = 0; i < 2; i++)
309 {
310# ifndef IN_RING0
311 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
312 {
313 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
314 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
315 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
316 break;
317 }
318 else
319# endif /* !IN_RING0 */
320 if (uShw.pPDPae->a[iShw+i].n.u1Present)
321 {
322 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
323 pgmPoolFree(pVM,
324 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
325 pPage->idx,
326 iShw + i);
327 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
328 }
329
330 /* paranoia / a bit assumptive. */
331 if ( (off & 3)
332 && (off & 3) + cbWrite > 4)
333 {
334 const unsigned iShw2 = iShw + 2 + i;
335 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
336 {
337# ifndef IN_RING0
338 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
339 {
340 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
341 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
342 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
343 break;
344 }
345 else
346# endif /* !IN_RING0 */
347 if (uShw.pPDPae->a[iShw2].n.u1Present)
348 {
349 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
350 pgmPoolFree(pVM,
351 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
352 pPage->idx,
353 iShw2);
354 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
355 }
356 }
357 }
358 }
359 }
360 break;
361 }
362
363 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
364 {
365 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
366 const unsigned iShw = off / sizeof(X86PTEPAE);
367 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
368 if (uShw.pPTPae->a[iShw].n.u1Present)
369 {
370 X86PTEPAE GstPte;
371 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
372 AssertRC(rc);
373
374 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
375 pgmPoolTracDerefGCPhysHint(pPool, pPage,
376 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
377 GstPte.u & X86_PTE_PAE_PG_MASK,
378 iShw);
379 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
380 }
381
382 /* paranoia / a bit assumptive. */
383 if ( (off & 7)
384 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
385 {
386 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
387 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
388
389 if (uShw.pPTPae->a[iShw2].n.u1Present)
390 {
391 X86PTEPAE GstPte;
392# ifdef IN_RING3
393 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
394# else
395 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
396# endif
397 AssertRC(rc);
398 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
399 pgmPoolTracDerefGCPhysHint(pPool, pPage,
400 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
401 GstPte.u & X86_PTE_PAE_PG_MASK,
402 iShw2);
403 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u, 0);
404 }
405 }
406 break;
407 }
408
409 case PGMPOOLKIND_32BIT_PD:
410 {
411 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
412 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
413
414 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
415 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
416# ifndef IN_RING0
417 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
418 {
419 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
420 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
421 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
422 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
423 break;
424 }
425# endif /* !IN_RING0 */
426# ifndef IN_RING0
427 else
428# endif /* !IN_RING0 */
429 {
430 if (uShw.pPD->a[iShw].n.u1Present)
431 {
432 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
433 pgmPoolFree(pVM,
434 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
435 pPage->idx,
436 iShw);
437 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
438 }
439 }
440 /* paranoia / a bit assumptive. */
441 if ( (off & 3)
442 && (off & 3) + cbWrite > sizeof(X86PTE))
443 {
444 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
445 if ( iShw2 != iShw
446 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
447 {
448# ifndef IN_RING0
449 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
450 {
451 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
452 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
453 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
454 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
455 break;
456 }
457# endif /* !IN_RING0 */
458# ifndef IN_RING0
459 else
460# endif /* !IN_RING0 */
461 {
462 if (uShw.pPD->a[iShw2].n.u1Present)
463 {
464 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
465 pgmPoolFree(pVM,
466 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
467 pPage->idx,
468 iShw2);
469 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
470 }
471 }
472 }
473 }
474#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
475 if ( uShw.pPD->a[iShw].n.u1Present
476 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
477 {
478 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
479# ifdef IN_RC /* TLB load - we're pushing things a bit... */
480 ASMProbeReadByte(pvAddress);
481# endif
482 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
483 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
484 }
485#endif
486 break;
487 }
488
489 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
490 {
491 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
492 const unsigned iShw = off / sizeof(X86PDEPAE);
493 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
494#ifndef IN_RING0
495 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
496 {
497 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
498 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
499 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
500 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
501 break;
502 }
503#endif /* !IN_RING0 */
504 /*
505 * Causes trouble when the guest uses a PDE to refer to the whole page table level
506 * structure. (Invalidate here; faults later on when it tries to change the page
507 * table entries -> recheck; probably only applies to the RC case.)
508 */
509# ifndef IN_RING0
510 else
511# endif /* !IN_RING0 */
512 {
513 if (uShw.pPDPae->a[iShw].n.u1Present)
514 {
515 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
516 pgmPoolFree(pVM,
517 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
518 pPage->idx,
519 iShw);
520 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
521 }
522 }
523 /* paranoia / a bit assumptive. */
524 if ( (off & 7)
525 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
526 {
527 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
528 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
529
530#ifndef IN_RING0
531 if ( iShw2 != iShw
532 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
533 {
534 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
535 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
536 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
537 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
538 break;
539 }
540#endif /* !IN_RING0 */
541# ifndef IN_RING0
542 else
543# endif /* !IN_RING0 */
544 if (uShw.pPDPae->a[iShw2].n.u1Present)
545 {
546 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
547 pgmPoolFree(pVM,
548 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
549 pPage->idx,
550 iShw2);
551 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
552 }
553 }
554 break;
555 }
556
557 case PGMPOOLKIND_PAE_PDPT:
558 {
559 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
560 /*
561 * Hopefully this doesn't happen very often:
562 * - touching unused parts of the page
563 * - messing with the bits of pd pointers without changing the physical address
564 */
565 /* PDPT roots are not page aligned; 32 byte only! */
566 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
567
568 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
569 const unsigned iShw = offPdpt / sizeof(X86PDPE);
570 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
571 {
572# ifndef IN_RING0
573 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
574 {
575 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
576 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
577 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
578 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
579 break;
580 }
581# endif /* !IN_RING0 */
582# ifndef IN_RING0
583 else
584# endif /* !IN_RING0 */
585 if (uShw.pPDPT->a[iShw].n.u1Present)
586 {
587 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
588 pgmPoolFree(pVM,
589 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
590 pPage->idx,
591 iShw);
592 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
593 }
594
595 /* paranoia / a bit assumptive. */
596 if ( (offPdpt & 7)
597 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
598 {
599 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
600 if ( iShw2 != iShw
601 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
602 {
603# ifndef IN_RING0
604 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
605 {
606 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
607 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
608 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
609 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
610 break;
611 }
612# endif /* !IN_RING0 */
613# ifndef IN_RING0
614 else
615# endif /* !IN_RING0 */
616 if (uShw.pPDPT->a[iShw2].n.u1Present)
617 {
618 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
619 pgmPoolFree(pVM,
620 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
621 pPage->idx,
622 iShw2);
623 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
624 }
625 }
626 }
627 }
628 break;
629 }
630
631#ifndef IN_RC
632 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
633 {
634 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
635 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
636 const unsigned iShw = off / sizeof(X86PDEPAE);
637 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
638 if (uShw.pPDPae->a[iShw].n.u1Present)
639 {
640 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
641 pgmPoolFree(pVM,
642 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
643 pPage->idx,
644 iShw);
645 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
646 }
647 /* paranoia / a bit assumptive. */
648 if ( (off & 7)
649 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
650 {
651 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
652 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
653
654 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
655 if (uShw.pPDPae->a[iShw2].n.u1Present)
656 {
657 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
658 pgmPoolFree(pVM,
659 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
660 pPage->idx,
661 iShw2);
662 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
663 }
664 }
665 break;
666 }
667
668 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
669 {
670 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
671 /*
672 * Hopefully this doesn't happen very often:
673 * - messing with the bits of pd pointers without changing the physical address
674 */
675 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
676 const unsigned iShw = off / sizeof(X86PDPE);
677 if (uShw.pPDPT->a[iShw].n.u1Present)
678 {
679 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
680 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
681 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
682 }
683 /* paranoia / a bit assumptive. */
684 if ( (off & 7)
685 && (off & 7) + cbWrite > sizeof(X86PDPE))
686 {
687 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
688 if (uShw.pPDPT->a[iShw2].n.u1Present)
689 {
690 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
691 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
692 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
693 }
694 }
695 break;
696 }
697
698 case PGMPOOLKIND_64BIT_PML4:
699 {
700 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
701 /*
702 * Hopefully this doesn't happen very often:
703 * - messing with the bits of pd pointers without changing the physical address
704 */
705 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
706 const unsigned iShw = off / sizeof(X86PDPE);
707 if (uShw.pPML4->a[iShw].n.u1Present)
708 {
709 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
710 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
711 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
712 }
713 /* paranoia / a bit assumptive. */
714 if ( (off & 7)
715 && (off & 7) + cbWrite > sizeof(X86PDPE))
716 {
717 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
718 if (uShw.pPML4->a[iShw2].n.u1Present)
719 {
720 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
721 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
722 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
723 }
724 }
725 break;
726 }
727#endif /* !IN_RC */
728
729 default:
730 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
731 }
732 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
733
734 /* next */
735 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
736 return;
737 pPage = &pPool->aPages[pPage->iMonitoredNext];
738 }
739}
740
741# ifndef IN_RING3
742/**
743 * Checks if an access could be a fork operation in progress.
744 *
745 * Meaning that the guest is setting up the parent process for Copy-On-Write.
746 *
747 * @returns true if it's likely that we're forking, otherwise false.
748 * @param pPool The pool.
749 * @param pDis The disassembled instruction.
750 * @param offFault The access offset.
751 */
752DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
753{
754 /*
755 * i386 linux is using btr to clear X86_PTE_RW.
756 * The functions involved are (2.6.16 source inspection):
757 * clear_bit
758 * ptep_set_wrprotect
759 * copy_one_pte
760 * copy_pte_range
761 * copy_pmd_range
762 * copy_pud_range
763 * copy_page_range
764 * dup_mmap
765 * dup_mm
766 * copy_mm
767 * copy_process
768 * do_fork
769 */
770 if ( pDis->pCurInstr->opcode == OP_BTR
771 && !(offFault & 4)
772 /** @todo Validate that the bit index is X86_PTE_RW. */
773 )
774 {
775 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
776 return true;
777 }
778 return false;
779}
780
781
782/**
783 * Determine whether the page is likely to have been reused.
784 *
785 * @returns true if we consider the page as being reused for a different purpose.
786 * @returns false if we consider it to still be a paging page.
787 * @param pVM VM Handle.
788 * @param pVCpu VMCPU Handle.
789 * @param pRegFrame Trap register frame.
790 * @param pDis The disassembly info for the faulting instruction.
791 * @param pvFault The fault address.
792 *
793 * @remark The REP prefix check is left to the caller because of STOSD/W.
794 */
795DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
796{
797#ifndef IN_RC
798 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
799 if ( HWACCMHasPendingIrq(pVM)
800 && (pRegFrame->rsp - pvFault) < 32)
801 {
802 /* Fault caused by stack writes while trying to inject an interrupt event. */
803 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
804 return true;
805 }
806#else
807 NOREF(pVM); NOREF(pvFault);
808#endif
809
810 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
811
812 /* Non-supervisor mode write means it's used for something else. */
813 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
814 return true;
815
816 switch (pDis->pCurInstr->opcode)
817 {
818 /* call implies the actual push of the return address faulted */
819 case OP_CALL:
820 Log4(("pgmPoolMonitorIsReused: CALL\n"));
821 return true;
822 case OP_PUSH:
823 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
824 return true;
825 case OP_PUSHF:
826 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
827 return true;
828 case OP_PUSHA:
829 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
830 return true;
831 case OP_FXSAVE:
832 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
833 return true;
834 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
835 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
836 return true;
837 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
838 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
839 return true;
840 case OP_MOVSWD:
841 case OP_STOSWD:
842 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
843 && pRegFrame->rcx >= 0x40
844 )
845 {
846 Assert(pDis->mode == CPUMODE_64BIT);
847
848 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
849 return true;
850 }
851 return false;
852 }
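 /* A write addressed via ESP/RSP is almost certainly stack traffic, which never
    targets a live page table, so treat the page as reused. */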
853 if ( ( (pDis->param1.flags & USE_REG_GEN32)
854 || (pDis->param1.flags & USE_REG_GEN64))
855 && (pDis->param1.base.reg_gen == USE_REG_ESP))
856 {
857 Log4(("pgmPoolMonitorIsReused: ESP\n"));
858 return true;
859 }
860
861 return false;
862}
863
864/**
865 * Flushes the page being accessed.
866 *
867 * @returns VBox status code suitable for scheduling.
868 * @param pVM The VM handle.
869 * @param pVCpu The VMCPU handle.
870 * @param pPool The pool.
871 * @param pPage The pool page (head).
872 * @param pDis The disassembly of the write instruction.
873 * @param pRegFrame The trap register frame.
874 * @param GCPhysFault The fault address as guest physical address.
875 * @param pvFault The fault address.
876 */
877static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
878 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
879{
880 /*
881 * First, do the flushing.
882 */
883 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
884
885 /*
886 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection). Must do this in raw mode (!); XP boot will fail otherwise
887 */
888 uint32_t cbWritten;
889 int rc2 = EMInterpretInstructionCPUEx(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten, EMCODETYPE_ALL);
890 if (RT_SUCCESS(rc2))
891 pRegFrame->rip += pDis->opsize;
892 else if (rc2 == VERR_EM_INTERPRETER)
893 {
894#ifdef IN_RC
895 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
896 {
897 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
898 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
899 rc = VINF_SUCCESS;
900 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
901 }
902 else
903#endif
904 {
905 rc = VINF_EM_RAW_EMULATE_INSTR;
906 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
907 }
908 }
909 else
910 rc = rc2;
911
912 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
913 return rc;
914}
915
916/**
917 * Handles the STOSD write accesses.
918 *
919 * @returns VBox status code suitable for scheduling.
920 * @param pVM The VM handle.
921 * @param pPool The pool.
922 * @param pPage The pool page (head).
923 * @param pDis The disassembly of the write instruction.
924 * @param pRegFrame The trap register frame.
925 * @param GCPhysFault The fault address as guest physical address.
926 * @param pvFault The fault address.
927 */
928DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
929 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
930{
931 unsigned uIncrement = pDis->param1.size;
932
933 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
934 Assert(pRegFrame->rcx <= 0x20);
935
936#ifdef VBOX_STRICT
937 if (pDis->opmode == CPUMODE_32BIT)
938 Assert(uIncrement == 4);
939 else
940 Assert(uIncrement == 8);
941#endif
942
943 Log3(("pgmPoolAccessHandlerSTOSD\n"));
944
945 /*
946 * Increment the modification counter and insert it into the list
947 * of modified pages the first time.
948 */
949 if (!pPage->cModifications++)
950 pgmPoolMonitorModifiedInsert(pPool, pPage);
951
952 /*
953 * Execute REP STOSD.
954 *
955 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
956 * write situation, meaning that it's safe to write here.
957 */
958 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
959 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
960 while (pRegFrame->rcx)
961 {
962#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
963 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
964 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
965 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
966#else
967 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
968#endif
969#ifdef IN_RC
970 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
971#else
972 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
973#endif
974 pu32 += uIncrement;
975 GCPhysFault += uIncrement;
976 pRegFrame->rdi += uIncrement;
977 pRegFrame->rcx--;
978 }
979 pRegFrame->rip += pDis->opsize;
980
981 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
982 return VINF_SUCCESS;
983}
984
985
986/**
987 * Handles the simple write accesses.
988 *
989 * @returns VBox status code suitable for scheduling.
990 * @param pVM The VM handle.
991 * @param pVCpu The VMCPU handle.
992 * @param pPool The pool.
993 * @param pPage The pool page (head).
994 * @param pDis The disassembly of the write instruction.
995 * @param pRegFrame The trap register frame.
996 * @param GCPhysFault The fault address as guest physical address.
997 * @param pvFault The fault address.
998 * @param pfReused Reused state (out)
999 */
1000DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1001 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
1002{
1003 Log3(("pgmPoolAccessHandlerSimple\n"));
1004 /*
1005 * Increment the modification counter and insert it into the list
1006 * of modified pages the first time.
1007 */
1008 if (!pPage->cModifications++)
1009 pgmPoolMonitorModifiedInsert(pPool, pPage);
1010
1011 /*
1012 * Clear all the pages. ASSUMES that pvFault is readable.
1013 */
1014#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1015 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1016 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1017 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1018#else
1019 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1020#endif
1021
1022 /*
1023 * Interpret the instruction.
1024 */
1025 uint32_t cb;
1026 int rc = EMInterpretInstructionCPUEx(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb, EMCODETYPE_ALL);
1027 if (RT_SUCCESS(rc))
1028 pRegFrame->rip += pDis->opsize;
1029 else if (rc == VERR_EM_INTERPRETER)
1030 {
1031 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1032 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1033 rc = VINF_EM_RAW_EMULATE_INSTR;
1034 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1035 }
1036
1037#if 0 /* experimental code */
1038 if (rc == VINF_SUCCESS)
1039 {
1040 switch (pPage->enmKind)
1041 {
1042 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1043 {
1044 X86PTEPAE GstPte;
1045 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1046 AssertRC(rc);
1047
1048 /* Check the new value written by the guest. If present and with a bogus physical address, then
1049 * it's fairly safe to assume the guest is reusing the PT.
1050 */
1051 if (GstPte.n.u1Present)
1052 {
1053 RTHCPHYS HCPhys = -1;
1054 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1055 if (rc != VINF_SUCCESS)
1056 {
1057 *pfReused = true;
1058 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1059 }
1060 }
1061 break;
1062 }
1063 }
1064 }
1065#endif
1066
1067 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1068 return rc;
1069}
1070
1071/**
1072 * \#PF Handler callback for PT write accesses.
1073 *
1074 * @returns VBox status code (appropriate for GC return).
1075 * @param pVM VM Handle.
1076 * @param uErrorCode CPU Error code.
1077 * @param pRegFrame Trap register frame.
1078 * NULL on DMA and other non CPU access.
1079 * @param pvFault The fault address (cr2).
1080 * @param GCPhysFault The GC physical address corresponding to pvFault.
1081 * @param pvUser User argument.
1082 */
1083DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1084{
1085 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1086 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1087 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1088 PVMCPU pVCpu = VMMGetCpu(pVM);
1089 unsigned cMaxModifications;
1090 bool fForcedFlush = false;
1091
1092 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1093
1094 pgmLock(pVM);
1095 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1096 {
1097 /* Pool page changed while we were waiting for the lock; ignore. */
1098 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1099 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1100 pgmUnlock(pVM);
1101 return VINF_SUCCESS;
1102 }
1103#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1104 if (pPage->fDirty)
1105 {
1106 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1107 pgmUnlock(pVM);
1108 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1109 }
1110#endif
1111
1112#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1113 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1114 {
1115 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1116 void *pvGst;
1117 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1118 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1119 }
1120#endif
1121
1122 /*
1123 * Disassemble the faulting instruction.
1124 */
1125 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1126 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1127 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1128 {
1129 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1130 pgmUnlock(pVM);
1131 return rc;
1132 }
1133
1134 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1135
1136 /*
1137 * We should ALWAYS have the list head as user parameter. This
1138 * is because we use that page to record the changes.
1139 */
1140 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1141
1142#ifdef IN_RING0
1143 /* Maximum nr of modifications depends on the page type. */
1144 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1145 cMaxModifications = 4;
1146 else
1147 cMaxModifications = 24;
1148#else
1149 cMaxModifications = 48;
1150#endif
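 /* (The lower ring-0 thresholds are presumably chosen because the dirty-page
    handling further down, which is ring-0 only, can take over for PAE page
    tables; raw mode tolerates more writes before giving up.) */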
1151
1152 /*
1153 * Incremental page table updates should weigh more than random ones.
1154 * (Only applies when started from offset 0)
1155 */
1156 pVCpu->pgm.s.cPoolAccessHandler++;
1157 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1158 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1159 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1160 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1161 {
1162 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1163 Assert(pPage->cModifications < 32000);
1164 pPage->cModifications = pPage->cModifications * 2;
1165 pPage->pvLastAccessHandlerFault = pvFault;
1166 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1167 if (pPage->cModifications >= cMaxModifications)
1168 {
1169 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1170 fForcedFlush = true;
1171 }
1172 }
1173
1174 if (pPage->cModifications >= cMaxModifications)
1175 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1176
1177 /*
1178 * Check if it's worth dealing with.
1179 */
1180 bool fReused = false;
1181 bool fNotReusedNotForking = false;
1182 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1183 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1184 )
1185 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1186 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1187 {
1188 /*
1189 * Simple instructions, no REP prefix.
1190 */
1191 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1192 {
1193 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1194 if (fReused)
1195 goto flushPage;
1196
1197 /* A mov instruction to change the first page table entry will be remembered so we can detect
1198 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1199 */
1200 if ( rc == VINF_SUCCESS
1201 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1202 && pDis->pCurInstr->opcode == OP_MOV
1203 && (pvFault & PAGE_OFFSET_MASK) == 0)
1204 {
1205 pPage->pvLastAccessHandlerFault = pvFault;
1206 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1207 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1208 /* Make sure we don't kick out a page too quickly. */
1209 if (pPage->cModifications > 8)
1210 pPage->cModifications = 2;
1211 }
1212 else
1213 if (pPage->pvLastAccessHandlerFault == pvFault)
1214 {
1215 /* ignore the 2nd write to this page table entry. */
1216 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1217 }
1218 else
1219 {
1220 pPage->pvLastAccessHandlerFault = 0;
1221 pPage->pvLastAccessHandlerRip = 0;
1222 }
1223
1224 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1225 pgmUnlock(pVM);
1226 return rc;
1227 }
1228
1229 /*
1230 * Windows is frequently doing small memset() operations (netio test 4k+).
1231 * We have to deal with these or we'll kill the cache and performance.
1232 */
1233 if ( pDis->pCurInstr->opcode == OP_STOSWD
1234 && !pRegFrame->eflags.Bits.u1DF
1235 && pDis->opmode == pDis->mode
1236 && pDis->addrmode == pDis->mode)
1237 {
1238 bool fValidStosd = false;
1239
1240 if ( pDis->mode == CPUMODE_32BIT
1241 && pDis->prefix == PREFIX_REP
1242 && pRegFrame->ecx <= 0x20
1243 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1244 && !((uintptr_t)pvFault & 3)
1245 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1246 )
1247 {
1248 fValidStosd = true;
1249 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1250 }
1251 else
1252 if ( pDis->mode == CPUMODE_64BIT
1253 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1254 && pRegFrame->rcx <= 0x20
1255 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1256 && !((uintptr_t)pvFault & 7)
1257 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1258 )
1259 {
1260 fValidStosd = true;
1261 }
1262
1263 if (fValidStosd)
1264 {
1265 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1266 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1267 pgmUnlock(pVM);
1268 return rc;
1269 }
1270 }
1271
1272 /* REP prefix, don't bother. */
1273 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1274 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1275 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1276 fNotReusedNotForking = true;
1277 }
1278
1279#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1280 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1281 * leads to pgm pool thrashing and an excessive amount of write faults due to page monitoring.
1282 */
1283 if ( pPage->cModifications >= cMaxModifications
1284 && !fForcedFlush
1285 && pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1286 && ( fNotReusedNotForking
1287 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1288 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1289 )
1290 )
1291 {
1292 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1293 Assert(pPage->fDirty == false);
1294
1295 /* Flush any monitored duplicates as we will disable write protection. */
1296 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1297 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1298 {
1299 PPGMPOOLPAGE pPageHead = pPage;
1300
1301 /* Find the monitor head. */
1302 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1303 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1304
1305 while (pPageHead)
1306 {
1307 unsigned idxNext = pPageHead->iMonitoredNext;
1308
1309 if (pPageHead != pPage)
1310 {
1311 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1312 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1313 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1314 AssertRC(rc2);
1315 }
1316
1317 if (idxNext == NIL_PGMPOOL_IDX)
1318 break;
1319
1320 pPageHead = &pPool->aPages[idxNext];
1321 }
1322 }
1323
1324 /* The flushing above might fail for locked pages, so double check. */
1325 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1326 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1327 {
1328 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1329
1330 /* Temporarily allow write access to the page table again. */
1331 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
1332 if (rc == VINF_SUCCESS)
1333 {
1334 rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
1335 AssertMsg(rc == VINF_SUCCESS
1336 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1337 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1338 || rc == VERR_PAGE_NOT_PRESENT,
1339 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1340
1341 pPage->pvDirtyFault = pvFault;
1342
1343 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1344 pgmUnlock(pVM);
1345 return rc;
1346 }
1347 }
1348 }
1349#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1350
1351 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1352flushPage:
1353 /*
1354 * Not worth it, so flush it.
1355 *
1356 * If we considered it to be reused, don't go back to ring-3
1357 * to emulate failed instructions since we usually cannot
1358 * interpret them. This may be a bit risky, in which case
1359 * the reuse detection must be fixed.
1360 */
1361 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1362 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1363 && fReused)
1364 {
1365 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1366 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1367 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1368 }
1369 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1370 pgmUnlock(pVM);
1371 return rc;
1372}
1373
1374# endif /* !IN_RING3 */
1375
1376# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1377
1378# ifdef VBOX_STRICT
1379/**
1380 * Check references to guest physical memory in a PAE / PAE page table.
1381 *
1382 * @param pPool The pool.
1383 * @param pPage The page.
1384 * @param pShwPT The shadow page table (mapping of the page).
1385 * @param pGstPT The guest page table.
1386 */
1387static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
1388{
1389 unsigned cErrors = 0;
1390 int LastRc = -1; /* initialized to shut up gcc */
1391 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1392 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1393
1394#ifdef VBOX_STRICT
1395 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1396 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1397#endif
1398 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1399 {
1400 if (pShwPT->a[i].n.u1Present)
1401 {
1402 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1403 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1404 if ( rc != VINF_SUCCESS
1405 || (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) != HCPhys)
1406 {
1407 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1408 LastPTE = i;
1409 LastRc = rc;
1410 LastHCPhys = HCPhys;
1411 cErrors++;
1412
1413 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1414 rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pPage->GCPhys, &HCPhysPT);
1415 AssertRC(rc);
1416
1417 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1418 {
1419 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1420
1421 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1422 {
1423 PX86PTPAE pShwPT2 = (PX86PTPAE)PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pTempPage);
1424
1425 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1426 {
1427 if ( pShwPT2->a[j].n.u1Present
1428 && pShwPT2->a[j].n.u1Write
1429 && ((pShwPT2->a[j].u & X86_PTE_PAE_PG_MASK) == HCPhysPT))
1430 {
1431 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, pShwPT->a[j].u, pShwPT2->a[j].u));
1432 }
1433 }
1434 }
1435 }
1436 }
1437 }
1438 }
1439 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, pShwPT->a[LastPTE].u, LastHCPhys));
1440}
1441# endif /* VBOX_STRICT */
1442
1443/**
1444 * Clear references to guest physical memory in a PAE / PAE page table.
1445 *
1446 * @returns nr of changed PTEs
1447 * @param pPool The pool.
1448 * @param pPage The page.
1449 * @param pShwPT The shadow page table (mapping of the page).
1450 * @param pGstPT The guest page table.
1451 * @param pOldGstPT The old cached guest page table.
1452 * @param fAllowRemoval Allow removal of a reused page table (bail out as soon as an invalid guest PTE is encountered)
1453 * @param pfFlush Flush reused page table (out)
1454 */
1455DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT, PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1456{
1457 unsigned cChanged = 0;
1458
1459#ifdef VBOX_STRICT
1460 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1461 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1462#endif
1463 *pfFlush = false;
1464
1465 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1466 {
1467 /* Check the new value written by the guest. If present and with a bogus physical address, then
1468 * it's fairly safe to assume the guest is reusing the PT.
1469 */
1470 if ( fAllowRemoval
1471 && pGstPT->a[i].n.u1Present)
1472 {
1473 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1474 {
1475 *pfFlush = true;
1476 return ++cChanged;
1477 }
1478 }
1479 if (pShwPT->a[i].n.u1Present)
1480 {
1481 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1482 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1483 {
1484#ifdef VBOX_STRICT
1485 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1486 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1487 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1488#endif
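 /* Compare only the attribute bits: the shadow PTE may legitimately be read-only
    while the guest PTE is writable (write monitoring / dirty tracking), which is
    why fHostRW <= fGuestRW is accepted below. */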
1489 uint64_t uHostAttr = pShwPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1490 bool fHostRW = !!(pShwPT->a[i].u & X86_PTE_RW);
1491 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1492 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1493
1494 if ( uHostAttr == uGuestAttr
1495 && fHostRW <= fGuestRW)
1496 continue;
1497 }
1498 cChanged++;
1499 /* Something was changed, so flush it. */
1500 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1501 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1502 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1503 ASMAtomicWriteSize(&pShwPT->a[i].u, 0);
1504 }
1505 }
1506 return cChanged;
1507}
1508
1509
1510/**
1511 * Flush a dirty page
1512 *
1513 * @param pVM VM Handle.
1514 * @param pPool The pool.
1515 * @param idxSlot Dirty array slot index
1516 * @param fAllowRemoval Allow a reused page table to be removed
1517 */
1518static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1519{
1520 PPGMPOOLPAGE pPage;
1521 unsigned idxPage;
1522
1523 Assert(idxSlot < RT_ELEMENTS(pPool->aIdxDirtyPages));
1524 if (pPool->aIdxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
1525 return;
1526
1527 idxPage = pPool->aIdxDirtyPages[idxSlot];
1528 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1529 pPage = &pPool->aPages[idxPage];
1530 Assert(pPage->idx == idxPage);
1531 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1532
1533 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1534 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1535
1536 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1537 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys);
1538 Assert(rc == VINF_SUCCESS);
1539 pPage->fDirty = false;
1540
1541#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1542 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(VMMGetCpu(pVM));
1543#endif
1544
1545#ifdef VBOX_STRICT
1546 uint64_t fFlags = 0;
1547 RTHCPHYS HCPhys;
1548 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1549 AssertMsg( ( rc == VINF_SUCCESS
1550 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1551 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1552 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1553 || rc == VERR_PAGE_NOT_PRESENT,
1554 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1555#endif
1556
1557 /* Flush those PTEs that have changed. */
1558 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1559 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1560 void *pvGst;
1561 bool fFlush;
1562 rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1563 unsigned cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst, (PCX86PTPAE)&pPool->aDirtyPages[idxSlot][0], fAllowRemoval, &fFlush);
1564 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1565 /** Note: we might want to consider keeping the dirty page active in case there were many changes. */
1566
1567 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1568 Assert(pPage->cModifications);
1569 if (cChanges < 4)
1570 pPage->cModifications = 1; /* must use > 0 here */
1571 else
1572 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1573
1574 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
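    /* If the dirty set was full, the slot we just emptied becomes the next free slot hint. */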
1575 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages))
1576 pPool->idxFreeDirtyPage = idxSlot;
1577
1578 pPool->cDirtyPages--;
1579 pPool->aIdxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1580 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1581 if (fFlush)
1582 {
1583 Assert(fAllowRemoval);
1584 Log(("Flush reused page table!\n"));
1585 pgmPoolFlushPage(pPool, pPage);
1586 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1587 }
1588 else
1589 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1590
1591#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1592 PGMDynMapPopAutoSubset(VMMGetCpu(pVM), iPrevSubset);
1593#endif
1594}
1595
1596# ifndef IN_RING3
1597/**
1598 * Adds a new dirty page.
1599 *
1600 * @param pVM VM Handle.
1601 * @param pPool The pool.
1602 * @param pPage The page.
1603 */
1604void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1605{
1606 unsigned idxFree;
1607
1608 Assert(PGMIsLocked(pVM));
1609 AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
1610 Assert(!pPage->fDirty);
1611
1612 idxFree = pPool->idxFreeDirtyPage;
1613 Assert(idxFree < RT_ELEMENTS(pPool->aIdxDirtyPages));
1614 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1615
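    /* The dirty set is full: flush the entry occupying the free slot hint to make room,
       allowing a reused page table to be removed entirely. */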
1616 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aIdxDirtyPages))
1617 {
1618 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1619 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1620 }
1621 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages));
1622 AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1623
1624 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1625
1626 /* Make a copy of the guest page table as we require valid GCPhys addresses when removing
1627 * references to physical pages. (the HCPhys linear lookup is *extremely* expensive!)
1628 */
1629 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1630 void *pvGst;
1631 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1632 memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
1633#ifdef VBOX_STRICT
1634 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1635#endif
1636
1637 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1638 pPage->fDirty = true;
1639 pPage->idxDirty = idxFree;
1640 pPool->aIdxDirtyPages[idxFree] = pPage->idx;
1641 pPool->cDirtyPages++;
1642
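    /* Advance the free slot hint; the set size is a power of two, so the mask wraps it around.
       If that slot is still occupied and the set is not full, search for any free slot. */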
1643 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1644 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages)
1645 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1646 {
1647 unsigned i;
1648 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1649 {
1650 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1651 if (pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1652 {
1653 pPool->idxFreeDirtyPage = idxFree;
1654 break;
1655 }
1656 }
1657 Assert(i != RT_ELEMENTS(pPool->aIdxDirtyPages));
1658 }
1659
1660 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages) || pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1661 return;
1662}
1663# endif /* !IN_RING3 */
1664
1665/**
1666 * Checks if the specified page is dirty (not write monitored).
1667 *
1668 * @returns true if dirty, false if not.
1669 * @param pVM VM Handle.
1670 * @param GCPhys Guest physical address
1671 */
1672bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1673{
1674 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1675 Assert(PGMIsLocked(pVM));
1676 if (!pPool->cDirtyPages)
1677 return false;
1678
1679 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1680
1681 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1682 {
1683 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1684 {
1685 PPGMPOOLPAGE pPage;
1686 unsigned idxPage = pPool->aIdxDirtyPages[i];
1687
1688 pPage = &pPool->aPages[idxPage];
1689 if (pPage->GCPhys == GCPhys)
1690 return true;
1691 }
1692 }
1693 return false;
1694}
1695
1696/**
1697 * Resets all dirty pages by reinstating page monitoring.
1698 *
1699 * @param pVM VM Handle.
1700 */
1701void pgmPoolResetDirtyPages(PVM pVM)
1702{
1703 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1704 Assert(PGMIsLocked(pVM));
1705 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1706
1707 if (!pPool->cDirtyPages)
1708 return;
1709
1710 Log(("pgmPoolResetDirtyPages\n"));
1711 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1712 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1713
1714 pPool->idxFreeDirtyPage = 0;
1715 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1716 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1717 {
1718 unsigned i;
1719 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1720 {
1721 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1722 {
1723 pPool->idxFreeDirtyPage = i;
1724 break;
1725 }
1726 }
1727 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1728 }
1729
1730 Assert(pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages));
1731 return;
1732}
1733
1734/**
1735 * Invalidates the dirty page tracking for the specified page table, flushing it from the dirty set if found.
1736 *
1737 * @param pVM VM Handle.
1738 * @param GCPhysPT Physical address of the page table
1739 */
1740void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1741{
1742 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1743 Assert(PGMIsLocked(pVM));
1744 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1745 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aIdxDirtyPages);
1746
1747 if (!pPool->cDirtyPages)
1748 return;
1749
1750 GCPhysPT = GCPhysPT & ~(RTGCPHYS)(PAGE_SIZE - 1);
1751
1752 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1753 {
1754 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1755 {
1756 unsigned idxPage = pPool->aIdxDirtyPages[i];
1757
1758 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1759 if (pPage->GCPhys == GCPhysPT)
1760 {
1761 idxDirtyPage = i;
1762 break;
1763 }
1764 }
1765 }
1766
1767 if (idxDirtyPage != RT_ELEMENTS(pPool->aIdxDirtyPages))
1768 {
1769 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1770 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1771 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1772 {
1773 unsigned i;
1774 for (i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1775 {
1776 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1777 {
1778 pPool->idxFreeDirtyPage = i;
1779 break;
1780 }
1781 }
1782 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1783 }
1784 }
1785}
1786
1787# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1788
1789/**
1790 * Inserts a page into the GCPhys hash table.
1791 *
1792 * @param pPool The pool.
1793 * @param pPage The page.
1794 */
1795DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1796{
1797 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1798 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1799 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1800 pPage->iNext = pPool->aiHash[iHash];
1801 pPool->aiHash[iHash] = pPage->idx;
1802}
1803
1804
1805/**
1806 * Removes a page from the GCPhys hash table.
1807 *
1808 * @param pPool The pool.
1809 * @param pPage The page.
1810 */
1811DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1812{
1813 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1814 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1815 if (pPool->aiHash[iHash] == pPage->idx)
1816 pPool->aiHash[iHash] = pPage->iNext;
1817 else
1818 {
1819 uint16_t iPrev = pPool->aiHash[iHash];
1820 for (;;)
1821 {
1822 const int16_t i = pPool->aPages[iPrev].iNext;
1823 if (i == pPage->idx)
1824 {
1825 pPool->aPages[iPrev].iNext = pPage->iNext;
1826 break;
1827 }
1828 if (i == NIL_PGMPOOL_IDX)
1829 {
1830 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1831 break;
1832 }
1833 iPrev = i;
1834 }
1835 }
1836 pPage->iNext = NIL_PGMPOOL_IDX;
1837}
1838
1839
1840/**
1841 * Frees up one cache page.
1842 *
1843 * @returns VBox status code.
1844 * @retval VINF_SUCCESS on success.
1845 * @param pPool The pool.
1846 * @param iUser The user index.
1847 */
1848static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1849{
1850#ifndef IN_RC
1851 const PVM pVM = pPool->CTX_SUFF(pVM);
1852#endif
1853 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1854 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1855
1856 /*
1857 * Select one page from the tail of the age list.
1858 */
1859 PPGMPOOLPAGE pPage;
1860 for (unsigned iLoop = 0; ; iLoop++)
1861 {
1862 uint16_t iToFree = pPool->iAgeTail;
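        /* Never evict the page the caller is allocating a user entry for; take the next oldest instead. */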
1863 if (iToFree == iUser)
1864 iToFree = pPool->aPages[iToFree].iAgePrev;
1865/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1866 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1867 {
1868 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1869 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1870 {
1871 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1872 continue;
1873 iToFree = i;
1874 break;
1875 }
1876 }
1877*/
1878 Assert(iToFree != iUser);
1879 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1880 pPage = &pPool->aPages[iToFree];
1881
1882 /*
1883 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1884 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1885 */
1886 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1887 break;
1888 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1889 pgmPoolCacheUsed(pPool, pPage);
1890 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1891 }
1892
1893 /*
1894 * Found a usable page, flush it and return.
1895 */
1896 int rc = pgmPoolFlushPage(pPool, pPage);
1897 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1898 /* todo: find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
1899 if (rc == VINF_SUCCESS)
1900 PGM_INVL_ALL_VCPU_TLBS(pVM);
1901 return rc;
1902}
1903
1904
1905/**
1906 * Checks if a kind mismatch is really a page being reused
1907 * or if it's just normal remappings.
1908 *
1909 * @returns true if reused and the cached page (enmKind1) should be flushed
1910 * @returns false if not reused.
1911 * @param enmKind1 The kind of the cached page.
1912 * @param enmKind2 The kind of the requested page.
1913 */
1914static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1915{
1916 switch (enmKind1)
1917 {
1918 /*
1919 * Never reuse them. There is no remapping in non-paging mode.
1920 */
1921 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1922 case PGMPOOLKIND_32BIT_PD_PHYS:
1923 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1924 case PGMPOOLKIND_PAE_PD_PHYS:
1925 case PGMPOOLKIND_PAE_PDPT_PHYS:
1926 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1927 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1928 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1929 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1930 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1931 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1932 return false;
1933
1934 /*
1935 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1936 */
1937 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1938 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1939 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1940 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1941 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1942 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1943 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1944 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1945 case PGMPOOLKIND_32BIT_PD:
1946 case PGMPOOLKIND_PAE_PDPT:
1947 switch (enmKind2)
1948 {
1949 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1950 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1951 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1952 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1953 case PGMPOOLKIND_64BIT_PML4:
1954 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1955 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1956 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1957 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1958 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1959 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1960 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1961 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1962 return true;
1963 default:
1964 return false;
1965 }
1966
1967 /*
1968 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1969 */
1970 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1971 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1972 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1973 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1974 case PGMPOOLKIND_64BIT_PML4:
1975 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1976 switch (enmKind2)
1977 {
1978 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1979 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1980 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1981 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1982 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1983 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1984 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1985 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1986 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1987 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1988 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1989 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1990 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1991 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1992 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1993 return true;
1994 default:
1995 return false;
1996 }
1997
1998 /*
1999 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2000 */
2001 case PGMPOOLKIND_ROOT_NESTED:
2002 return false;
2003
2004 default:
2005 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2006 }
2007}
2008
2009
2010/**
2011 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2012 *
2013 * @returns VBox status code.
2014 * @retval VINF_PGM_CACHED_PAGE on success.
2015 * @retval VERR_FILE_NOT_FOUND if not found.
2016 * @param pPool The pool.
2017 * @param GCPhys The GC physical address of the page we're gonna shadow.
2018 * @param enmKind The kind of mapping.
2019 * @param enmAccess Access type for the mapping (only relevant for big pages)
2020 * @param iUser The shadow page pool index of the user table.
2021 * @param iUserTable The index into the user table (shadowed).
2022 * @param ppPage Where to store the pointer to the page.
2023 */
2024static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2025{
2026#ifndef IN_RC
2027 const PVM pVM = pPool->CTX_SUFF(pVM);
2028#endif
2029 /*
2030 * Look up the GCPhys in the hash.
2031 */
2032 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2033 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2034 if (i != NIL_PGMPOOL_IDX)
2035 {
2036 do
2037 {
2038 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2039 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2040 if (pPage->GCPhys == GCPhys)
2041 {
2042 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2043 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2044 {
2045 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2046 * doesn't flush it in case there are no more free use records.
2047 */
2048 pgmPoolCacheUsed(pPool, pPage);
2049
2050 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2051 if (RT_SUCCESS(rc))
2052 {
2053 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2054 *ppPage = pPage;
2055 if (pPage->cModifications)
2056 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2057 STAM_COUNTER_INC(&pPool->StatCacheHits);
2058 return VINF_PGM_CACHED_PAGE;
2059 }
2060 return rc;
2061 }
2062
2063 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2064 {
2065 /*
2066 * The kind is different. In some cases we should now flush the page
2067 * as it has been reused, but in most cases this is normal remapping
2068 * of PDs as PT or big pages using the GCPhys field in a slightly
2069 * different way than the other kinds.
2070 */
2071 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2072 {
2073 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2074 pgmPoolFlushPage(pPool, pPage);
2075 break;
2076 }
2077 }
2078 }
2079
2080 /* next */
2081 i = pPage->iNext;
2082 } while (i != NIL_PGMPOOL_IDX);
2083 }
2084
2085 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2086 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2087 return VERR_FILE_NOT_FOUND;
2088}
2089
2090
2091/**
2092 * Inserts a page into the cache.
2093 *
2094 * @param pPool The pool.
2095 * @param pPage The cached page.
2096 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2097 */
2098static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2099{
2100 /*
2101 * Insert into the GCPhys hash if the page is fit for that.
2102 */
2103 Assert(!pPage->fCached);
2104 if (fCanBeCached)
2105 {
2106 pPage->fCached = true;
2107 pgmPoolHashInsert(pPool, pPage);
2108 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2109 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2110 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2111 }
2112 else
2113 {
2114 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2115 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2116 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2117 }
2118
2119 /*
2120 * Insert at the head of the age list.
2121 */
2122 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2123 pPage->iAgeNext = pPool->iAgeHead;
2124 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2125 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2126 else
2127 pPool->iAgeTail = pPage->idx;
2128 pPool->iAgeHead = pPage->idx;
2129}
2130
2131
2132/**
2133 * Flushes a cached page.
2134 *
2135 * @param pPool The pool.
2136 * @param pPage The cached page.
2137 */
2138static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2139{
2140 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2141
2142 /*
2143 * Remove the page from the hash.
2144 */
2145 if (pPage->fCached)
2146 {
2147 pPage->fCached = false;
2148 pgmPoolHashRemove(pPool, pPage);
2149 }
2150 else
2151 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2152
2153 /*
2154 * Remove it from the age list.
2155 */
2156 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2157 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2158 else
2159 pPool->iAgeTail = pPage->iAgePrev;
2160 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2161 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2162 else
2163 pPool->iAgeHead = pPage->iAgeNext;
2164 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2165 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2166}
2167
2168
2169/**
2170 * Looks for pages sharing the monitor.
2171 *
2172 * @returns Pointer to the head page.
2173 * @returns NULL if not found.
2174 * @param pPool The pool.
2175 * @param pNewPage The page which is going to be monitored.
2176 */
2177static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2178{
2179 /*
2180 * Look up the GCPhys in the hash.
2181 */
2182 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2183 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2184 if (i == NIL_PGMPOOL_IDX)
2185 return NULL;
2186 do
2187 {
2188 PPGMPOOLPAGE pPage = &pPool->aPages[i];
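        /* Unsigned compare: matches any pool page whose GCPhys lies within the same guest page,
           even when it carries a sub-page offset (kinds that shadow only part of a guest page). */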
2189 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2190 && pPage != pNewPage)
2191 {
2192 switch (pPage->enmKind)
2193 {
2194 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2195 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2196 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2197 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2198 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2199 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2200 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2201 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2202 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2203 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2204 case PGMPOOLKIND_64BIT_PML4:
2205 case PGMPOOLKIND_32BIT_PD:
2206 case PGMPOOLKIND_PAE_PDPT:
2207 {
2208 /* find the head */
2209 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2210 {
2211 Assert(pPage->iMonitoredPrev != pPage->idx);
2212 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2213 }
2214 return pPage;
2215 }
2216
2217 /* ignore, no monitoring. */
2218 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2219 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2220 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2221 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2222 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2223 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2224 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2225 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2226 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2227 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2228 case PGMPOOLKIND_ROOT_NESTED:
2229 case PGMPOOLKIND_PAE_PD_PHYS:
2230 case PGMPOOLKIND_PAE_PDPT_PHYS:
2231 case PGMPOOLKIND_32BIT_PD_PHYS:
2232 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2233 break;
2234 default:
2235 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2236 }
2237 }
2238
2239 /* next */
2240 i = pPage->iNext;
2241 } while (i != NIL_PGMPOOL_IDX);
2242 return NULL;
2243}
2244
2245
2246/**
2247 * Enables write monitoring of a guest page.
2248 *
2249 * @returns VBox status code.
2250 * @retval VINF_SUCCESS on success.
2251 * @param pPool The pool.
2252 * @param pPage The cached page.
2253 */
2254static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2255{
2256 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
2257
2258 /*
2259 * Filter out the relevant kinds.
2260 */
2261 switch (pPage->enmKind)
2262 {
2263 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2264 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2265 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2266 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2267 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2268 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2269 case PGMPOOLKIND_64BIT_PML4:
2270 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2271 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2272 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2273 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2274 case PGMPOOLKIND_32BIT_PD:
2275 case PGMPOOLKIND_PAE_PDPT:
2276 break;
2277
2278 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2279 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2280 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2281 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2282 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2283 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2284 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2285 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2286 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2287 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2288 case PGMPOOLKIND_ROOT_NESTED:
2289 /* Nothing to monitor here. */
2290 return VINF_SUCCESS;
2291
2292 case PGMPOOLKIND_32BIT_PD_PHYS:
2293 case PGMPOOLKIND_PAE_PDPT_PHYS:
2294 case PGMPOOLKIND_PAE_PD_PHYS:
2295 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2296 /* Nothing to monitor here. */
2297 return VINF_SUCCESS;
2298 default:
2299 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2300 }
2301
2302 /*
2303 * Install handler.
2304 */
2305 int rc;
2306 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2307 if (pPageHead)
2308 {
2309 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2310 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2311
2312#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2313 if (pPageHead->fDirty)
2314 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2315#endif
2316
2317 pPage->iMonitoredPrev = pPageHead->idx;
2318 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2319 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2320 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2321 pPageHead->iMonitoredNext = pPage->idx;
2322 rc = VINF_SUCCESS;
2323 }
2324 else
2325 {
2326 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2327 PVM pVM = pPool->CTX_SUFF(pVM);
2328 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2329 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2330 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
2331 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2332 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2333 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2334 pPool->pszAccessHandler);
2335 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2336 * the heap size should suffice. */
2337 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2338 PVMCPU pVCpu = VMMGetCpu(pVM);
2339 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2340 }
2341 pPage->fMonitored = true;
2342 return rc;
2343}
2344
2345
2346/**
2347 * Disables write monitoring of a guest page.
2348 *
2349 * @returns VBox status code.
2350 * @retval VINF_SUCCESS on success.
2351 * @param pPool The pool.
2352 * @param pPage The cached page.
2353 */
2354static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2355{
2356 /*
2357 * Filter out the relevant kinds.
2358 */
2359 switch (pPage->enmKind)
2360 {
2361 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2362 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2363 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2364 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2365 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2366 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2367 case PGMPOOLKIND_64BIT_PML4:
2368 case PGMPOOLKIND_32BIT_PD:
2369 case PGMPOOLKIND_PAE_PDPT:
2370 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2371 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2372 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2373 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2374 break;
2375
2376 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2377 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2378 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2379 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2380 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2381 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2382 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2383 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2384 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2385 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2386 case PGMPOOLKIND_ROOT_NESTED:
2387 case PGMPOOLKIND_PAE_PD_PHYS:
2388 case PGMPOOLKIND_PAE_PDPT_PHYS:
2389 case PGMPOOLKIND_32BIT_PD_PHYS:
2390 /* Nothing to monitor here. */
2391 Assert(!pPage->fMonitored);
2392 return VINF_SUCCESS;
2393
2394 default:
2395 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2396 }
2397 Assert(pPage->fMonitored);
2398
2399 /*
2400 * Remove the page from the monitored list or uninstall it if last.
2401 */
2402 const PVM pVM = pPool->CTX_SUFF(pVM);
2403 int rc;
2404 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2405 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2406 {
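        /* If this page is the head of the monitored chain, re-point the physical handler's
           per-page user data at the new head instead of deregistering the handler. */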
2407 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2408 {
2409 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2410 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2411 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2412 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2413 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2414 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2415 pPool->pszAccessHandler);
2416 AssertFatalRCSuccess(rc);
2417 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2418 }
2419 else
2420 {
2421 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2422 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2423 {
2424 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2425 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2426 }
2427 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2428 rc = VINF_SUCCESS;
2429 }
2430 }
2431 else
2432 {
2433 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2434 AssertFatalRC(rc);
2435 PVMCPU pVCpu = VMMGetCpu(pVM);
2436 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2437 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2438 }
2439 pPage->fMonitored = false;
2440
2441 /*
2442 * Remove it from the list of modified pages (if in it).
2443 */
2444 pgmPoolMonitorModifiedRemove(pPool, pPage);
2445
2446 return rc;
2447}
2448
2449
2450/**
2451 * Inserts the page into the list of modified pages.
2452 *
2453 * @param pPool The pool.
2454 * @param pPage The page.
2455 */
2456void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2457{
2458 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2459 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2460 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2461 && pPool->iModifiedHead != pPage->idx,
2462 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2463 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2464 pPool->iModifiedHead, pPool->cModifiedPages));
2465
2466 pPage->iModifiedNext = pPool->iModifiedHead;
2467 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2468 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2469 pPool->iModifiedHead = pPage->idx;
2470 pPool->cModifiedPages++;
2471#ifdef VBOX_WITH_STATISTICS
2472 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2473 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2474#endif
2475}
2476
2477
2478/**
2479 * Removes the page from the list of modified pages and resets the
2480 * modification counter.
2481 *
2482 * @param pPool The pool.
2483 * @param pPage The page which is believed to be in the list of modified pages.
2484 */
2485static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2486{
2487 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2488 if (pPool->iModifiedHead == pPage->idx)
2489 {
2490 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2491 pPool->iModifiedHead = pPage->iModifiedNext;
2492 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2493 {
2494 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2495 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2496 }
2497 pPool->cModifiedPages--;
2498 }
2499 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2500 {
2501 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2502 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2503 {
2504 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2505 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2506 }
2507 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2508 pPool->cModifiedPages--;
2509 }
2510 else
2511 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2512 pPage->cModifications = 0;
2513}
2514
2515
2516/**
2517 * Zaps the list of modified pages, resetting their modification counters in the process.
2518 *
2519 * @param pVM The VM handle.
2520 */
2521static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2522{
2523 pgmLock(pVM);
2524 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2525 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2526
2527 unsigned cPages = 0; NOREF(cPages);
2528
2529#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2530 pgmPoolResetDirtyPages(pVM);
2531#endif
2532
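    /* Detach the whole modified list up front, then walk it resetting links and counters. */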
2533 uint16_t idx = pPool->iModifiedHead;
2534 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2535 while (idx != NIL_PGMPOOL_IDX)
2536 {
2537 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2538 idx = pPage->iModifiedNext;
2539 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2540 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2541 pPage->cModifications = 0;
2542 Assert(++cPages);
2543 }
2544 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2545 pPool->cModifiedPages = 0;
2546 pgmUnlock(pVM);
2547}
2548
2549
2550/**
2551 * Handles the SyncCR3 pool tasks.
2552 *
2553 * @returns VBox status code.
2554 * @retval VINF_SUCCESS on success.
2555 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2556 * @param pVCpu The VMCPU handle.
2557 * @remark Should only be used when monitoring is available, thus placed in
2558 * the PGMPOOL_WITH_MONITORING #ifdef.
2559 */
2560int pgmPoolSyncCR3(PVMCPU pVCpu)
2561{
2562 PVM pVM = pVCpu->CTX_SUFF(pVM);
2563 LogFlow(("pgmPoolSyncCR3\n"));
2564
2565 /*
2566 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2567 * Occasionally we will have to clear all the shadow page tables because we wanted
2568 * to monitor a page which was mapped by too many shadowed page tables. This operation
2569 * is sometimes referred to as a 'lightweight flush'.
2570 */
2571# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2572 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2573 pgmR3PoolClearAll(pVM);
2574# else /* !IN_RING3 */
2575 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2576 {
2577 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2578 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2579
2580 /* Make sure all other VCPUs return to ring 3. */
2581 if (pVM->cCpus > 1)
2582 {
2583 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2584 PGM_INVL_ALL_VCPU_TLBS(pVM);
2585 }
2586 return VINF_PGM_SYNC_CR3;
2587 }
2588# endif /* !IN_RING3 */
2589 else
2590 {
2591 pgmPoolMonitorModifiedClearAll(pVM);
2592
2593 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2594 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2595 {
2596 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2597 return pgmPoolSyncCR3(pVCpu);
2598 }
2599 }
2600 return VINF_SUCCESS;
2601}
2602
2603
2604/**
2605 * Frees up at least one user entry.
2606 *
2607 * @returns VBox status code.
2608 * @retval VINF_SUCCESS on success.
2609 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2610 * @param pPool The pool.
2611 * @param iUser The user index.
2612 */
2613static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2614{
2615 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2616 /*
2617 * Just free cached pages in a braindead fashion.
2618 */
2619 /** @todo walk the age list backwards and free the first with usage. */
2620 int rc = VINF_SUCCESS;
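    /* Each page freed from the cache releases its user records back to the free list;
       keep going until at least one becomes available. */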
2621 do
2622 {
2623 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2624 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2625 rc = rc2;
2626 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2627 return rc;
2628}
2629
2630
2631/**
2632 * Inserts a page into the cache.
2633 *
2634 * This will create a user node for the page, insert it into the GCPhys
2635 * hash, and insert it into the age list.
2636 *
2637 * @returns VBox status code.
2638 * @retval VINF_SUCCESS if successfully added.
2639 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2640 * @param pPool The pool.
2641 * @param pPage The cached page.
2642 * @param GCPhys The GC physical address of the page we're gonna shadow.
2643 * @param iUser The user index.
2644 * @param iUserTable The user table index.
2645 */
2646DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2647{
2648 int rc = VINF_SUCCESS;
2649 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2650
2651 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2652
2653#ifdef VBOX_STRICT
2654 /*
2655 * Check that the entry doesn't already exist.
2656 */
2657 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2658 {
2659 uint16_t i = pPage->iUserHead;
2660 do
2661 {
2662 Assert(i < pPool->cMaxUsers);
2663 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2664 i = paUsers[i].iNext;
2665 } while (i != NIL_PGMPOOL_USER_INDEX);
2666 }
2667#endif
2668
2669 /*
2670 * Find a free user node.
2671 */
2672 uint16_t i = pPool->iUserFreeHead;
2673 if (i == NIL_PGMPOOL_USER_INDEX)
2674 {
2675 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2676 if (RT_FAILURE(rc))
2677 return rc;
2678 i = pPool->iUserFreeHead;
2679 }
2680
2681 /*
2682 * Unlink the user node from the free list,
2683 * initialize and insert it into the user list.
2684 */
2685 pPool->iUserFreeHead = paUsers[i].iNext;
2686 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2687 paUsers[i].iUser = iUser;
2688 paUsers[i].iUserTable = iUserTable;
2689 pPage->iUserHead = i;
2690
2691 /*
2692 * Insert into cache and enable monitoring of the guest page if enabled.
2693 *
2694 * Until we implement caching of all levels, including the CR3 one, we'll
2695 * have to make sure we don't try to monitor & cache any recursive reuse of
2696 * a monitored CR3 page. Because all Windows versions are doing this we'll
2697 * have to be able to do combined access monitoring, CR3 + PT and
2698 * PD + PT (guest PAE).
2699 *
2700 * Update:
2701 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2702 */
2703 const bool fCanBeMonitored = true;
2704 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2705 if (fCanBeMonitored)
2706 {
2707 rc = pgmPoolMonitorInsert(pPool, pPage);
2708 AssertRC(rc);
2709 }
2710 return rc;
2711}
2712
2713
2714/**
2715 * Adds a user reference to a page.
2716 *
2717 * This will move the page to the head of the age list.
2718 *
2719 * @returns VBox status code.
2720 * @retval VINF_SUCCESS if successfully added.
2721 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2722 * @param pPool The pool.
2723 * @param pPage The cached page.
2724 * @param iUser The user index.
2725 * @param iUserTable The user table.
2726 */
2727static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2728{
2729 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2730
2731 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2732
2733# ifdef VBOX_STRICT
2734 /*
2735 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2736 */
2737 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2738 {
2739 uint16_t i = pPage->iUserHead;
2740 do
2741 {
2742 Assert(i < pPool->cMaxUsers);
2743 AssertMsg(iUser == PGMPOOL_IDX_PD || iUser == PGMPOOL_IDX_PDPT || iUser == PGMPOOL_IDX_NESTED_ROOT || iUser == PGMPOOL_IDX_AMD64_CR3 ||
2744 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2745 i = paUsers[i].iNext;
2746 } while (i != NIL_PGMPOOL_USER_INDEX);
2747 }
2748# endif
2749
2750 /*
2751 * Allocate a user node.
2752 */
2753 uint16_t i = pPool->iUserFreeHead;
2754 if (i == NIL_PGMPOOL_USER_INDEX)
2755 {
2756 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2757 if (RT_FAILURE(rc))
2758 return rc;
2759 i = pPool->iUserFreeHead;
2760 }
2761 pPool->iUserFreeHead = paUsers[i].iNext;
2762
2763 /*
2764 * Initialize the user node and insert it.
2765 */
2766 paUsers[i].iNext = pPage->iUserHead;
2767 paUsers[i].iUser = iUser;
2768 paUsers[i].iUserTable = iUserTable;
2769 pPage->iUserHead = i;
2770
2771# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2772 if (pPage->fDirty)
2773 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2774# endif
2775
2776 /*
2777 * Tell the cache to update its replacement stats for this page.
2778 */
2779 pgmPoolCacheUsed(pPool, pPage);
2780 return VINF_SUCCESS;
2781}
2782
2783
2784/**
2785 * Frees a user record associated with a page.
2786 *
2787 * This does not clear the entry in the user table, it simply returns the
2788 * user record to the chain of free records.
2789 *
2790 * @param pPool The pool.
2791 * @param pPage The shadow page whose user record is being freed.
2792 * @param iUser The shadow page pool index of the user table.
2793 * @param iUserTable The index into the user table (shadowed).
2794 */
2795static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2796{
2797 /*
2798 * Unlink and free the specified user entry.
2799 */
2800 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2801
2802 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2803 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2804 uint16_t i = pPage->iUserHead;
2805 if ( i != NIL_PGMPOOL_USER_INDEX
2806 && paUsers[i].iUser == iUser
2807 && paUsers[i].iUserTable == iUserTable)
2808 {
2809 pPage->iUserHead = paUsers[i].iNext;
2810
2811 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2812 paUsers[i].iNext = pPool->iUserFreeHead;
2813 pPool->iUserFreeHead = i;
2814 return;
2815 }
2816
2817 /* General: Linear search. */
2818 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2819 while (i != NIL_PGMPOOL_USER_INDEX)
2820 {
2821 if ( paUsers[i].iUser == iUser
2822 && paUsers[i].iUserTable == iUserTable)
2823 {
2824 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2825 paUsers[iPrev].iNext = paUsers[i].iNext;
2826 else
2827 pPage->iUserHead = paUsers[i].iNext;
2828
2829 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2830 paUsers[i].iNext = pPool->iUserFreeHead;
2831 pPool->iUserFreeHead = i;
2832 return;
2833 }
2834 iPrev = i;
2835 i = paUsers[i].iNext;
2836 }
2837
2838 /* Fatal: didn't find it */
2839 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2840 iUser, iUserTable, pPage->GCPhys));
2841}
2842
2843
2844/**
2845 * Gets the entry size of a shadow table.
2846 *
2847 * @param enmKind The kind of page.
2848 *
2849 * @returns The size of the entry in bytes. That is, 4 or 8.
2850 * @returns If the kind is not for a table, an assertion is raised and 0 is
2851 * returned.
2852 */
2853DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2854{
2855 switch (enmKind)
2856 {
2857 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2858 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2859 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2860 case PGMPOOLKIND_32BIT_PD:
2861 case PGMPOOLKIND_32BIT_PD_PHYS:
2862 return 4;
2863
2864 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2865 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2866 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2867 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2868 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2869 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2870 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2871 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2872 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2873 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2874 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2875 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2876 case PGMPOOLKIND_64BIT_PML4:
2877 case PGMPOOLKIND_PAE_PDPT:
2878 case PGMPOOLKIND_ROOT_NESTED:
2879 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2880 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2881 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2882 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2883 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2884 case PGMPOOLKIND_PAE_PD_PHYS:
2885 case PGMPOOLKIND_PAE_PDPT_PHYS:
2886 return 8;
2887
2888 default:
2889 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2890 }
2891}
2892
2893
2894/**
2895 * Gets the entry size of a guest table.
2896 *
2897 * @param enmKind The kind of page.
2898 *
2899 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2900 * @returns If the kind is not for a table, an assertion is raised and 0 is
2901 * returned.
2902 */
2903DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2904{
2905 switch (enmKind)
2906 {
2907 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2908 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2909 case PGMPOOLKIND_32BIT_PD:
2910 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2911 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2912 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2913 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2914 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2915 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2916 return 4;
2917
2918 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2919 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2920 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2921 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2922 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2923 case PGMPOOLKIND_64BIT_PML4:
2924 case PGMPOOLKIND_PAE_PDPT:
2925 return 8;
2926
2927 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2928 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2929 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2930 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2931 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2932 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2933 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2934 case PGMPOOLKIND_ROOT_NESTED:
2935 case PGMPOOLKIND_PAE_PD_PHYS:
2936 case PGMPOOLKIND_PAE_PDPT_PHYS:
2937 case PGMPOOLKIND_32BIT_PD_PHYS:
2938 /** @todo can we return 0? (nobody is calling this...) */
2939 AssertFailed();
2940 return 0;
2941
2942 default:
2943 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2944 }
2945}
2946
2947
2948/**
2949 * Scans one shadow page table for mappings of a physical page.
2950 *
2951 * @returns true/false indicating removal of all relevant PTEs
2952 * @param pVM The VM handle.
2953 * @param pPhysPage The guest page in question.
2954 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
2955 * @param iShw The shadow page table.
2956 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
2957 * @param cRefs The number of references made in that PT.
2958 */
2959static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte, uint16_t cRefs)
2960{
2961 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d iPte=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte, cRefs));
2962 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2963 bool bRet = false;
2964
2965 /*
2966 * Assert sanity.
2967 */
2968 Assert(cRefs == 1);
2969 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
2970 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2971 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2972
2973 /*
2974 * Then, clear the actual mappings to the page in the shadow PT.
2975 */
2976 switch (pPage->enmKind)
2977 {
2978 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2979 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2980 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2981 {
2982 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2983 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2984 uint32_t u32AndMask, u32OrMask;
2985
2986 u32AndMask = 0;
2987 u32OrMask = 0;
2988
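            /* With both masks left at zero the PTE is simply zapped below; when the entry can be
               kept, the masks are set up so that only the RW bit is changed. */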
2989 if (!fFlushPTEs)
2990 {
2991 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
2992 {
2993 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
2994 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
2995 u32OrMask = X86_PTE_RW;
2996 u32AndMask = UINT32_MAX;
2997 bRet = true;
2998 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
2999 break;
3000
3001 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3002 u32OrMask = 0;
3003 u32AndMask = ~X86_PTE_RW;
3004 bRet = true;
3005 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3006 break;
3007 default:
3008 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3009 break;
3010 }
3011 }
3012 else
3013 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3014
3015 /* Update the counter if we're removing references. */
3016 if (!u32AndMask)
3017 {
3018 Assert(pPage->cPresent >= cRefs);
3019 Assert(pPool->cPresent >= cRefs);
3020 pPage->cPresent -= cRefs;
3021 pPool->cPresent -= cRefs;
3022 }
3023
3024 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3025 {
3026 X86PTE Pte;
3027
3028 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", iPte, pPT->a[iPte], cRefs));
3029 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3030 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3031 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3032
3033 ASMAtomicWriteSize(&pPT->a[iPte].u, Pte.u);
3034 return bRet;
3035 }
3036#ifdef LOG_ENABLED
3037 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3038 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3039 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3040 {
3041 Log(("i=%d cRefs=%d\n", i, cRefs--));
3042 }
3043#endif
3044 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3045 break;
3046 }
3047
3048 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3049 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3050 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3051 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3052 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3053 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3054 {
3055 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3056 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3057 uint64_t u64AndMask, u64OrMask;
3058
3059 u64OrMask = 0;
3060 u64AndMask = 0;
3061 if (!fFlushPTEs)
3062 {
3063 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3064 {
3065 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3066 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3067 u64OrMask = X86_PTE_RW;
3068 u64AndMask = UINT64_MAX;
3069 bRet = true;
3070 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3071 break;
3072
3073 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3074 u64OrMask = 0;
3075 u64AndMask = ~((uint64_t)X86_PTE_RW);
3076 bRet = true;
3077 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3078 break;
3079
3080 default:
3081 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3082 break;
3083 }
3084 }
3085 else
3086 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3087
3088 /* Update the counter if we're removing references. */
3089 if (!u64AndMask)
3090 {
3091 Assert(pPage->cPresent >= cRefs);
3092 Assert(pPool->cPresent >= cRefs);
3093 pPage->cPresent -= cRefs;
3094 pPool->cPresent -= cRefs;
3095 }
3096
3097 if ((pPT->a[iPte].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3098 {
3099 X86PTEPAE Pte;
3100
3101 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", iPte, pPT->a[iPte], cRefs));
3102 Pte.u = (pPT->a[iPte].u & u64AndMask) | u64OrMask;
3103 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3104 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3105
3106 ASMAtomicWriteSize(&pPT->a[iPte].u, Pte.u);
3107 return bRet;
3108 }
3109#ifdef LOG_ENABLED
3110 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3111 Log(("Found %RX64 expected %RX64\n", pPT->a[iPte].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P), u64));
3112 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3113 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3114 {
3115 Log(("i=%d cRefs=%d\n", i, cRefs--));
3116 }
3117#endif
3118 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind));
3119 break;
3120 }
3121
3122#ifdef PGM_WITH_LARGE_PAGES
3123 /* Large page case only. */
3124 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3125 {
3126 Assert(HWACCMIsNestedPagingActive(pVM));
3127
3128 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3129 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3130
3131 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3132 {
3133 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64 cRefs=%#x\n", iPte, pPD->a[iPte], cRefs));
3134 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3135 pPD->a[iPte].u = 0;
3136
3137 /* Update the counter as we're removing references. */
3138 Assert(pPage->cPresent);
3139 Assert(pPool->cPresent);
3140 pPage->cPresent--;
3141 pPool->cPresent--;
3142
3143 return bRet;
3144 }
3145# ifdef LOG_ENABLED
3146 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3147 for (unsigned i = 0; i < RT_ELEMENTS(pPD->a); i++)
3148 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3149 {
3150 Log(("i=%d cRefs=%d\n", i, cRefs--));
3151 }
3152# endif
3153 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3154 break;
3155 }
3156
3157 /* AMD-V nested paging - @todo merge with EPT as we only check the parts that are identical. */
3158 case PGMPOOLKIND_PAE_PD_PHYS:
3159 {
3160 Assert(HWACCMIsNestedPagingActive(pVM));
3161
3162 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3163 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3164
3165 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3166 {
3167 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64 cRefs=%#x\n", iPte, pPD->a[iPte], cRefs));
3168 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3169 pPD->a[iPte].u = 0;
3170
3171 /* Update the counter as we're removing references. */
3172 Assert(pPage->cPresent);
3173 Assert(pPool->cPresent);
3174 pPage->cPresent--;
3175 pPool->cPresent--;
3176 return bRet;
3177 }
3178# ifdef LOG_ENABLED
3179 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3180 for (unsigned i = 0; i < RT_ELEMENTS(pPD->a); i++)
3181 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3182 {
3183 Log(("i=%d cRefs=%d\n", i, cRefs--));
3184 }
3185# endif
3186 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3187 break;
3188 }
3189#endif /* PGM_WITH_LARGE_PAGES */
3190
3191 default:
3192 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3193 }
3194 return bRet;
3195}
3196
3197
3198/**
3199 * Scans one shadow page table for mappings of a physical page.
3200 *
3201 * @param pVM The VM handle.
3202 * @param pPhysPage The guest page in question.
3203 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3204 * @param iShw The shadow page table.
3205 * @param cRefs The number of references made in that PT.
3206 */
3207static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
3208{
3209 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3210
3211 /* We should only come here when there's only one reference to this physical page. */
3212 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3213 Assert(cRefs == 1);
3214
3215 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
3216 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3217 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage), cRefs);
3218 if (!fKeptPTEs)
3219 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3220 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3221}
3222
3223
3224/**
3225 * Flushes a list of shadow page tables mapping the same physical page.
3226 *
3227 * @param pVM The VM handle.
3228 * @param pPhysPage The guest page in question.
3229 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3230 * @param iPhysExt The physical cross reference extent list to flush.
3231 */
3232static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3233{
3234 Assert(PGMIsLockOwner(pVM));
3235 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3236 bool fKeepList = false;
3237
3238 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3239 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3240
3241 const uint16_t iPhysExtStart = iPhysExt;
3242 PPGMPOOLPHYSEXT pPhysExt;
3243 do
3244 {
3245 Assert(iPhysExt < pPool->cMaxPhysExts);
3246 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3247 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3248 {
3249 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3250 {
3251 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i], 1);
3252 if (!fKeptPTEs)
3253 {
3254 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3255 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3256 }
3257 else
3258 fKeepList = true;
3259 }
3260 }
3261 /* next */
3262 iPhysExt = pPhysExt->iNext;
3263 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3264
3265 if (!fKeepList)
3266 {
3267 /* insert the list into the free list and clear the ram range entry. */
3268 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3269 pPool->iPhysExtFreeHead = iPhysExtStart;
3270 /* Invalidate the tracking data. */
3271 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3272 }
3273
3274 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3275}
3276
3277
3278/**
3279 * Flushes all shadow page table mappings of the given guest page.
3280 *
3281 * This is typically called when the host page backing the guest one has been
3282 * replaced or when the page protection was changed due to an access handler.
3283 *
3284 * @returns VBox status code.
3285 * @retval VINF_SUCCESS if all references have been successfully cleared.
3286 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3287 * pool cleaning. FF and sync flags are set.
3288 *
3289 * @param pVM The VM handle.
3290 * @param GCPhysPage GC physical address of the page in question
3291 * @param pPhysPage The guest page in question.
3292 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3293 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3294 * flushed, it is NOT touched if this isn't necessary.
3295 * The caller MUST initialize this to @a false.
3296 */
3297int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3298{
3299 PVMCPU pVCpu = VMMGetCpu(pVM);
3300 pgmLock(pVM);
3301 int rc = VINF_SUCCESS;
3302
3303#ifdef PGM_WITH_LARGE_PAGES
3304 /* Is this page part of a large page? */
3305 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3306 {
3307 PPGMPAGE pPhysBase;
3308 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3309
3310 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3311
3312 /* Fetch the large page base. */
3313 if (GCPhysBase != GCPhysPage)
3314 {
3315 pPhysBase = pgmPhysGetPage(&pVM->pgm.s, GCPhysBase);
3316 AssertFatal(pPhysBase);
3317 }
3318 else
3319 pPhysBase = pPhysPage;
3320
3321 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3322
3323 if (PGM_PAGE_GET_PDE_TYPE(pPhysBase) == PGM_PAGE_PDE_TYPE_PDE)
3324 {
3325 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3326 PGM_PAGE_SET_PDE_TYPE(pPhysBase, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3327
3328 /* Update the base page as *only* that one has a reference and there's only one PDE to clear. */
3329 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pPhysBase, fFlushPTEs, pfFlushTLBs);
3330
3331 *pfFlushTLBs = true;
3332 pgmUnlock(pVM);
3333 return rc;
3334 }
3335 }
3336#else
3337 NOREF(GCPhysPage);
3338#endif /* PGM_WITH_LARGE_PAGES */
3339
3340 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3341 if (u16)
3342 {
3343 /*
3344 * The zero page is currently screwing up the tracking and we'll
3345 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3346 * is defined, zero pages won't normally be mapped. Some kind of solution
3347 * will be needed for this problem of course, but it will have to wait...
3348 */
3349 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3350 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3351 rc = VINF_PGM_GCPHYS_ALIASED;
3352 else
3353 {
3354# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3355 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3356 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3357 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3358# endif
3359
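/* Dispatch on the tracking data: a single shadow PT reference is flushed
   directly, an extent list is walked and flushed entry by entry, and an
   overflowed index falls back to the slow full scan. */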
3360 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3361 pgmPoolTrackFlushGCPhysPT(pVM,
3362 pPhysPage,
3363 fFlushPTEs,
3364 PGMPOOL_TD_GET_IDX(u16),
3365 PGMPOOL_TD_GET_CREFS(u16));
3366 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3367 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3368 else
3369 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3370 *pfFlushTLBs = true;
3371
3372# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3373 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3374# endif
3375 }
3376 }
3377
3378 if (rc == VINF_PGM_GCPHYS_ALIASED)
3379 {
3380 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3381 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3382 rc = VINF_PGM_SYNC_CR3;
3383 }
3384 pgmUnlock(pVM);
3385 return rc;
3386}
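/*
 * Minimal caller sketch (pVM, GCPhys and pPage are placeholder names rather
 * than values taken from this file); note that the TLB flush indicator must
 * start out false:
 *
 *     bool fFlushTLBs = false;
 *     int rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhys, pPage, true, &fFlushTLBs);
 *     if (fFlushTLBs)
 *         PGM_INVL_ALL_VCPU_TLBS(pVM);
 */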
3387
3388
3389/**
3390 * Scans all shadow page tables for mappings of a physical page.
3391 *
3392 * This may be slow, but it's most likely more efficient than cleaning
3393 * out the entire page pool / cache.
3394 *
3395 * @returns VBox status code.
3396 * @retval VINF_SUCCESS if all references have been successfully cleared.
3397 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3398 * a page pool cleaning.
3399 *
3400 * @param pVM The VM handle.
3401 * @param pPhysPage The guest page in question.
3402 */
3403int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3404{
3405 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3406 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3407 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3408 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3409
3410 /*
3411 * There is a limit to what makes sense.
3412 */
3413 if ( pPool->cPresent > 1024
3414 && pVM->cCpus == 1)
3415 {
3416 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3417 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3418 return VINF_PGM_GCPHYS_ALIASED;
3419 }
3420
3421 /*
3422 * Iterate all the pages until we've encountered all that are in use.
3423 * This is a simple but not quite optimal solution.
3424 */
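/* Reference values the PTEs are matched against: u64 for PAE/EPT entries and
   its 32-bit truncation u32 for legacy 32-bit entries. */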
3425 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3426 const uint32_t u32 = u64;
3427 unsigned cLeft = pPool->cUsedPages;
3428 unsigned iPage = pPool->cCurPages;
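/* Scan top-down; indexes below PGMPOOL_IDX_FIRST are the fixed root pages and
   are skipped. */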
3429 while (--iPage >= PGMPOOL_IDX_FIRST)
3430 {
3431 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3432 if ( pPage->GCPhys != NIL_RTGCPHYS
3433 && pPage->cPresent)
3434 {
3435 switch (pPage->enmKind)
3436 {
3437 /*
3438 * We only care about shadow page tables.
3439 */
3440 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3441 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3442 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3443 {
3444 unsigned cPresent = pPage->cPresent;
3445 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3446 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3447 if (pPT->a[i].n.u1Present)
3448 {
3449 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3450 {
3451 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3452 pPT->a[i].u = 0;
3453
3454 /* Update the counter as we're removing references. */
3455 Assert(pPage->cPresent);
3456 Assert(pPool->cPresent);
3457 pPage->cPresent--;
3458 pPool->cPresent--;
3459 }
3460 if (!--cPresent)
3461 break;
3462 }
3463 break;
3464 }
3465
3466 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3467 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3468 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3469 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3470 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3471 {
3472 unsigned cPresent = pPage->cPresent;
3473 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3474 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3475 if (pPT->a[i].n.u1Present)
3476 {
3477 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3478 {
3479 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3480 pPT->a[i].u = 0;
3481
3482 /* Update the counter as we're removing references. */
3483 Assert(pPage->cPresent);
3484 Assert(pPool->cPresent);
3485 pPage->cPresent--;
3486 pPool->cPresent--;
3487 }
3488 if (!--cPresent)
3489 break;
3490 }
3491 break;
3492 }
3493#ifndef IN_RC
3494 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3495 {
3496 unsigned cPresent = pPage->cPresent;
3497 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3498 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3499 if (pPT->a[i].n.u1Present)
3500 {
3501 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3502 {
3503 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3504 pPT->a[i].u = 0;
3505
3506 /* Update the counter as we're removing references. */
3507 Assert(pPage->cPresent);
3508 Assert(pPool->cPresent);
3509 pPage->cPresent--;
3510 pPool->cPresent--;
3511 }
3512 if (!--cPresent)
3513 break;
3514 }
3515 break;
3516 }
3517#endif
3518 }
3519 if (!--cLeft)
3520 break;
3521 }
3522 }
3523
3524 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3525 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3526
3527 /*
3528 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3529 */
3530 if (pPool->cPresent > 1024)
3531 {
3532 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3533 return VINF_PGM_GCPHYS_ALIASED;
3534 }
3535
3536 return VINF_SUCCESS;
3537}
3538
3539
3540/**
3541 * Clears the user entry in a user table.
3542 *
3543 * This is used to remove all references to a page when flushing it.
3544 */
3545static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3546{
3547 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3548 Assert(pUser->iUser < pPool->cCurPages);
3549 uint32_t iUserTable = pUser->iUserTable;
3550
3551 /*
3552 * Map the user page.
3553 */
3554 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
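/* The union below provides both 32-bit and 64-bit views of the mapped user
   page so the entry can be cleared with the width matching the table kind. */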
3555 union
3556 {
3557 uint64_t *pau64;
3558 uint32_t *pau32;
3559 } u;
3560 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3561
3562 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3563
3564 /* Safety precaution in case we change the paging for other modes too in the future. */
3565 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3566
3567#ifdef VBOX_STRICT
3568 /*
3569 * Some sanity checks.
3570 */
3571 switch (pUserPage->enmKind)
3572 {
3573 case PGMPOOLKIND_32BIT_PD:
3574 case PGMPOOLKIND_32BIT_PD_PHYS:
3575 Assert(iUserTable < X86_PG_ENTRIES);
3576 break;
3577 case PGMPOOLKIND_PAE_PDPT:
3578 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3579 case PGMPOOLKIND_PAE_PDPT_PHYS:
3580 Assert(iUserTable < 4);
3581 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3582 break;
3583 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3584 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3585 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3586 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3587 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3588 case PGMPOOLKIND_PAE_PD_PHYS:
3589 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3590 break;
3591 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3592 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3593 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3594 break;
3595 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3596 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3597 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3598 break;
3599 case PGMPOOLKIND_64BIT_PML4:
3600 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3601 /* GCPhys >> PAGE_SHIFT is the index here */
3602 break;
3603 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3604 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3605 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3606 break;
3607
3608 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3609 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3610 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3611 break;
3612
3613 case PGMPOOLKIND_ROOT_NESTED:
3614 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3615 break;
3616
3617 default:
3618 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3619 break;
3620 }
3621#endif /* VBOX_STRICT */
3622
3623 /*
3624 * Clear the entry in the user page.
3625 */
3626 switch (pUserPage->enmKind)
3627 {
3628 /* 32-bit entries */
3629 case PGMPOOLKIND_32BIT_PD:
3630 case PGMPOOLKIND_32BIT_PD_PHYS:
3631 ASMAtomicWriteSize(&u.pau32[iUserTable], 0);
3632 break;
3633
3634 /* 64-bit entries */
3635 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3636 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3637 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3638 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3639 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3640#if defined(IN_RC)
3641 /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during CR3 load, so any
3642 * non-present PDPT will continue to cause page faults.
3643 */
3644 ASMReloadCR3();
3645#endif
3646 /* no break */
3647 case PGMPOOLKIND_PAE_PD_PHYS:
3648 case PGMPOOLKIND_PAE_PDPT_PHYS:
3649 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3650 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3651 case PGMPOOLKIND_64BIT_PML4:
3652 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3653 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3654 case PGMPOOLKIND_PAE_PDPT:
3655 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3656 case PGMPOOLKIND_ROOT_NESTED:
3657 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3658 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3659 ASMAtomicWriteSize(&u.pau64[iUserTable], 0);
3660 break;
3661
3662 default:
3663 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3664 }
3665}
3666
3667
3668/**
3669 * Clears all users of a page.
3670 */
3671static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3672{
3673 /*
3674 * Free all the user records.
3675 */
3676 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3677
3678 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3679 uint16_t i = pPage->iUserHead;
3680 while (i != NIL_PGMPOOL_USER_INDEX)
3681 {
3682 /* Clear the entry in the user table. */
3683 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3684
3685 /* Free it. */
3686 const uint16_t iNext = paUsers[i].iNext;
3687 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3688 paUsers[i].iNext = pPool->iUserFreeHead;
3689 pPool->iUserFreeHead = i;
3690
3691 /* Next. */
3692 i = iNext;
3693 }
3694 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3695}
3696
3697
3698/**
3699 * Allocates a new physical cross reference extent.
3700 *
3701 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3702 * @param pVM The VM handle.
3703 * @param piPhysExt Where to store the phys ext index.
3704 */
3705PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3706{
3707 Assert(PGMIsLockOwner(pVM));
3708 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3709 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3710 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3711 {
3712 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3713 return NULL;
3714 }
3715 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3716 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3717 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3718 *piPhysExt = iPhysExt;
3719 return pPhysExt;
3720}
3721
3722
3723/**
3724 * Frees a physical cross reference extent.
3725 *
3726 * @param pVM The VM handle.
3727 * @param iPhysExt The extent to free.
3728 */
3729void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3730{
3731 Assert(PGMIsLockOwner(pVM));
3732 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3733 Assert(iPhysExt < pPool->cMaxPhysExts);
3734 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3735 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3736 {
3737 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3738 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3739 }
3740 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3741 pPool->iPhysExtFreeHead = iPhysExt;
3742}
3743
3744
3745/**
3746 * Frees a list of physical cross reference extents.
3747 *
3748 * @param pVM The VM handle.
3749 * @param iPhysExt The index of the head of the extent list to free.
3750 */
3751void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3752{
3753 Assert(PGMIsLockOwner(pVM));
3754 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3755
3756 const uint16_t iPhysExtStart = iPhysExt;
3757 PPGMPOOLPHYSEXT pPhysExt;
3758 do
3759 {
3760 Assert(iPhysExt < pPool->cMaxPhysExts);
3761 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3762 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3763 {
3764 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3765 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3766 }
3767
3768 /* next */
3769 iPhysExt = pPhysExt->iNext;
3770 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3771
3772 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3773 pPool->iPhysExtFreeHead = iPhysExtStart;
3774}
3775
3776
3777/**
3778 * Insert a reference into a list of physical cross reference extents.
3779 *
3780 * @returns The new tracking data for PGMPAGE.
3781 *
3782 * @param pVM The VM handle.
3783 * @param iPhysExt The physical extent index of the list head.
3784 * @param iShwPT The shadow page table index.
3785 * @param iPte Page table entry
3786 *
3787 */
3788static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3789{
3790 Assert(PGMIsLockOwner(pVM));
3791 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3792 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3793
3794 /* special common case. */
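/* (pgmPoolTrackPhysExtAddref fills slots 0 and 1 when converting a single
   reference into an extent, so the third reference usually lands in aidx[2]
   of the head extent.) */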
3795 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3796 {
3797 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3798 paPhysExts[iPhysExt].apte[2] = iPte;
3799 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3800 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3801 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3802 }
3803
3804 /* general treatment. */
3805 const uint16_t iPhysExtStart = iPhysExt;
3806 unsigned cMax = 15;
3807 for (;;)
3808 {
3809 Assert(iPhysExt < pPool->cMaxPhysExts);
3810 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3811 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3812 {
3813 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3814 paPhysExts[iPhysExt].apte[i] = iPte;
3815 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3816 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3817 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3818 }
3819 if (!--cMax)
3820 {
3821 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3822 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3823 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3824 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3825 }
3826 }
3827
3828 /* add another extent to the list. */
3829 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3830 if (!pNew)
3831 {
3832 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackNoExtentsLeft);
3833 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3834 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3835 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3836 }
3837 pNew->iNext = iPhysExtStart;
3838 pNew->aidx[0] = iShwPT;
3839 pNew->apte[0] = iPte;
3840 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
3841 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3842}
3843
3844
3845/**
3846 * Add a reference to a guest physical page where extents are in use.
3847 *
3848 * @returns The new tracking data for PGMPAGE.
3849 *
3850 * @param pVM The VM handle.
3851 * @param pPhysPage Pointer to the aPages entry in the ram range.
3852 * @param u16 The ram range flags (top 16-bits).
3853 * @param iShwPT The shadow page table index.
3854 * @param iPte Page table entry
3855 */
3856uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
3857{
3858 pgmLock(pVM);
3859 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3860 {
3861 /*
3862 * Convert to extent list.
3863 */
3864 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3865 uint16_t iPhysExt;
3866 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3867 if (pPhysExt)
3868 {
3869 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3870 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3871 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3872 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
3873 pPhysExt->aidx[1] = iShwPT;
3874 pPhysExt->apte[1] = iPte;
3875 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3876 }
3877 else
3878 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3879 }
3880 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3881 {
3882 /*
3883 * Insert into the extent list.
3884 */
3885 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
3886 }
3887 else
3888 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3889 pgmUnlock(pVM);
3890 return u16;
3891}
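/*
 * Minimal usage sketch (pShwPage and iPte stand in for the caller's shadow
 * pool page and PTE index; they are placeholder names, not taken from this
 * file). The tracking word is round-tripped through the function above:
 *
 *     uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
 *     u16 = pgmPoolTrackPhysExtAddref(pVM, pPhysPage, u16, pShwPage->idx, iPte);
 *     PGM_PAGE_SET_TRACKING(pPhysPage, u16);
 */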
3892
3893/**
3894 * Clear references to guest physical memory.
3895 *
3896 * @param pPool The pool.
3897 * @param pPage The page.
3898 * @param pPhysPage Pointer to the aPages entry in the ram range.
3899 * @param iPte Shadow PTE index
3900 */
3901void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
3902{
3903 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3904 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3905
3906 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3907 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3908 {
3909 PVM pVM = pPool->CTX_SUFF(pVM);
3910 pgmLock(pVM);
3911
3912 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3913 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3914 do
3915 {
3916 Assert(iPhysExt < pPool->cMaxPhysExts);
3917
3918 /*
3919 * Look for the shadow page and check if it's all freed.
3920 */
3921 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3922 {
3923 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
3924 && paPhysExts[iPhysExt].apte[i] == iPte)
3925 {
3926 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3927 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3928
3929 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3930 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3931 {
3932 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3933 pgmUnlock(pVM);
3934 return;
3935 }
3936
3937 /* we can free the node. */
3938 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3939 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3940 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3941 {
3942 /* lonely node */
3943 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3944 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3945 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3946 }
3947 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3948 {
3949 /* head */
3950 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3951 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3952 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3953 }
3954 else
3955 {
3956 /* in list */
3957 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
3958 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3959 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3960 }
3961 iPhysExt = iPhysExtNext;
3962 pgmUnlock(pVM);
3963 return;
3964 }
3965 }
3966
3967 /* next */
3968 iPhysExtPrev = iPhysExt;
3969 iPhysExt = paPhysExts[iPhysExt].iNext;
3970 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3971
3972 pgmUnlock(pVM);
3973 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3974 }
3975 else /* nothing to do */
3976 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3977}
3978
3979/**
3980 * Clear references to guest physical memory.
3981 *
3982 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3983 * is assumed to be correct, so the linear search can be skipped and we can assert
3984 * at an earlier point.
3985 *
3986 * @param pPool The pool.
3987 * @param pPage The page.
3988 * @param HCPhys The host physical address corresponding to the guest page.
3989 * @param GCPhys The guest physical address corresponding to HCPhys.
3990 * @param iPte Shadow PTE index
3991 */
3992static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
3993{
3994 /*
3995 * Walk range list.
3996 */
3997 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3998 while (pRam)
3999 {
4000 RTGCPHYS off = GCPhys - pRam->GCPhys;
4001 if (off < pRam->cb)
4002 {
4003 /* does it match? */
4004 const unsigned iPage = off >> PAGE_SHIFT;
4005 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4006#ifdef LOG_ENABLED
4007 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
4008 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4009#endif
4010 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4011 {
4012 Assert(pPage->cPresent);
4013 Assert(pPool->cPresent);
4014 pPage->cPresent--;
4015 pPool->cPresent--;
4016 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4017 return;
4018 }
4019 break;
4020 }
4021 pRam = pRam->CTX_SUFF(pNext);
4022 }
4023 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4024}
4025
4026
4027/**
4028 * Clear references to guest physical memory.
4029 *
4030 * @param pPool The pool.
4031 * @param pPage The page.
4032 * @param HCPhys The host physical address corresponding to the guest page.
4033 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4034 * @param iPte Shadow pte index
4035 */
4036void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4037{
4038 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4039
4040 /*
4041 * Walk range list.
4042 */
4043 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4044 while (pRam)
4045 {
4046 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
4047 if (off < pRam->cb)
4048 {
4049 /* does it match? */
4050 const unsigned iPage = off >> PAGE_SHIFT;
4051 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4052 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4053 {
4054 Assert(pPage->cPresent);
4055 Assert(pPool->cPresent);
4056 pPage->cPresent--;
4057 pPool->cPresent--;
4058 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4059 return;
4060 }
4061 break;
4062 }
4063 pRam = pRam->CTX_SUFF(pNext);
4064 }
4065
4066 /*
4067 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4068 */
4069 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4070 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4071 while (pRam)
4072 {
4073 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4074 while (iPage-- > 0)
4075 {
4076 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4077 {
4078 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4079 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4080 Assert(pPage->cPresent);
4081 Assert(pPool->cPresent);
4082 pPage->cPresent--;
4083 pPool->cPresent--;
4084 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4085 return;
4086 }
4087 }
4088 pRam = pRam->CTX_SUFF(pNext);
4089 }
4090
4091 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
4092}
4093
4094
4095/**
4096 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4097 *
4098 * @param pPool The pool.
4099 * @param pPage The page.
4100 * @param pShwPT The shadow page table (mapping of the page).
4101 * @param pGstPT The guest page table.
4102 */
4103DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4104{
4105 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4106 if (pShwPT->a[i].n.u1Present)
4107 {
4108 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4109 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4110 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4111 if (!pPage->cPresent)
4112 break;
4113 }
4114}
4115
4116
4117/**
4118 * Clear references to guest physical memory in a PAE / 32-bit page table.
4119 *
4120 * @param pPool The pool.
4121 * @param pPage The page.
4122 * @param pShwPT The shadow page table (mapping of the page).
4123 * @param pGstPT The guest page table (just a half one).
4124 */
4125DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
4126{
4127 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4128 if (pShwPT->a[i].n.u1Present)
4129 {
4130 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4131 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4132 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4133 if (!pPage->cPresent)
4134 break;
4135 }
4136}
4137
4138
4139/**
4140 * Clear references to guest physical memory in a PAE / PAE page table.
4141 *
4142 * @param pPool The pool.
4143 * @param pPage The page.
4144 * @param pShwPT The shadow page table (mapping of the page).
4145 * @param pGstPT The guest page table.
4146 */
4147DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
4148{
4149 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4150 if (pShwPT->a[i].n.u1Present)
4151 {
4152 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4153 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4154 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
4155 if (!pPage->cPresent)
4156 break;
4157 }
4158}
4159
4160
4161/**
4162 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4163 *
4164 * @param pPool The pool.
4165 * @param pPage The page.
4166 * @param pShwPT The shadow page table (mapping of the page).
4167 */
4168DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4169{
4170 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4171 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4172 if (pShwPT->a[i].n.u1Present)
4173 {
4174 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4175 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4176 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys, i);
4177 if (!pPage->cPresent)
4178 break;
4179 }
4180}
4181
4182
4183/**
4184 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4185 *
4186 * @param pPool The pool.
4187 * @param pPage The page.
4188 * @param pShwPT The shadow page table (mapping of the page).
4189 */
4190DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
4191{
4192 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4193 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4194 if (pShwPT->a[i].n.u1Present)
4195 {
4196 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4197 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
4198 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys, i);
4199 if (!pPage->cPresent)
4200 break;
4201 }
4202}
4203
4204
4205/**
4206 * Clear references to shadowed pages in an EPT page table.
4207 *
4208 * @param pPool The pool.
4209 * @param pPage The page.
4210 * @param pShwPT The shadow page table (mapping of the page).
4211 */
4212DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4213{
4214 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4215 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4216 if (pShwPT->a[i].n.u1Present)
4217 {
4218 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4219 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4220 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys, i);
4221 if (!pPage->cPresent)
4222 break;
4223 }
4224}
4225
4226
4227
4228/**
4229 * Clear references to shadowed pages in a 32-bit page directory.
4230 *
4231 * @param pPool The pool.
4232 * @param pPage The page.
4233 * @param pShwPD The shadow page directory (mapping of the page).
4234 */
4235DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4236{
4237 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4238 {
4239 if ( pShwPD->a[i].n.u1Present
4240 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4241 )
4242 {
4243 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4244 if (pSubPage)
4245 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4246 else
4247 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4248 }
4249 }
4250}
4251
4252/**
4253 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4254 *
4255 * @param pPool The pool.
4256 * @param pPage The page.
4257 * @param pShwPD The shadow page directory (mapping of the page).
4258 */
4259DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4260{
4261 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4262 {
4263 if ( pShwPD->a[i].n.u1Present
4264 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4265 )
4266 {
4267#ifdef PGM_WITH_LARGE_PAGES
4268 if (pShwPD->a[i].b.u1Size)
4269 {
4270 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4271 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4272 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */, i);
4273 }
4274 else
4275#endif
4276 {
4277 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4278 if (pSubPage)
4279 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4280 else
4281 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4282 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4283 }
4284 }
4285 }
4286}
4287
4288/**
4289 * Clear references to shadowed pages in a PAE page directory pointer table.
4290 *
4291 * @param pPool The pool.
4292 * @param pPage The page.
4293 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4294 */
4295DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4296{
4297 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4298 {
4299 if ( pShwPDPT->a[i].n.u1Present
4300 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4301 )
4302 {
4303 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4304 if (pSubPage)
4305 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4306 else
4307 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4308 }
4309 }
4310}
4311
4312
4313/**
4314 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4315 *
4316 * @param pPool The pool.
4317 * @param pPage The page.
4318 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4319 */
4320DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4321{
4322 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4323 {
4324 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
4325 if (pShwPDPT->a[i].n.u1Present)
4326 {
4327 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4328 if (pSubPage)
4329 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4330 else
4331 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4332 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4333 }
4334 }
4335}
4336
4337
4338/**
4339 * Clear references to shadowed pages in a 64-bit level 4 page table.
4340 *
4341 * @param pPool The pool.
4342 * @param pPage The page.
4343 * @param pShwPML4 The shadow PML4 table (mapping of the page).
4344 */
4345DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4346{
4347 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4348 {
4349 if (pShwPML4->a[i].n.u1Present)
4350 {
4351 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4352 if (pSubPage)
4353 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4354 else
4355 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4356 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4357 }
4358 }
4359}
4360
4361
4362/**
4363 * Clear references to shadowed pages in an EPT page directory.
4364 *
4365 * @param pPool The pool.
4366 * @param pPage The page.
4367 * @param pShwPD The shadow page directory (mapping of the page).
4368 */
4369DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4370{
4371 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4372 {
4373 if (pShwPD->a[i].n.u1Present)
4374 {
4375#ifdef PGM_WITH_LARGE_PAGES
4376 if (pShwPD->a[i].b.u1Size)
4377 {
4378 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4379 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4380 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */, i);
4381 }
4382 else
4383#endif
4384 {
4385 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4386 if (pSubPage)
4387 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4388 else
4389 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4390 }
4391 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4392 }
4393 }
4394}
4395
4396
4397/**
4398 * Clear references to shadowed pages in an EPT page directory pointer table.
4399 *
4400 * @param pPool The pool.
4401 * @param pPage The page.
4402 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4403 */
4404DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4405{
4406 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4407 {
4408 if (pShwPDPT->a[i].n.u1Present)
4409 {
4410 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4411 if (pSubPage)
4412 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4413 else
4414 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4415 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4416 }
4417 }
4418}
4419
4420
4421/**
4422 * Clears all references made by this page.
4423 *
4424 * This includes other shadow pages and GC physical addresses.
4425 *
4426 * @param pPool The pool.
4427 * @param pPage The page.
4428 */
4429static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4430{
4431 /*
4432 * Map the shadow page and take action according to the page kind.
4433 */
4434 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
4435 switch (pPage->enmKind)
4436 {
4437 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4438 {
4439 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4440 void *pvGst;
4441 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4442 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4443 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4444 break;
4445 }
4446
4447 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4448 {
4449 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4450 void *pvGst;
4451 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4452 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
4453 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4454 break;
4455 }
4456
4457 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4458 {
4459 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4460 void *pvGst;
4461 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4462 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
4463 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4464 break;
4465 }
4466
4467 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4468 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4469 {
4470 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4471 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4472 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4473 break;
4474 }
4475
4476 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4477 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4478 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4479 {
4480 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4481 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4482 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4483 break;
4484 }
4485
4486 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4487 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4488 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4489 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4490 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4491 case PGMPOOLKIND_PAE_PD_PHYS:
4492 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4493 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4494 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4495 break;
4496
4497 case PGMPOOLKIND_32BIT_PD_PHYS:
4498 case PGMPOOLKIND_32BIT_PD:
4499 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4500 break;
4501
4502 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4503 case PGMPOOLKIND_PAE_PDPT:
4504 case PGMPOOLKIND_PAE_PDPT_PHYS:
4505 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4506 break;
4507
4508 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4509 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4510 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4511 break;
4512
4513 case PGMPOOLKIND_64BIT_PML4:
4514 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4515 break;
4516
4517 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4518 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4519 break;
4520
4521 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4522 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4523 break;
4524
4525 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4526 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4527 break;
4528
4529 default:
4530 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4531 }
4532
4533 /* Paranoia: clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4534 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4535 ASMMemZeroPage(pvShw);
4536 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4537 pPage->fZeroed = true;
4538 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4539 Assert(!pPage->cPresent);
4540}
4541
4542/**
4543 * Flushes a pool page.
4544 *
4545 * This moves the page to the free list after removing all user references to it.
4546 *
4547 * @returns VBox status code.
4548 * @retval VINF_SUCCESS on success.
4549 * @param pPool The pool.
4550 * @param pPage The shadow page to flush.
4551 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4552 */
4553int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4554{
4555 PVM pVM = pPool->CTX_SUFF(pVM);
4556 bool fFlushRequired = false;
4557
4558 int rc = VINF_SUCCESS;
4559 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4560 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4561 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4562
4563 /*
4564 * Quietly reject any attempts at flushing any of the special root pages.
4565 */
4566 if (pPage->idx < PGMPOOL_IDX_FIRST)
4567 {
4568 AssertFailed(); /* can no longer happen */
4569 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4570 return VINF_SUCCESS;
4571 }
4572
4573 pgmLock(pVM);
4574
4575 /*
4576 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4577 */
4578 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4579 {
4580 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4581 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4582 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4583 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4584 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4585 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4586 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4587 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4588 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4589 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4590 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4591 pgmUnlock(pVM);
4592 return VINF_SUCCESS;
4593 }
4594
4595#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4596 /* Start a subset so we won't run out of mapping space. */
4597 PVMCPU pVCpu = VMMGetCpu(pVM);
4598 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4599#endif
4600
4601 /*
4602 * Mark the page as being in need of an ASMMemZeroPage().
4603 */
4604 pPage->fZeroed = false;
4605
4606#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4607 if (pPage->fDirty)
4608 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4609#endif
4610
4611 /* If there are any users of this table, then we *must* issue a TLB flush on all VCPUs. */
4612 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4613 fFlushRequired = true;
4614
4615 /*
4616 * Clear the page.
4617 */
4618 pgmPoolTrackClearPageUsers(pPool, pPage);
4619 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4620 pgmPoolTrackDeref(pPool, pPage);
4621 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4622
4623 /*
4624 * Flush it from the cache.
4625 */
4626 pgmPoolCacheFlushPage(pPool, pPage);
4627
4628#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4629 /* Heavy stuff done. */
4630 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4631#endif
4632
4633 /*
4634 * Deregister the monitoring.
4635 */
4636 if (pPage->fMonitored)
4637 rc = pgmPoolMonitorFlush(pPool, pPage);
4638
4639 /*
4640 * Free the page.
4641 */
4642 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4643 pPage->iNext = pPool->iFreeHead;
4644 pPool->iFreeHead = pPage->idx;
4645 pPage->enmKind = PGMPOOLKIND_FREE;
4646 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4647 pPage->GCPhys = NIL_RTGCPHYS;
4648 pPage->fReusedFlushPending = false;
4649
4650 pPool->cUsedPages--;
4651
4652 /* Flush the TLBs of all VCPUs if required. */
4653 if ( fFlushRequired
4654 && fFlush)
4655 {
4656 PGM_INVL_ALL_VCPU_TLBS(pVM);
4657 }
4658
4659 pgmUnlock(pVM);
4660 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4661 return rc;
4662}
4663
4664
4665/**
4666 * Frees a usage of a pool page.
4667 *
4668 * The caller is responsible for updating the user table so that it no longer
4669 * references the shadow page.
4670 *
4671 * @param pPool The pool.
4672 * @param pPage The shadow page.
4673 * @param iUser The shadow page pool index of the user table.
4674 * @param iUserTable The index into the user table (shadowed).
4675 */
4676void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4677{
4678 PVM pVM = pPool->CTX_SUFF(pVM);
4679
4680 STAM_PROFILE_START(&pPool->StatFree, a);
4681 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4682 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4683 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4684 pgmLock(pVM);
4685 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4686 if (!pPage->fCached)
4687 pgmPoolFlushPage(pPool, pPage);
4688 pgmUnlock(pVM);
4689 STAM_PROFILE_STOP(&pPool->StatFree, a);
4690}
4691
4692
4693/**
4694 * Makes one or more pages free.
4695 *
4696 * @returns VBox status code.
4697 * @retval VINF_SUCCESS on success.
4698 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4699 *
4700 * @param pPool The pool.
4701 * @param enmKind Page table kind
4702 * @param iUser The user of the page.
4703 */
4704static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4705{
4706 PVM pVM = pPool->CTX_SUFF(pVM);
4707
4708 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4709
4710 /*
4711 * If the pool isn't fully grown yet, expand it.
4712 */
4713 if ( pPool->cCurPages < pPool->cMaxPages
4714#if defined(IN_RC)
4715 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4716 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4717 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4718#endif
4719 )
4720 {
4721 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4722#ifdef IN_RING3
4723 int rc = PGMR3PoolGrow(pVM);
4724#else
4725 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4726#endif
4727 if (RT_FAILURE(rc))
4728 return rc;
4729 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4730 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4731 return VINF_SUCCESS;
4732 }
4733
4734 /*
4735 * Free one cached page.
4736 */
4737 return pgmPoolCacheFreeOne(pPool, iUser);
4738}
4739
4740/**
4741 * Allocates a page from the pool.
4742 *
4743 * This page may actually be a cached page and not in need of any processing
4744 * on the caller's part.
4745 *
4746 * @returns VBox status code.
4747 * @retval VINF_SUCCESS if a NEW page was allocated.
4748 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4749 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4750 * @param pVM The VM handle.
4751 * @param GCPhys The GC physical address of the page we're gonna shadow.
4752 * For 4MB and 2MB PD entries, it's the first address the
4753 * shadow PT is covering.
4754 * @param enmKind The kind of mapping.
4755 * @param enmAccess Access type for the mapping (only relevant for big pages)
4756 * @param iUser The shadow page pool index of the user table.
4757 * @param iUserTable The index into the user table (shadowed).
4758 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4759 * @param fLockPage Lock the page
4760 */
4761int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4762{
4763 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4764 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4765 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4766 *ppPage = NULL;
4767 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4768 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4769 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4770
4771 pgmLock(pVM);
4772
4773 if (pPool->fCacheEnabled)
4774 {
4775 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4776 if (RT_SUCCESS(rc2))
4777 {
4778 if (fLockPage)
4779 pgmPoolLockPage(pPool, *ppPage);
4780 pgmUnlock(pVM);
4781 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4782 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4783 return rc2;
4784 }
4785 }
4786
4787 /*
4788 * Allocate a new one.
4789 */
4790 int rc = VINF_SUCCESS;
4791 uint16_t iNew = pPool->iFreeHead;
4792 if (iNew == NIL_PGMPOOL_IDX)
4793 {
4794 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4795 if (RT_FAILURE(rc))
4796 {
4797 pgmUnlock(pVM);
4798 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4799 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4800 return rc;
4801 }
4802 iNew = pPool->iFreeHead;
4803 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4804 }
4805
4806 /* unlink the free head */
4807 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4808 pPool->iFreeHead = pPage->iNext;
4809 pPage->iNext = NIL_PGMPOOL_IDX;
4810
4811 /*
4812 * Initialize it.
4813 */
4814 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4815 pPage->enmKind = enmKind;
4816 pPage->enmAccess = enmAccess;
4817 pPage->GCPhys = GCPhys;
4818 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4819 pPage->fMonitored = false;
4820 pPage->fCached = false;
4821#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4822 pPage->fDirty = false;
4823#endif
4824 pPage->fReusedFlushPending = false;
4825 pPage->cModifications = 0;
4826 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4827 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4828 pPage->cLocked = 0;
4829 pPage->cPresent = 0;
4830 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
4831 pPage->pvLastAccessHandlerFault = 0;
4832 pPage->cLastAccessHandlerCount = 0;
4833 pPage->pvLastAccessHandlerRip = 0;
4834
4835 /*
4836 * Insert into the tracking and cache. If this fails, free the page.
4837 */
4838 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4839 if (RT_FAILURE(rc3))
4840 {
4841 pPool->cUsedPages--;
4842 pPage->enmKind = PGMPOOLKIND_FREE;
4843 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4844 pPage->GCPhys = NIL_RTGCPHYS;
4845 pPage->iNext = pPool->iFreeHead;
4846 pPool->iFreeHead = pPage->idx;
4847 pgmUnlock(pVM);
4848 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4849 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4850 return rc3;
4851 }
4852
4853 /*
4854 * Commit the allocation, clear the page and return.
4855 */
4856#ifdef VBOX_WITH_STATISTICS
4857 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4858 pPool->cUsedPagesHigh = pPool->cUsedPages;
4859#endif
4860
4861 if (!pPage->fZeroed)
4862 {
4863 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4864 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4865 ASMMemZeroPage(pv);
4866 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4867 }
4868
4869 *ppPage = pPage;
4870 if (fLockPage)
4871 pgmPoolLockPage(pPool, pPage);
4872 pgmUnlock(pVM);
4873 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4874 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4875 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4876 return rc;
4877}
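/*
 * Minimal allocation sketch (GCPhysPT, pParentPage and iPde are placeholder
 * names, not taken from this file); VINF_PGM_CACHED_PAGE indicates that an
 * already populated page was returned from the cache:
 *
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAllocEx(pVM, GCPhysPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                             PGMPOOLACCESS_DONTCARE, pParentPage->idx, iPde,
 *                             &pShwPage, false);
 */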
4878
4879
4880/**
4881 * Frees a usage of a pool page.
4882 *
4883 * @param pVM The VM handle.
4884 * @param HCPhys The HC physical address of the shadow page.
4885 * @param iUser The shadow page pool index of the user table.
4886 * @param iUserTable The index into the user table (shadowed).
4887 */
4888void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4889{
4890 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4891 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4892 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4893}
4894
4895/**
4896 * Internal worker for finding an 'in-use' shadow page given by its physical address.
4897 *
4898 * @returns Pointer to the shadow page structure.
4899 * @param pPool The pool.
4900 * @param HCPhys The HC physical address of the shadow page.
4901 */
4902PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4903{
4904 PVM pVM = pPool->CTX_SUFF(pVM);
4905
4906 Assert(PGMIsLockOwner(pVM));
4907
4908 /*
4909 * Look up the page.
4910 */
4911 pgmLock(pVM);
4912 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4913 pgmUnlock(pVM);
4914
4915 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4916 return pPage;
4917}
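/*
 * Illustrative note: shadow pages are keyed in pPool->HCPhysTree by their host
 * physical address (pPage->Core.Key), and the input is masked with
 * X86_PTE_PAE_PG_MASK before the lookup.  A hypothetical caller can therefore
 * pass a raw shadow paging-entry value with the flag bits still set, e.g.:
 *
 *     PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, Pdpe.u);
 *
 * and it will still resolve to the right pool page.
 */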
4918
4919#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
4920/**
4921 * Flushes the specified page if it is present in the pool.
4922 *
4923 * @param pVM The VM handle.
4924 * @param GCPhys Guest physical address of the page to flush.
4925 */
4926void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
4927{
4928 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4929
4930 VM_ASSERT_EMT(pVM);
4931
4932 /*
4933 * Look up the GCPhys in the hash.
4934 */
4935 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
4936 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
4937 if (i == NIL_PGMPOOL_IDX)
4938 return;
4939
4940 do
4941 {
4942 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4943 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
4944 {
4945 switch (pPage->enmKind)
4946 {
4947 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4948 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4949 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4950 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4951 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4952 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4953 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4954 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4955 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4956 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4957 case PGMPOOLKIND_64BIT_PML4:
4958 case PGMPOOLKIND_32BIT_PD:
4959 case PGMPOOLKIND_PAE_PDPT:
4960 {
4961 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
4962#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4963 if (pPage->fDirty)
4964 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
4965 else
4966#endif
4967 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
4968 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
4969 pgmPoolMonitorChainFlush(pPool, pPage);
4970 return;
4971 }
4972
4973 /* ignore, no monitoring. */
4974 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4975 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4976 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4977 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4978 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4979 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4980 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4981 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4982 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4983 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4984 case PGMPOOLKIND_ROOT_NESTED:
4985 case PGMPOOLKIND_PAE_PD_PHYS:
4986 case PGMPOOLKIND_PAE_PDPT_PHYS:
4987 case PGMPOOLKIND_32BIT_PD_PHYS:
4988 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4989 break;
4990
4991 default:
4992 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4993 }
4994 }
4995
4996 /* next */
4997 i = pPage->iNext;
4998 } while (i != NIL_PGMPOOL_IDX);
4999 return;
5000}
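/*
 * Illustrative note on the lookup above: GCPhys is rounded down to a page
 * boundary, PGMPOOL_HASH() picks the bucket in pPool->aiHash, and the chain is
 * walked through pPage->iNext.  The unsigned comparison
 *
 *     pPage->GCPhys - GCPhys < PAGE_SIZE
 *
 * matches any shadow page whose GCPhys falls inside that guest page.  Only the
 * monitored (write-tracked) table kinds are flushed; the *_PHYS and nested
 * kinds fall through because they are never monitored.
 */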
5001#endif /* IN_RING3 */
5002
5003#ifdef IN_RING3
5004
5005
5006/**
5007 * Resets the shadow paging state of a virtual CPU on hot plugging.
5008 *
5009 * @param pVM The VM handle.
5010 * @param pVCpu The virtual CPU.
5011 */
5012void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5013{
5014 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5015
5016 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5017 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5018 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5019}
5020
5021
5022/**
5023 * Flushes the entire cache.
5024 *
5025 * It will set the VMCPU_FF_PGM_SYNC_CR3 force-action flag on every VCPU and
5026 * assumes the caller is aware of this and will execute the CR3 flush.
5027 *
5028 * @param pVM The VM handle.
5029 */
5030void pgmR3PoolReset(PVM pVM)
5031{
5032 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5033
5034 Assert(PGMIsLockOwner(pVM));
5035 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5036 LogFlow(("pgmR3PoolReset:\n"));
5037
5038 /*
5039 * If there are no pages in the pool, there is nothing to do.
5040 */
5041 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5042 {
5043 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5044 return;
5045 }
5046
5047 /*
5048 * Exit the shadow mode since we're going to clear everything,
5049 * including the root page.
5050 */
5051 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5052 {
5053 PVMCPU pVCpu = &pVM->aCpus[i];
5054 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5055 }
5056
5057 /*
5058 * Nuke the free list and reinsert all pages into it.
5059 */
5060 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5061 {
5062 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5063
5064 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5065 if (pPage->fMonitored)
5066 pgmPoolMonitorFlush(pPool, pPage);
5067 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5068 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5069 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5070 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5071 pPage->cModifications = 0;
5072 pPage->GCPhys = NIL_RTGCPHYS;
5073 pPage->enmKind = PGMPOOLKIND_FREE;
5074 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5075 Assert(pPage->idx == i);
5076 pPage->iNext = i + 1;
5077 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5078 pPage->fSeenNonGlobal = false;
5079 pPage->fMonitored = false;
5080#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5081 pPage->fDirty = false;
5082#endif
5083 pPage->fCached = false;
5084 pPage->fReusedFlushPending = false;
5085 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5086 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5087 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5088 pPage->cLocked = 0;
5089 }
5090 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5091 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5092 pPool->cUsedPages = 0;
5093
5094 /*
5095 * Zap and reinitialize the user records.
5096 */
5097 pPool->cPresent = 0;
5098 pPool->iUserFreeHead = 0;
5099 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5100 const unsigned cMaxUsers = pPool->cMaxUsers;
5101 for (unsigned i = 0; i < cMaxUsers; i++)
5102 {
5103 paUsers[i].iNext = i + 1;
5104 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5105 paUsers[i].iUserTable = 0xfffffffe;
5106 }
5107 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5108
5109 /*
5110 * Clear all the GCPhys links and rebuild the phys ext free list.
5111 */
5112 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
5113 pRam;
5114 pRam = pRam->CTX_SUFF(pNext))
5115 {
5116 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5117 while (iPage-- > 0)
5118 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
5119 }
5120
5121 pPool->iPhysExtFreeHead = 0;
5122 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5123 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5124 for (unsigned i = 0; i < cMaxPhysExts; i++)
5125 {
5126 paPhysExts[i].iNext = i + 1;
5127 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5128 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5129 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5130 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5131 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5132 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5133 }
5134 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
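    /*
     * Illustrative note: the page, user record and phys-ext arrays are all
     * rebuilt above as intrusive singly linked free lists - element i points
     * at element i + 1 via iNext and the last element is terminated with the
     * matching NIL index.  Allocation later just pops the head, mirroring what
     * pgmPoolAlloc() does with pPool->iFreeHead:
     *
     *     uint16_t iPhysExt = pPool->iPhysExtFreeHead;
     *     pPool->iPhysExtFreeHead = paPhysExts[iPhysExt].iNext;
     *     paPhysExts[iPhysExt].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
     */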
5135
5136 /*
5137 * Just zap the modified list.
5138 */
5139 pPool->cModifiedPages = 0;
5140 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5141
5142 /*
5143 * Clear the GCPhys hash and the age list.
5144 */
5145 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5146 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5147 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5148 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5149
5150#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5151 /* Clear all dirty pages. */
5152 pPool->idxFreeDirtyPage = 0;
5153 pPool->cDirtyPages = 0;
5154 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
5155 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
5156#endif
5157
5158 /*
5159 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5160 */
5161 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
5162 {
5163 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5164 pPage->iNext = NIL_PGMPOOL_IDX;
5165 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5166 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5167 pPage->cModifications = 0;
5168 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
5169 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5170 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5171 if (pPage->fMonitored)
5172 {
5173 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
5174 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
5175 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
5176 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5177 pPool->pszAccessHandler);
5178 AssertFatalRCSuccess(rc);
5179 pgmPoolHashInsert(pPool, pPage);
5180 }
5181 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5182 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5183 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5184 }
5185
5186 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5187 {
5188 /*
5189 * Re-enter the shadowing mode and assert Sync CR3 FF.
5190 */
5191 PVMCPU pVCpu = &pVM->aCpus[i];
5192 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5193 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5194 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5195 }
5196
5197 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5198}
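/*
 * Illustrative note: this reset assumes the PGM lock is held (see the
 * PGMIsLockOwner assertion at the top).  On return every VCPU has
 * VMCPU_FF_PGM_SYNC_CR3 and VMCPU_FF_TLB_FLUSH pending, so a hypothetical
 * caller on the VM reset path would simply do:
 *
 *     pgmLock(pVM);
 *     pgmR3PoolReset(pVM);
 *     pgmUnlock(pVM);
 *     // the next CR3 sync on each EMT rebuilds the shadow structures
 */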
5199#endif /* IN_RING3 */
5200
5201#ifdef LOG_ENABLED
5202static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5203{
5204 switch(enmKind)
5205 {
5206 case PGMPOOLKIND_INVALID:
5207 return "PGMPOOLKIND_INVALID";
5208 case PGMPOOLKIND_FREE:
5209 return "PGMPOOLKIND_FREE";
5210 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5211 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5212 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5213 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5214 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5215 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5216 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5217 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5218 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5219 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5220 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5221 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5222 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5223 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5224 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5225 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5226 case PGMPOOLKIND_32BIT_PD:
5227 return "PGMPOOLKIND_32BIT_PD";
5228 case PGMPOOLKIND_32BIT_PD_PHYS:
5229 return "PGMPOOLKIND_32BIT_PD_PHYS";
5230 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5231 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5232 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5233 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5234 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5235 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5236 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5237 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5238 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5239 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5240 case PGMPOOLKIND_PAE_PD_PHYS:
5241 return "PGMPOOLKIND_PAE_PD_PHYS";
5242 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5243 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5244 case PGMPOOLKIND_PAE_PDPT:
5245 return "PGMPOOLKIND_PAE_PDPT";
5246 case PGMPOOLKIND_PAE_PDPT_PHYS:
5247 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5248 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5249 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5250 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5251 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5252 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5253 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5254 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5255 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5256 case PGMPOOLKIND_64BIT_PML4:
5257 return "PGMPOOLKIND_64BIT_PML4";
5258 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5259 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5260 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5261 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5262 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5263 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5264 case PGMPOOLKIND_ROOT_NESTED:
5265 return "PGMPOOLKIND_ROOT_NESTED";
5266 }
5267 return "Unknown kind!";
5268}
5269#endif /* LOG_ENABLED */