VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 28840

Last change on this file since 28840 was 28840, checked in by vboxsync, 15 years ago

Flip RW bit for EPT pages as well.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 197.1 KB
1/* $Id: PGMAllPool.cpp 28840 2010-04-27 15:35:48Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/pgm.h>
24#include <VBox/mm.h>
25#include <VBox/em.h>
26#include <VBox/cpum.h>
27#ifdef IN_RC
28# include <VBox/patm.h>
29#endif
30#include "../PGMInternal.h"
31#include <VBox/vm.h>
32#include "../PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/hwacc_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/string.h>
40
41
42/*******************************************************************************
43* Internal Functions *
44*******************************************************************************/
45RT_C_DECLS_BEGIN
46static void pgmPoolFlushAllInt(PPGMPOOL pPool);
47DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
48DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
49static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
51static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#ifndef IN_RING3
53DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
54#endif
55#ifdef LOG_ENABLED
56static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
57#endif
58#if defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)
59static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT);
60#endif
61
62int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
63PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
64void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
65void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
66
67RT_C_DECLS_END
68
69
70/**
71 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
72 *
73 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
74 * @param enmKind The page kind.
75 */
76DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
77{
78 switch (enmKind)
79 {
80 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
82 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
83 return true;
84 default:
85 return false;
86 }
87}
88
89/** @def PGMPOOL_PAGE_2_LOCKED_PTR
90 * Maps a pool page into the current context and locks it (RC only).
91 *
92 * @returns Pointer to the page (current context).
93 * @param pVM The VM handle.
94 * @param pPage The pool page.
95 *
96 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
97 * small page window employed by that function. Be careful.
98 * @remark There is no need to assert on the result.
99 */
100#if defined(IN_RC)
101DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
102{
103 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
104
105 /* Make sure the dynamic mapping will not be reused. */
106 if (pv)
107 PGMDynLockHCPage(pVM, (uint8_t *)pv);
108
109 return pv;
110}
111#else
112# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
113#endif
114
115/** @def PGMPOOL_UNLOCK_PTR
116 * Unlocks a previously locked dynamic mapping (RC only).
117 *
118 * @returns VBox status code.
119 * @param pVM The VM handle.
120 * @param pPage The pool page.
121 *
122 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
123 * small page window employed by that function. Be careful.
124 * @remark There is no need to assert on the result.
125 */
126#if defined(IN_RC)
127DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
128{
129 if (pvPage)
130 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
131}
132#else
133# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
134#endif
135
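/*
 * Illustrative usage sketch (hypothetical helper, kept disabled): the two helpers
 * above are meant to bracket short accesses to a pool page mapping, which is the
 * pattern used throughout this file.
 */
#if 0 /* illustrative sketch only */
static void pgmPoolExampleTouchPage(PVM pVM, PPGMPOOLPAGE pPage)
{
    /* Map the shadow page into the current context (and lock the RC dynamic mapping). */
    void *pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
    if (pv)
    {
        /* ... inspect or modify the shadow entries here ... */

        /* Release the RC lock again (no-op in R3/R0). */
        PGMPOOL_UNLOCK_PTR(pVM, pv);
    }
}
#endif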
136
137/**
138 * Flushes a chain of pages sharing the same access monitor.
139 *
140 * @returns VBox status code suitable for scheduling.
141 * @param pPool The pool.
142 * @param pPage A page in the chain.
143 */
144int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
145{
146 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
147
148 /*
149 * Find the list head.
150 */
151 uint16_t idx = pPage->idx;
152 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
153 {
154 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
155 {
156 idx = pPage->iMonitoredPrev;
157 Assert(idx != pPage->idx);
158 pPage = &pPool->aPages[idx];
159 }
160 }
161
162 /*
163 * Iterate the list flushing each shadow page.
164 */
165 int rc = VINF_SUCCESS;
166 for (;;)
167 {
168 idx = pPage->iMonitoredNext;
169 Assert(idx != pPage->idx);
170 if (pPage->idx >= PGMPOOL_IDX_FIRST)
171 {
172 int rc2 = pgmPoolFlushPage(pPool, pPage);
173 AssertRC(rc2);
174 }
175 /* next */
176 if (idx == NIL_PGMPOOL_IDX)
177 break;
178 pPage = &pPool->aPages[idx];
179 }
180 return rc;
181}
182
183
184/**
185 * Wrapper for reading the guest entry being modified in the current context.
186 *
187 * @returns VBox status code.
188 * @param pVM VM Handle.
189 * @param pvDst Destination address.
190 * @param pvSrc Source guest virtual address.
191 * @param GCPhysSrc The source guest physical address.
192 * @param cb Size of the data to read.
193 */
194DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
195{
196#if defined(IN_RING3)
197 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
198 return VINF_SUCCESS;
199#else
200 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
201 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
202#endif
203}
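/*
 * Illustrative sketch (hypothetical helper, kept disabled): the typical call pattern
 * for the helper above, as used by the monitoring code below - fetch the guest PTE
 * that a monitored write is about to change; the helper masks the address down to
 * the entry size itself.
 */
#if 0 /* illustrative sketch only */
static int pgmPoolExampleReadGstPtePae(PVM pVM, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress,
                                       RTGCPHYS GCPhysFault, PX86PTEPAE pGstPte)
{
    /* sizeof(*pGstPte) == 8, so the low three bits of both addresses are masked off. */
    return pgmPoolPhysSimpleReadGCPhys(pVM, pGstPte, pvAddress, GCPhysFault, sizeof(*pGstPte));
}
#endif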
204
205/**
206 * Process shadow entries before they are changed by the guest.
207 *
208 * For PT entries we will clear them. For PD entries, we'll simply check
209 * for mapping conflicts and set the SyncCR3 FF if found.
210 *
211 * @param pVCpu VMCPU handle
212 * @param pPool The pool.
213 * @param pPage The head page.
214 * @param GCPhysFault The guest physical fault address.
215 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
216 * In R3 this is the host context 'fault' address.
217 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
218 */
219void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
220{
221 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
222 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
223 PVM pVM = pPool->CTX_SUFF(pVM);
224
225 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
226
227 for (;;)
228 {
229 union
230 {
231 void *pv;
232 PX86PT pPT;
233 PX86PTPAE pPTPae;
234 PX86PD pPD;
235 PX86PDPAE pPDPae;
236 PX86PDPT pPDPT;
237 PX86PML4 pPML4;
238 } uShw;
239
240 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
241
242 uShw.pv = NULL;
243 switch (pPage->enmKind)
244 {
245 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
246 {
247 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
248 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
249 const unsigned iShw = off / sizeof(X86PTE);
250 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
251 if (uShw.pPT->a[iShw].n.u1Present)
252 {
253 X86PTE GstPte;
254
255 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
256 AssertRC(rc);
257 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
258 pgmPoolTracDerefGCPhysHint(pPool, pPage,
259 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
260 GstPte.u & X86_PTE_PG_MASK,
261 iShw);
262 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
263 }
264 break;
265 }
266
267 /* page/2 sized */
268 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
269 {
270 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
271 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
272 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
273 {
274 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
275 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
276 if (uShw.pPTPae->a[iShw].n.u1Present)
277 {
278 X86PTE GstPte;
279 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
280 AssertRC(rc);
281
282 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
283 pgmPoolTracDerefGCPhysHint(pPool, pPage,
284 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
285 GstPte.u & X86_PTE_PG_MASK,
286 iShw);
287 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
288 }
289 }
290 break;
291 }
292
293 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
294 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
295 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
296 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
297 {
298 unsigned iGst = off / sizeof(X86PDE);
299 unsigned iShwPdpt = iGst / 256;
300 unsigned iShw = (iGst % 256) * 2;
301 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
302
303 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
304 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
305 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
306 {
307 for (unsigned i = 0; i < 2; i++)
308 {
309# ifndef IN_RING0
310 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
311 {
312 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
313 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
314 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
315 break;
316 }
317 else
318# endif /* !IN_RING0 */
319 if (uShw.pPDPae->a[iShw+i].n.u1Present)
320 {
321 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
322 pgmPoolFree(pVM,
323 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
324 pPage->idx,
325 iShw + i);
326 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
327 }
328
329 /* paranoia / a bit assumptive. */
330 if ( (off & 3)
331 && (off & 3) + cbWrite > 4)
332 {
333 const unsigned iShw2 = iShw + 2 + i;
334 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
335 {
336# ifndef IN_RING0
337 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
338 {
339 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
340 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
341 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
342 break;
343 }
344 else
345# endif /* !IN_RING0 */
346 if (uShw.pPDPae->a[iShw2].n.u1Present)
347 {
348 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
349 pgmPoolFree(pVM,
350 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
351 pPage->idx,
352 iShw2);
353 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
354 }
355 }
356 }
357 }
358 }
359 break;
360 }
361
362 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
363 {
364 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
365 const unsigned iShw = off / sizeof(X86PTEPAE);
366 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
367 if (uShw.pPTPae->a[iShw].n.u1Present)
368 {
369 X86PTEPAE GstPte;
370 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
371 AssertRC(rc);
372
373 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
374 pgmPoolTracDerefGCPhysHint(pPool, pPage,
375 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
376 GstPte.u & X86_PTE_PAE_PG_MASK,
377 iShw);
378 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
379 }
380
381 /* paranoia / a bit assumptive. */
382 if ( (off & 7)
383 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
384 {
385 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
386 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
387
388 if (uShw.pPTPae->a[iShw2].n.u1Present)
389 {
390 X86PTEPAE GstPte;
391# ifdef IN_RING3
392 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
393# else
394 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
395# endif
396 AssertRC(rc);
397 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
398 pgmPoolTracDerefGCPhysHint(pPool, pPage,
399 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
400 GstPte.u & X86_PTE_PAE_PG_MASK,
401 iShw2);
402 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u ,0);
403 }
404 }
405 break;
406 }
407
408 case PGMPOOLKIND_32BIT_PD:
409 {
410 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
411 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
412
413 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
414 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
415# ifndef IN_RING0
416 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
417 {
418 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
419 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
420 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
421 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
422 break;
423 }
424# endif /* !IN_RING0 */
425# ifndef IN_RING0
426 else
427# endif /* !IN_RING0 */
428 {
429 if (uShw.pPD->a[iShw].n.u1Present)
430 {
431 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
432 pgmPoolFree(pVM,
433 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
434 pPage->idx,
435 iShw);
436 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
437 }
438 }
439 /* paranoia / a bit assumptive. */
440 if ( (off & 3)
441 && (off & 3) + cbWrite > sizeof(X86PTE))
442 {
443 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
444 if ( iShw2 != iShw
445 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
446 {
447# ifndef IN_RING0
448 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
449 {
450 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
451 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
452 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
453 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
454 break;
455 }
456# endif /* !IN_RING0 */
457# ifndef IN_RING0
458 else
459# endif /* !IN_RING0 */
460 {
461 if (uShw.pPD->a[iShw2].n.u1Present)
462 {
463 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
464 pgmPoolFree(pVM,
465 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
466 pPage->idx,
467 iShw2);
468 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
469 }
470 }
471 }
472 }
473#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
474 if ( uShw.pPD->a[iShw].n.u1Present
475 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
476 {
477 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
478# ifdef IN_RC /* TLB load - we're pushing things a bit... */
479 ASMProbeReadByte(pvAddress);
480# endif
481 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
482 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
483 }
484#endif
485 break;
486 }
487
488 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
489 {
490 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
491 const unsigned iShw = off / sizeof(X86PDEPAE);
492 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
493#ifndef IN_RING0
494 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
495 {
496 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
497 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
498 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
499 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
500 break;
501 }
502#endif /* !IN_RING0 */
503 /*
504 * Causes trouble when the guest uses a PDE to refer to the whole page table level
505 * structure. (Invalidate here; faults later on when it tries to change the page
506 * table entries -> recheck; probably only applies to the RC case.)
507 */
508# ifndef IN_RING0
509 else
510# endif /* !IN_RING0 */
511 {
512 if (uShw.pPDPae->a[iShw].n.u1Present)
513 {
514 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
515 pgmPoolFree(pVM,
516 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
517 pPage->idx,
518 iShw);
519 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
520 }
521 }
522 /* paranoia / a bit assumptive. */
523 if ( (off & 7)
524 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
525 {
526 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
527 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
528
529#ifndef IN_RING0
530 if ( iShw2 != iShw
531 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
532 {
533 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
534 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
535 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
536 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
537 break;
538 }
539#endif /* !IN_RING0 */
540# ifndef IN_RING0
541 else
542# endif /* !IN_RING0 */
543 if (uShw.pPDPae->a[iShw2].n.u1Present)
544 {
545 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
546 pgmPoolFree(pVM,
547 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
548 pPage->idx,
549 iShw2);
550 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
551 }
552 }
553 break;
554 }
555
556 case PGMPOOLKIND_PAE_PDPT:
557 {
558 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
559 /*
560 * Hopefully this doesn't happen very often:
561 * - touching unused parts of the page
562 * - messing with the bits of pd pointers without changing the physical address
563 */
564 /* PDPT roots are not page aligned; 32 bytes only! */
565 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
566
567 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
568 const unsigned iShw = offPdpt / sizeof(X86PDPE);
569 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
570 {
571# ifndef IN_RING0
572 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
573 {
574 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
575 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
576 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
577 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
578 break;
579 }
580# endif /* !IN_RING0 */
581# ifndef IN_RING0
582 else
583# endif /* !IN_RING0 */
584 if (uShw.pPDPT->a[iShw].n.u1Present)
585 {
586 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
587 pgmPoolFree(pVM,
588 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
589 pPage->idx,
590 iShw);
591 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
592 }
593
594 /* paranoia / a bit assumptive. */
595 if ( (offPdpt & 7)
596 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
597 {
598 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
599 if ( iShw2 != iShw
600 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
601 {
602# ifndef IN_RING0
603 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
604 {
605 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
606 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
607 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
608 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
609 break;
610 }
611# endif /* !IN_RING0 */
612# ifndef IN_RING0
613 else
614# endif /* !IN_RING0 */
615 if (uShw.pPDPT->a[iShw2].n.u1Present)
616 {
617 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
618 pgmPoolFree(pVM,
619 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
620 pPage->idx,
621 iShw2);
622 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
623 }
624 }
625 }
626 }
627 break;
628 }
629
630#ifndef IN_RC
631 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
632 {
633 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
634 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
635 const unsigned iShw = off / sizeof(X86PDEPAE);
636 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
637 if (uShw.pPDPae->a[iShw].n.u1Present)
638 {
639 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
640 pgmPoolFree(pVM,
641 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
642 pPage->idx,
643 iShw);
644 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
645 }
646 /* paranoia / a bit assumptive. */
647 if ( (off & 7)
648 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
649 {
650 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
651 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
652
653 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
654 if (uShw.pPDPae->a[iShw2].n.u1Present)
655 {
656 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
657 pgmPoolFree(pVM,
658 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
659 pPage->idx,
660 iShw2);
661 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
662 }
663 }
664 break;
665 }
666
667 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
668 {
669 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
670 /*
671 * Hopefully this doesn't happen very often:
672 * - messing with the bits of pd pointers without changing the physical address
673 */
674 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
675 const unsigned iShw = off / sizeof(X86PDPE);
676 if (uShw.pPDPT->a[iShw].n.u1Present)
677 {
678 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
679 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
680 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
681 }
682 /* paranoia / a bit assumptive. */
683 if ( (off & 7)
684 && (off & 7) + cbWrite > sizeof(X86PDPE))
685 {
686 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
687 if (uShw.pPDPT->a[iShw2].n.u1Present)
688 {
689 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
690 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
691 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
692 }
693 }
694 break;
695 }
696
697 case PGMPOOLKIND_64BIT_PML4:
698 {
699 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
700 /*
701 * Hopefully this doesn't happen very often:
702 * - messing with the bits of pd pointers without changing the physical address
703 */
704 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
705 const unsigned iShw = off / sizeof(X86PDPE);
706 if (uShw.pPML4->a[iShw].n.u1Present)
707 {
708 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
709 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
710 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
711 }
712 /* paranoia / a bit assumptive. */
713 if ( (off & 7)
714 && (off & 7) + cbWrite > sizeof(X86PDPE))
715 {
716 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
717 if (uShw.pPML4->a[iShw2].n.u1Present)
718 {
719 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
720 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
721 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
722 }
723 }
724 break;
725 }
726#endif /* !IN_RC */
727
728 default:
729 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
730 }
731 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
732
733 /* next */
734 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
735 return;
736 pPage = &pPool->aPages[pPage->iMonitoredNext];
737 }
738}
739
740# ifndef IN_RING3
741/**
742 * Checks if an access could be a fork operation in progress.
743 *
744 * Meaning, that the guest is setting up the parent process for Copy-On-Write.
745 *
746 * @returns true if it's likely that we're forking, otherwise false.
747 * @param pPool The pool.
748 * @param pDis The disassembled instruction.
749 * @param offFault The access offset.
750 */
751DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
752{
753 /*
754 * i386 Linux uses btr to clear X86_PTE_RW.
755 * The functions involved are (2.6.16 source inspection):
756 * clear_bit
757 * ptep_set_wrprotect
758 * copy_one_pte
759 * copy_pte_range
760 * copy_pmd_range
761 * copy_pud_range
762 * copy_page_range
763 * dup_mmap
764 * dup_mm
765 * copy_mm
766 * copy_process
767 * do_fork
768 */
769 if ( pDis->pCurInstr->opcode == OP_BTR
770 && !(offFault & 4)
771 /** @todo Validate that the bit index is X86_PTE_RW. */
772 )
773 {
774 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
775 return true;
776 }
777 return false;
778}
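/*
 * Background sketch (guest-side, an assumption based on the 2.6.16 call chain listed
 * in the comment above): ptep_set_wrprotect() ends up doing an atomic bit clear of
 * the RW bit, which lives in the low 32 bits of the entry - hence the OP_BTR check
 * and the !(offFault & 4) screen above. The helper name below is hypothetical.
 */
#if 0 /* illustrative sketch only */
static void guestPtepSetWrprotect(volatile void *pvPteLow)
{
    /* Clear bit 1 (X86_PTE_RW) in the low dword of the PTE, like "lock btr" does. */
    ASMAtomicBitClear(pvPteLow, 1 /* X86_PTE_RW bit */);
}
#endif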
779
780
781/**
782 * Determine whether the page is likely to have been reused.
783 *
784 * @returns true if we consider the page as being reused for a different purpose.
785 * @returns false if we consider it to still be a paging page.
786 * @param pVM VM Handle.
787 * @param pVCpu VMCPU Handle.
788 * @param pRegFrame Trap register frame.
789 * @param pDis The disassembly info for the faulting instruction.
790 * @param pvFault The fault address.
791 *
792 * @remark The REP prefix check is left to the caller because of STOSD/W.
793 */
794DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
795{
796#ifndef IN_RC
797 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
798 if ( HWACCMHasPendingIrq(pVM)
799 && (pRegFrame->rsp - pvFault) < 32)
800 {
801 /* Fault caused by stack writes while trying to inject an interrupt event. */
802 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
803 return true;
804 }
805#else
806 NOREF(pVM); NOREF(pvFault);
807#endif
808
809 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
810
811 /* Non-supervisor mode write means it's used for something else. */
812 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
813 return true;
814
815 switch (pDis->pCurInstr->opcode)
816 {
817 /* call implies the actual push of the return address faulted */
818 case OP_CALL:
819 Log4(("pgmPoolMonitorIsReused: CALL\n"));
820 return true;
821 case OP_PUSH:
822 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
823 return true;
824 case OP_PUSHF:
825 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
826 return true;
827 case OP_PUSHA:
828 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
829 return true;
830 case OP_FXSAVE:
831 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
832 return true;
833 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
834 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
835 return true;
836 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
837 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
838 return true;
839 case OP_MOVSWD:
840 case OP_STOSWD:
841 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
842 && pRegFrame->rcx >= 0x40
843 )
844 {
845 Assert(pDis->mode == CPUMODE_64BIT);
846
847 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
848 return true;
849 }
850 return false;
851 }
852 if ( ( (pDis->param1.flags & USE_REG_GEN32)
853 || (pDis->param1.flags & USE_REG_GEN64))
854 && (pDis->param1.base.reg_gen == USE_REG_ESP))
855 {
856 Log4(("pgmPoolMonitorIsReused: ESP\n"));
857 return true;
858 }
859
860 return false;
861}
862
863/**
864 * Flushes the page being accessed.
865 *
866 * @returns VBox status code suitable for scheduling.
867 * @param pVM The VM handle.
868 * @param pVCpu The VMCPU handle.
869 * @param pPool The pool.
870 * @param pPage The pool page (head).
871 * @param pDis The disassembly of the write instruction.
872 * @param pRegFrame The trap register frame.
873 * @param GCPhysFault The fault address as guest physical address.
874 * @param pvFault The fault address.
875 */
876static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
877 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
878{
879 /*
880 * First, do the flushing.
881 */
882 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
883
884 /*
885 * Emulate the instruction (XP/W2K problem, requires PC/CR2/SP detection). Must do this in raw mode (!) or XP boot will fail.
886 */
887 uint32_t cbWritten;
888 int rc2 = EMInterpretInstructionCPUEx(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten, EMCODETYPE_ALL);
889 if (RT_SUCCESS(rc2))
890 pRegFrame->rip += pDis->opsize;
891 else if (rc2 == VERR_EM_INTERPRETER)
892 {
893#ifdef IN_RC
894 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
895 {
896 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
897 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
898 rc = VINF_SUCCESS;
899 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
900 }
901 else
902#endif
903 {
904 rc = VINF_EM_RAW_EMULATE_INSTR;
905 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
906 }
907 }
908 else
909 rc = rc2;
910
911 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
912 return rc;
913}
914
915/**
916 * Handles the STOSD write accesses.
917 *
918 * @returns VBox status code suitable for scheduling.
919 * @param pVM The VM handle.
920 * @param pPool The pool.
921 * @param pPage The pool page (head).
922 * @param pDis The disassembly of the write instruction.
923 * @param pRegFrame The trap register frame.
924 * @param GCPhysFault The fault address as guest physical address.
925 * @param pvFault The fault address.
926 */
927DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
928 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
929{
930 unsigned uIncrement = pDis->param1.size;
931
932 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
933 Assert(pRegFrame->rcx <= 0x20);
934
935#ifdef VBOX_STRICT
936 if (pDis->opmode == CPUMODE_32BIT)
937 Assert(uIncrement == 4);
938 else
939 Assert(uIncrement == 8);
940#endif
941
942 Log3(("pgmPoolAccessHandlerSTOSD\n"));
943
944 /*
945 * Increment the modification counter and insert it into the list
946 * of modified pages the first time.
947 */
948 if (!pPage->cModifications++)
949 pgmPoolMonitorModifiedInsert(pPool, pPage);
950
951 /*
952 * Execute REP STOSD.
953 *
954 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
955 * write situation, meaning that it's safe to write here.
956 */
957 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
958 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
959 while (pRegFrame->rcx)
960 {
961#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
962 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
963 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
964 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
965#else
966 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
967#endif
968#ifdef IN_RC
969 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
970#else
971 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
972#endif
973 pu32 += uIncrement;
974 GCPhysFault += uIncrement;
975 pRegFrame->rdi += uIncrement;
976 pRegFrame->rcx--;
977 }
978 pRegFrame->rip += pDis->opsize;
979
980 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
981 return VINF_SUCCESS;
982}
983
984
985/**
986 * Handles the simple write accesses.
987 *
988 * @returns VBox status code suitable for scheduling.
989 * @param pVM The VM handle.
990 * @param pVCpu The VMCPU handle.
991 * @param pPool The pool.
992 * @param pPage The pool page (head).
993 * @param pDis The disassembly of the write instruction.
994 * @param pRegFrame The trap register frame.
995 * @param GCPhysFault The fault address as guest physical address.
996 * @param pvFault The fault address.
997 * @param pfReused Reused state (out)
998 */
999DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1000 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
1001{
1002 Log3(("pgmPoolAccessHandlerSimple\n"));
1003 /*
1004 * Increment the modification counter and insert it into the list
1005 * of modified pages the first time.
1006 */
1007 if (!pPage->cModifications++)
1008 pgmPoolMonitorModifiedInsert(pPool, pPage);
1009
1010 /*
1011 * Clear the affected shadow entries. ASSUMES that pvFault is readable.
1012 */
1013#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1014 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1015 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1016 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1017#else
1018 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1019#endif
1020
1021 /*
1022 * Interpret the instruction.
1023 */
1024 uint32_t cb;
1025 int rc = EMInterpretInstructionCPUEx(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb, EMCODETYPE_ALL);
1026 if (RT_SUCCESS(rc))
1027 pRegFrame->rip += pDis->opsize;
1028 else if (rc == VERR_EM_INTERPRETER)
1029 {
1030 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1031 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1032 rc = VINF_EM_RAW_EMULATE_INSTR;
1033 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1034 }
1035
1036#if 0 /* experimental code */
1037 if (rc == VINF_SUCCESS)
1038 {
1039 switch (pPage->enmKind)
1040 {
1041 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1042 {
1043 X86PTEPAE GstPte;
1044 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1045 AssertRC(rc);
1046
1047 /* Check the new value written by the guest. If present and with a bogus physical address, then
1048 * it's fairly safe to assume the guest is reusing the PT.
1049 */
1050 if (GstPte.n.u1Present)
1051 {
1052 RTHCPHYS HCPhys = -1;
1053 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1054 if (rc != VINF_SUCCESS)
1055 {
1056 *pfReused = true;
1057 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1058 }
1059 }
1060 break;
1061 }
1062 }
1063 }
1064#endif
1065
1066 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1067 return rc;
1068}
1069
1070/**
1071 * \#PF Handler callback for PT write accesses.
1072 *
1073 * @returns VBox status code (appropriate for GC return).
1074 * @param pVM VM Handle.
1075 * @param uErrorCode CPU Error code.
1076 * @param pRegFrame Trap register frame.
1077 * NULL on DMA and other non CPU access.
1078 * @param pvFault The fault address (cr2).
1079 * @param GCPhysFault The GC physical address corresponding to pvFault.
1080 * @param pvUser User argument.
1081 */
1082DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1083{
1084 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1085 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1086 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1087 PVMCPU pVCpu = VMMGetCpu(pVM);
1088 unsigned cMaxModifications;
1089 bool fForcedFlush = false;
1090
1091 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1092
1093 pgmLock(pVM);
1094 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1095 {
1096 /* Pool page changed while we were waiting for the lock; ignore. */
1097 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1098 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1099 pgmUnlock(pVM);
1100 return VINF_SUCCESS;
1101 }
1102#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1103 if (pPage->fDirty)
1104 {
1105 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1106 pgmUnlock(pVM);
1107 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1108 }
1109#endif
1110
1111#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1112 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1113 {
1114 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1115 void *pvGst;
1116 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1117 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1118 }
1119#endif
1120
1121 /*
1122 * Disassemble the faulting instruction.
1123 */
1124 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1125 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1126 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1127 {
1128 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1129 pgmUnlock(pVM);
1130 return rc;
1131 }
1132
1133 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1134
1135 /*
1136 * We should ALWAYS have the list head as user parameter. This
1137 * is because we use that page to record the changes.
1138 */
1139 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1140
1141#ifdef IN_RING0
1142 /* Maximum nr of modifications depends on the page type. */
1143 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1144 cMaxModifications = 4;
1145 else
1146 cMaxModifications = 24;
1147#else
1148 cMaxModifications = 48;
1149#endif
1150
1151 /*
1152 * Incremental page table updates should weigh more than random ones.
1153 * (Only applies when started from offset 0)
1154 */
1155 pVCpu->pgm.s.cPoolAccessHandler++;
1156 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1157 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1158 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1159 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1160 {
1161 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1162 Assert(pPage->cModifications < 32000);
1163 pPage->cModifications = pPage->cModifications * 2;
1164 pPage->pvLastAccessHandlerFault = pvFault;
1165 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1166 if (pPage->cModifications >= cMaxModifications)
1167 {
1168 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1169 fForcedFlush = true;
1170 }
1171 }
1172
1173 if (pPage->cModifications >= cMaxModifications)
1174 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1175
1176 /*
1177 * Check if it's worth dealing with.
1178 */
1179 bool fReused = false;
1180 bool fNotReusedNotForking = false;
1181 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1182 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1183 )
1184 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1185 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1186 {
1187 /*
1188 * Simple instructions, no REP prefix.
1189 */
1190 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1191 {
1192 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1193 if (fReused)
1194 goto flushPage;
1195
1196 /* A mov instruction to change the first page table entry will be remembered so we can detect
1197 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1198 */
1199 if ( rc == VINF_SUCCESS
1200 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1201 && pDis->pCurInstr->opcode == OP_MOV
1202 && (pvFault & PAGE_OFFSET_MASK) == 0)
1203 {
1204 pPage->pvLastAccessHandlerFault = pvFault;
1205 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1206 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1207 /* Make sure we don't kick out a page too quickly. */
1208 if (pPage->cModifications > 8)
1209 pPage->cModifications = 2;
1210 }
1211 else
1212 if (pPage->pvLastAccessHandlerFault == pvFault)
1213 {
1214 /* ignore the 2nd write to this page table entry. */
1215 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1216 }
1217 else
1218 {
1219 pPage->pvLastAccessHandlerFault = 0;
1220 pPage->pvLastAccessHandlerRip = 0;
1221 }
1222
1223 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1224 pgmUnlock(pVM);
1225 return rc;
1226 }
1227
1228 /*
1229 * Windows is frequently doing small memset() operations (netio test 4k+).
1230 * We have to deal with these or we'll kill the cache and performance.
1231 */
1232 if ( pDis->pCurInstr->opcode == OP_STOSWD
1233 && !pRegFrame->eflags.Bits.u1DF
1234 && pDis->opmode == pDis->mode
1235 && pDis->addrmode == pDis->mode)
1236 {
1237 bool fValidStosd = false;
1238
1239 if ( pDis->mode == CPUMODE_32BIT
1240 && pDis->prefix == PREFIX_REP
1241 && pRegFrame->ecx <= 0x20
1242 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1243 && !((uintptr_t)pvFault & 3)
1244 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1245 )
1246 {
1247 fValidStosd = true;
1248 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1249 }
1250 else
1251 if ( pDis->mode == CPUMODE_64BIT
1252 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1253 && pRegFrame->rcx <= 0x20
1254 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1255 && !((uintptr_t)pvFault & 7)
1256 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1257 )
1258 {
1259 fValidStosd = true;
1260 }
1261
1262 if (fValidStosd)
1263 {
1264 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1265 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1266 pgmUnlock(pVM);
1267 return rc;
1268 }
1269 }
1270
1271 /* REP prefix, don't bother. */
1272 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1273 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1274 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1275 fNotReusedNotForking = true;
1276 }
1277
1278#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1279 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1280 * leads to pgm pool thrashing and an excessive amount of write faults due to page monitoring.
1281 */
1282 if ( pPage->cModifications >= cMaxModifications
1283 && !fForcedFlush
1284 && pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1285 && ( fNotReusedNotForking
1286 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1287 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1288 )
1289 )
1290 {
1291 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1292 Assert(pPage->fDirty == false);
1293
1294 /* Flush any monitored duplicates as we will disable write protection. */
1295 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1296 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1297 {
1298 PPGMPOOLPAGE pPageHead = pPage;
1299
1300 /* Find the monitor head. */
1301 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1302 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1303
1304 while (pPageHead)
1305 {
1306 unsigned idxNext = pPageHead->iMonitoredNext;
1307
1308 if (pPageHead != pPage)
1309 {
1310 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1311 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1312 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1313 AssertRC(rc2);
1314 }
1315
1316 if (idxNext == NIL_PGMPOOL_IDX)
1317 break;
1318
1319 pPageHead = &pPool->aPages[idxNext];
1320 }
1321 }
1322
1323 /* The flushing above might fail for locked pages, so double check. */
1324 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1325 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1326 {
1327 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1328
1329 /* Temporarily allow write access to the page table again. */
1330 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
1331 if (rc == VINF_SUCCESS)
1332 {
1333 rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
1334 AssertMsg(rc == VINF_SUCCESS
1335 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1336 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1337 || rc == VERR_PAGE_NOT_PRESENT,
1338 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1339
1340 pPage->pvDirtyFault = pvFault;
1341
1342 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1343 pgmUnlock(pVM);
1344 return rc;
1345 }
1346 }
1347 }
1348#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1349
1350 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1351flushPage:
1352 /*
1353 * Not worth it, so flush it.
1354 *
1355 * If we considered it to be reused, don't go back to ring-3
1356 * to emulate failed instructions since we usually cannot
1357 * interpret them. This may be a bit risky, in which case
1358 * the reuse detection must be fixed.
1359 */
1360 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1361 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1362 && fReused)
1363 {
1364 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1365 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1366 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1367 }
1368 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1369 pgmUnlock(pVM);
1370 return rc;
1371}
1372
1373# endif /* !IN_RING3 */
1374
1375# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1376
1377# ifdef VBOX_STRICT
1378/**
1379 * Check references to guest physical memory in a PAE / PAE page table.
1380 *
1381 * @param pPool The pool.
1382 * @param pPage The page.
1383 * @param pShwPT The shadow page table (mapping of the page).
1384 * @param pGstPT The guest page table.
1385 */
1386static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
1387{
1388 unsigned cErrors = 0;
1389 int LastRc = -1; /* initialized to shut up gcc */
1390 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1391 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1392
1393#ifdef VBOX_STRICT
1394 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1395 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1396#endif
1397 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1398 {
1399 if (pShwPT->a[i].n.u1Present)
1400 {
1401 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1402 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1403 if ( rc != VINF_SUCCESS
1404 || (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) != HCPhys)
1405 {
1406 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1407 LastPTE = i;
1408 LastRc = rc;
1409 LastHCPhys = HCPhys;
1410 cErrors++;
1411
1412 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1413 rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pPage->GCPhys, &HCPhysPT);
1414 AssertRC(rc);
1415
1416 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1417 {
1418 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1419
1420 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1421 {
1422 PX86PTPAE pShwPT2 = (PX86PTPAE)PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pTempPage);
1423
1424 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1425 {
1426 if ( pShwPT2->a[j].n.u1Present
1427 && pShwPT2->a[j].n.u1Write
1428 && ((pShwPT2->a[j].u & X86_PTE_PAE_PG_MASK) == HCPhysPT))
1429 {
1430 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, pShwPT->a[j].u, pShwPT2->a[j].u));
1431 }
1432 }
1433 }
1434 }
1435 }
1436 }
1437 }
1438 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, pShwPT->a[LastPTE].u, LastHCPhys));
1439}
1440# endif /* VBOX_STRICT */
1441
1442/**
1443 * Clear references to guest physical memory in a PAE / PAE page table.
1444 *
1445 * @returns nr of changed PTEs
1446 * @param pPool The pool.
1447 * @param pPage The page.
1448 * @param pShwPT The shadow page table (mapping of the page).
1449 * @param pGstPT The guest page table.
1450 * @param pOldGstPT The old cached guest page table.
1451 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1452 * @param pfFlush Flush reused page table (out)
1453 */
1454DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT, PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1455{
1456 unsigned cChanged = 0;
1457
1458#ifdef VBOX_STRICT
1459 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1460 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1461#endif
1462 *pfFlush = false;
1463
1464 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1465 {
1466 /* Check the new value written by the guest. If present and with a bogus physical address, then
1467 * it's fairly safe to assume the guest is reusing the PT.
1468 */
1469 if ( fAllowRemoval
1470 && pGstPT->a[i].n.u1Present)
1471 {
1472 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1473 {
1474 *pfFlush = true;
1475 return ++cChanged;
1476 }
1477 }
1478 if (pShwPT->a[i].n.u1Present)
1479 {
1480 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1481 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1482 {
1483#ifdef VBOX_STRICT
1484 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1485 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1486 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1487#endif
1488 uint64_t uHostAttr = pShwPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1489 bool fHostRW = !!(pShwPT->a[i].u & X86_PTE_RW);
1490 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1491 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1492
1493 if ( uHostAttr == uGuestAttr
1494 && fHostRW <= fGuestRW)
1495 continue;
1496 }
1497 cChanged++;
1498 /* Something was changed, so flush it. */
1499 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1500 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1501 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1502 ASMAtomicWriteSize(&pShwPT->a[i].u, 0);
1503 }
1504 }
1505 return cChanged;
1506}
1507
1508
1509/**
1510 * Flush a dirty page
1511 *
1512 * @param pVM VM Handle.
1513 * @param pPool The pool.
1514 * @param idxSlot Dirty array slot index
1515 * @param fAllowRemoval Allow a reused page table to be removed
1516 */
1517static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1518{
1519 PPGMPOOLPAGE pPage;
1520 unsigned idxPage;
1521
1522 Assert(idxSlot < RT_ELEMENTS(pPool->aIdxDirtyPages));
1523 if (pPool->aIdxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
1524 return;
1525
1526 idxPage = pPool->aIdxDirtyPages[idxSlot];
1527 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1528 pPage = &pPool->aPages[idxPage];
1529 Assert(pPage->idx == idxPage);
1530 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1531
1532 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1533 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1534
1535 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1536 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys);
1537 Assert(rc == VINF_SUCCESS);
1538 pPage->fDirty = false;
1539
1540#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1541 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(VMMGetCpu(pVM));
1542#endif
1543
1544#ifdef VBOX_STRICT
1545 uint64_t fFlags = 0;
1546 RTHCPHYS HCPhys;
1547 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1548 AssertMsg( ( rc == VINF_SUCCESS
1549 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1550 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1551 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1552 || rc == VERR_PAGE_NOT_PRESENT,
1553 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1554#endif
1555
1556 /* Flush those PTEs that have changed. */
1557 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1558 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1559 void *pvGst;
1560 bool fFlush;
1561 rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1562 unsigned cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst, (PCX86PTPAE)&pPool->aDirtyPages[idxSlot][0], fAllowRemoval, &fFlush);
1563 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1564 /** Note: we might want to consider keeping the dirty page active in case there were many changes. */
1565
1566 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1567 Assert(pPage->cModifications);
1568 if (cChanges < 4)
1569 pPage->cModifications = 1; /* must use > 0 here */
1570 else
1571 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1572
1573 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
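    /* If the dirty page array was full, make the slot we are flushing the next free-slot hint. */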
1574 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages))
1575 pPool->idxFreeDirtyPage = idxSlot;
1576
1577 pPool->cDirtyPages--;
1578 pPool->aIdxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1579 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1580 if (fFlush)
1581 {
1582 Assert(fAllowRemoval);
1583 Log(("Flush reused page table!\n"));
1584 pgmPoolFlushPage(pPool, pPage);
1585 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1586 }
1587 else
1588 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1589
1590#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1591 PGMDynMapPopAutoSubset(VMMGetCpu(pVM), iPrevSubset);
1592#endif
1593}
1594
1595# ifndef IN_RING3
1596/**
1597 * Add a new dirty page
1598 *
1599 * @param pVM VM Handle.
1600 * @param pPool The pool.
1601 * @param pPage The page.
1602 */
1603void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1604{
1605 unsigned idxFree;
1606
1607 Assert(PGMIsLocked(pVM));
1608 AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
1609 Assert(!pPage->fDirty);
1610
1611 idxFree = pPool->idxFreeDirtyPage;
1612 Assert(idxFree < RT_ELEMENTS(pPool->aIdxDirtyPages));
1613 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1614
1615 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aIdxDirtyPages))
1616 {
1617 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1618 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1619 }
1620 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages));
1621 AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1622
1623 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1624
1625 /* Make a copy of the guest page table as we require valid GCPhys addresses when removing
1626 * references to physical pages. (the HCPhys linear lookup is *extremely* expensive!)
1627 */
1628 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1629 void *pvGst;
1630 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1631 memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
1632#ifdef VBOX_STRICT
1633 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1634#endif
1635
1636 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1637 pPage->fDirty = true;
1638 pPage->idxDirty = idxFree;
1639 pPool->aIdxDirtyPages[idxFree] = pPage->idx;
1640 pPool->cDirtyPages++;
1641
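    /* Advance the free-slot hint; if that slot is already occupied (and the array isn't full), search the ring for the first vacant slot. */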
1642 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1643 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages)
1644 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1645 {
1646 unsigned i;
1647 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1648 {
1649 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1650 if (pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1651 {
1652 pPool->idxFreeDirtyPage = idxFree;
1653 break;
1654 }
1655 }
1656 Assert(i != RT_ELEMENTS(pPool->aIdxDirtyPages));
1657 }
1658
1659 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages) || pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1660 return;
1661}
1662# endif /* !IN_RING3 */
1663
1664/**
1665 * Check if the specified page is dirty (not write monitored)
1666 *
1667 * @return dirty or not
1668 * @param pVM VM Handle.
1669 * @param GCPhys Guest physical address
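 *
 * Illustrative use only (hypothetical caller; the PGM lock must be held):
 * @code
 *     if (pgmPoolIsDirtyPage(pVM, GCPhys))
 *         pgmPoolInvalidateDirtyPage(pVM, GCPhys);
 * @endcode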
1670 */
1671bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1672{
1673 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1674 Assert(PGMIsLocked(pVM));
1675 if (!pPool->cDirtyPages)
1676 return false;
1677
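    /* Compare page-aligned addresses; dirty tracking operates on whole guest page tables. */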
1678 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1679
1680 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1681 {
1682 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1683 {
1684 PPGMPOOLPAGE pPage;
1685 unsigned idxPage = pPool->aIdxDirtyPages[i];
1686
1687 pPage = &pPool->aPages[idxPage];
1688 if (pPage->GCPhys == GCPhys)
1689 return true;
1690 }
1691 }
1692 return false;
1693}
1694
1695/**
1696 * Reset all dirty pages by reinstating page monitoring.
1697 *
1698 * @param pVM VM Handle.
1699 */
1700void pgmPoolResetDirtyPages(PVM pVM)
1701{
1702 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1703 Assert(PGMIsLocked(pVM));
1704 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1705
1706 if (!pPool->cDirtyPages)
1707 return;
1708
1709 Log(("pgmPoolResetDirtyPages\n"));
1710 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1711 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1712
1713 pPool->idxFreeDirtyPage = 0;
1714 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1715 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1716 {
1717 unsigned i;
1718 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1719 {
1720 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1721 {
1722 pPool->idxFreeDirtyPage = i;
1723 break;
1724 }
1725 }
1726 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1727 }
1728
1729 Assert(pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages));
1730 return;
1731}
1732
1733/**
1734 * Invalidate the dirty page tracking of the specified page table by flushing it and reinstating page monitoring.
1735 *
1736 * @param pVM VM Handle.
1737 * @param GCPhysPT Physical address of the page table
1738 */
1739void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1740{
1741 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1742 Assert(PGMIsLocked(pVM));
1743 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1744 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aIdxDirtyPages);
1745
1746 if (!pPool->cDirtyPages)
1747 return;
1748
1749 GCPhysPT = GCPhysPT & ~(RTGCPHYS)(PAGE_SIZE - 1);
1750
1751 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1752 {
1753 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1754 {
1755 unsigned idxPage = pPool->aIdxDirtyPages[i];
1756
1757 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1758 if (pPage->GCPhys == GCPhysPT)
1759 {
1760 idxDirtyPage = i;
1761 break;
1762 }
1763 }
1764 }
1765
1766 if (idxDirtyPage != RT_ELEMENTS(pPool->aIdxDirtyPages))
1767 {
1768 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1769 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1770 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1771 {
1772 unsigned i;
1773 for (i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1774 {
1775 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1776 {
1777 pPool->idxFreeDirtyPage = i;
1778 break;
1779 }
1780 }
1781 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1782 }
1783 }
1784}
1785
1786# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1787
1788/**
1789 * Inserts a page into the GCPhys hash table.
1790 *
1791 * @param pPool The pool.
1792 * @param pPage The page.
1793 */
1794DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1795{
1796 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1797 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1798 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1799 pPage->iNext = pPool->aiHash[iHash];
1800 pPool->aiHash[iHash] = pPage->idx;
1801}
1802
1803
1804/**
1805 * Removes a page from the GCPhys hash table.
1806 *
1807 * @param pPool The pool.
1808 * @param pPage The page.
1809 */
1810DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1811{
1812 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1813 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1814 if (pPool->aiHash[iHash] == pPage->idx)
1815 pPool->aiHash[iHash] = pPage->iNext;
1816 else
1817 {
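        /* Not the chain head: walk the singly linked hash chain to find our predecessor and unlink us. */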
1818 uint16_t iPrev = pPool->aiHash[iHash];
1819 for (;;)
1820 {
1821 const int16_t i = pPool->aPages[iPrev].iNext;
1822 if (i == pPage->idx)
1823 {
1824 pPool->aPages[iPrev].iNext = pPage->iNext;
1825 break;
1826 }
1827 if (i == NIL_PGMPOOL_IDX)
1828 {
1829 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1830 break;
1831 }
1832 iPrev = i;
1833 }
1834 }
1835 pPage->iNext = NIL_PGMPOOL_IDX;
1836}
1837
1838
1839/**
1840 * Frees up one cache page.
1841 *
1842 * @returns VBox status code.
1843 * @retval VINF_SUCCESS on success.
1844 * @param pPool The pool.
1845 * @param iUser The user index.
1846 */
1847static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1848{
1849#ifndef IN_RC
1850 const PVM pVM = pPool->CTX_SUFF(pVM);
1851#endif
1852    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1853 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1854
1855 /*
1856 * Select one page from the tail of the age list.
1857 */
1858 PPGMPOOLPAGE pPage;
1859 for (unsigned iLoop = 0; ; iLoop++)
1860 {
1861 uint16_t iToFree = pPool->iAgeTail;
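        /* Don't free the caller's own user page; fall back to the previous entry in the age list. */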
1862 if (iToFree == iUser)
1863 iToFree = pPool->aPages[iToFree].iAgePrev;
1864/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1865 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1866 {
1867 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1868 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1869 {
1870 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1871 continue;
1872 iToFree = i;
1873 break;
1874 }
1875 }
1876*/
1877 Assert(iToFree != iUser);
1878 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1879 pPage = &pPool->aPages[iToFree];
1880
1881 /*
1882 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1883 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1884 */
1885 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1886 break;
1887 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1888 pgmPoolCacheUsed(pPool, pPage);
1889 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1890 }
1891
1892 /*
1893 * Found a usable page, flush it and return.
1894 */
1895 int rc = pgmPoolFlushPage(pPool, pPage);
1896 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1897    /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
1898 if (rc == VINF_SUCCESS)
1899 PGM_INVL_ALL_VCPU_TLBS(pVM);
1900 return rc;
1901}
1902
1903
1904/**
1905 * Checks if a kind mismatch is really a page being reused
1906 * or if it's just normal remappings.
1907 *
1908 * @returns true if reused and the cached page (enmKind1) should be flushed
1909 * @returns false if not reused.
1910 * @param enmKind1 The kind of the cached page.
1911 * @param enmKind2 The kind of the requested page.
1912 */
1913static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1914{
1915 switch (enmKind1)
1916 {
1917 /*
1918 * Never reuse them. There is no remapping in non-paging mode.
1919 */
1920 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1921 case PGMPOOLKIND_32BIT_PD_PHYS:
1922 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1923 case PGMPOOLKIND_PAE_PD_PHYS:
1924 case PGMPOOLKIND_PAE_PDPT_PHYS:
1925 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1926 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1927 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1928 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1929 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1930 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1931 return false;
1932
1933 /*
1934 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1935 */
1936 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1937 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1938 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1939 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1940 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1941 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1942 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1943 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1944 case PGMPOOLKIND_32BIT_PD:
1945 case PGMPOOLKIND_PAE_PDPT:
1946 switch (enmKind2)
1947 {
1948 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1949 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1950 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1951 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1952 case PGMPOOLKIND_64BIT_PML4:
1953 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1954 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1955 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1956 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1957 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1958 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1959 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1960 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1961 return true;
1962 default:
1963 return false;
1964 }
1965
1966 /*
1967 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1968 */
1969 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1970 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1971 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1972 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1973 case PGMPOOLKIND_64BIT_PML4:
1974 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1975 switch (enmKind2)
1976 {
1977 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1978 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1979 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1980 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1981 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1982 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1983 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1984 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1985 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1986 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1987 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1988 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1989 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1990 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1991 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1992 return true;
1993 default:
1994 return false;
1995 }
1996
1997 /*
1998 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1999 */
2000 case PGMPOOLKIND_ROOT_NESTED:
2001 return false;
2002
2003 default:
2004 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2005 }
2006}
2007
2008
2009/**
2010 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2011 *
2012 * @returns VBox status code.
2013 * @retval VINF_PGM_CACHED_PAGE on success.
2014 * @retval VERR_FILE_NOT_FOUND if not found.
2015 * @param pPool The pool.
2016 * @param GCPhys The GC physical address of the page we're gonna shadow.
2017 * @param enmKind The kind of mapping.
2018 * @param enmAccess Access type for the mapping (only relevant for big pages)
2019 * @param iUser The shadow page pool index of the user table.
2020 * @param iUserTable The index into the user table (shadowed).
2021 * @param ppPage Where to store the pointer to the page.
2022 */
2023static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2024{
2025#ifndef IN_RC
2026 const PVM pVM = pPool->CTX_SUFF(pVM);
2027#endif
2028 /*
2029 * Look up the GCPhys in the hash.
2030 */
2031 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2032 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2033 if (i != NIL_PGMPOOL_IDX)
2034 {
2035 do
2036 {
2037 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2038 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2039 if (pPage->GCPhys == GCPhys)
2040 {
2041 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2042 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2043 {
2044 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2045 * doesn't flush it in case there are no more free use records.
2046 */
2047 pgmPoolCacheUsed(pPool, pPage);
2048
2049 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2050 if (RT_SUCCESS(rc))
2051 {
2052 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2053 *ppPage = pPage;
2054 if (pPage->cModifications)
2055 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2056 STAM_COUNTER_INC(&pPool->StatCacheHits);
2057 return VINF_PGM_CACHED_PAGE;
2058 }
2059 return rc;
2060 }
2061
2062 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2063 {
2064 /*
2065 * The kind is different. In some cases we should now flush the page
2066 * as it has been reused, but in most cases this is normal remapping
2067 * of PDs as PT or big pages using the GCPhys field in a slightly
2068 * different way than the other kinds.
2069 */
2070 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2071 {
2072 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2073 pgmPoolFlushPage(pPool, pPage);
2074 break;
2075 }
2076 }
2077 }
2078
2079 /* next */
2080 i = pPage->iNext;
2081 } while (i != NIL_PGMPOOL_IDX);
2082 }
2083
2084 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2085 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2086 return VERR_FILE_NOT_FOUND;
2087}
2088
2089
2090/**
2091 * Inserts a page into the cache.
2092 *
2093 * @param pPool The pool.
2094 * @param pPage The cached page.
2095 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2096 */
2097static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2098{
2099 /*
2100 * Insert into the GCPhys hash if the page is fit for that.
2101 */
2102 Assert(!pPage->fCached);
2103 if (fCanBeCached)
2104 {
2105 pPage->fCached = true;
2106 pgmPoolHashInsert(pPool, pPage);
2107 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2108 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2109 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2110 }
2111 else
2112 {
2113 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2114 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2115 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2116 }
2117
2118 /*
2119 * Insert at the head of the age list.
2120 */
2121 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2122 pPage->iAgeNext = pPool->iAgeHead;
2123 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2124 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2125 else
2126 pPool->iAgeTail = pPage->idx;
2127 pPool->iAgeHead = pPage->idx;
2128}
2129
2130
2131/**
2132 * Flushes a cached page.
2133 *
2134 * @param pPool The pool.
2135 * @param pPage The cached page.
2136 */
2137static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2138{
2139 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2140
2141 /*
2142 * Remove the page from the hash.
2143 */
2144 if (pPage->fCached)
2145 {
2146 pPage->fCached = false;
2147 pgmPoolHashRemove(pPool, pPage);
2148 }
2149 else
2150 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2151
2152 /*
2153 * Remove it from the age list.
2154 */
2155 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2156 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2157 else
2158 pPool->iAgeTail = pPage->iAgePrev;
2159 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2160 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2161 else
2162 pPool->iAgeHead = pPage->iAgeNext;
2163 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2164 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2165}
2166
2167
2168/**
2169 * Looks for pages sharing the monitor.
2170 *
2171 * @returns Pointer to the head page.
2172 * @returns NULL if not found.
2173 * @param pPool The Pool
2174 * @param pNewPage The page which is going to be monitored.
2175 */
2176static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2177{
2178 /*
2179 * Look up the GCPhys in the hash.
2180 */
2181 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2182 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2183 if (i == NIL_PGMPOOL_IDX)
2184 return NULL;
2185 do
2186 {
2187 PPGMPOOLPAGE pPage = &pPool->aPages[i];
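        /* Match any pool page backed by the same guest physical page; the unsigned subtraction tolerates kinds that keep a sub-page offset in GCPhys. */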
2188 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2189 && pPage != pNewPage)
2190 {
2191 switch (pPage->enmKind)
2192 {
2193 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2194 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2195 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2196 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2197 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2198 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2199 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2200 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2201 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2202 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2203 case PGMPOOLKIND_64BIT_PML4:
2204 case PGMPOOLKIND_32BIT_PD:
2205 case PGMPOOLKIND_PAE_PDPT:
2206 {
2207 /* find the head */
2208 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2209 {
2210 Assert(pPage->iMonitoredPrev != pPage->idx);
2211 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2212 }
2213 return pPage;
2214 }
2215
2216 /* ignore, no monitoring. */
2217 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2218 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2219 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2220 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2221 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2222 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2223 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2224 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2225 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2226 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2227 case PGMPOOLKIND_ROOT_NESTED:
2228 case PGMPOOLKIND_PAE_PD_PHYS:
2229 case PGMPOOLKIND_PAE_PDPT_PHYS:
2230 case PGMPOOLKIND_32BIT_PD_PHYS:
2231 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2232 break;
2233 default:
2234 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2235 }
2236 }
2237
2238 /* next */
2239 i = pPage->iNext;
2240 } while (i != NIL_PGMPOOL_IDX);
2241 return NULL;
2242}
2243
2244
2245/**
2246 * Enables write monitoring of a guest page.
2247 *
2248 * @returns VBox status code.
2249 * @retval VINF_SUCCESS on success.
2250 * @param pPool The pool.
2251 * @param pPage The cached page.
2252 */
2253static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2254{
2255 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
2256
2257 /*
2258 * Filter out the relevant kinds.
2259 */
2260 switch (pPage->enmKind)
2261 {
2262 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2263 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2264 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2265 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2266 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2267 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2268 case PGMPOOLKIND_64BIT_PML4:
2269 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2270 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2271 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2272 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2273 case PGMPOOLKIND_32BIT_PD:
2274 case PGMPOOLKIND_PAE_PDPT:
2275 break;
2276
2277 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2278 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2279 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2280 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2281 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2282 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2283 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2284 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2285 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2286 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2287 case PGMPOOLKIND_ROOT_NESTED:
2288 /* Nothing to monitor here. */
2289 return VINF_SUCCESS;
2290
2291 case PGMPOOLKIND_32BIT_PD_PHYS:
2292 case PGMPOOLKIND_PAE_PDPT_PHYS:
2293 case PGMPOOLKIND_PAE_PD_PHYS:
2294 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2295 /* Nothing to monitor here. */
2296 return VINF_SUCCESS;
2297 default:
2298 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2299 }
2300
2301 /*
2302 * Install handler.
2303 */
2304 int rc;
2305 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2306 if (pPageHead)
2307 {
2308 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2309 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2310
2311#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2312 if (pPageHead->fDirty)
2313 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2314#endif
2315
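        /* Link this page into the monitoring chain right after the head; the physical handler registered for the head covers the whole chain. */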
2316 pPage->iMonitoredPrev = pPageHead->idx;
2317 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2318 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2319 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2320 pPageHead->iMonitoredNext = pPage->idx;
2321 rc = VINF_SUCCESS;
2322 }
2323 else
2324 {
2325 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2326 PVM pVM = pPool->CTX_SUFF(pVM);
2327 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2328 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2329 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
2330 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2331 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2332 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2333 pPool->pszAccessHandler);
2334 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2335 * the heap size should suffice. */
2336 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2337 PVMCPU pVCpu = VMMGetCpu(pVM);
2338 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2339 }
2340 pPage->fMonitored = true;
2341 return rc;
2342}
2343
2344
2345/**
2346 * Disables write monitoring of a guest page.
2347 *
2348 * @returns VBox status code.
2349 * @retval VINF_SUCCESS on success.
2350 * @param pPool The pool.
2351 * @param pPage The cached page.
2352 */
2353static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2354{
2355 /*
2356 * Filter out the relevant kinds.
2357 */
2358 switch (pPage->enmKind)
2359 {
2360 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2361 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2362 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2363 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2364 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2365 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2366 case PGMPOOLKIND_64BIT_PML4:
2367 case PGMPOOLKIND_32BIT_PD:
2368 case PGMPOOLKIND_PAE_PDPT:
2369 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2370 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2371 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2372 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2373 break;
2374
2375 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2376 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2377 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2378 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2379 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2380 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2381 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2382 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2383 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2384 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2385 case PGMPOOLKIND_ROOT_NESTED:
2386 case PGMPOOLKIND_PAE_PD_PHYS:
2387 case PGMPOOLKIND_PAE_PDPT_PHYS:
2388 case PGMPOOLKIND_32BIT_PD_PHYS:
2389 /* Nothing to monitor here. */
2390 Assert(!pPage->fMonitored);
2391 return VINF_SUCCESS;
2392
2393 default:
2394 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2395 }
2396 Assert(pPage->fMonitored);
2397
2398 /*
2399 * Remove the page from the monitored list or uninstall it if last.
2400 */
2401 const PVM pVM = pPool->CTX_SUFF(pVM);
2402 int rc;
2403 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2404 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2405 {
2406 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2407 {
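            /* We are the head of the monitoring chain: promote the next page and repoint the handler callbacks' user argument at it. */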
2408 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2409 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2410 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2411 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2412 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2413 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2414 pPool->pszAccessHandler);
2415 AssertFatalRCSuccess(rc);
2416 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2417 }
2418 else
2419 {
2420 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2421 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2422 {
2423 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2424 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2425 }
2426 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2427 rc = VINF_SUCCESS;
2428 }
2429 }
2430 else
2431 {
2432 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2433 AssertFatalRC(rc);
2434 PVMCPU pVCpu = VMMGetCpu(pVM);
2435 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2436 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2437 }
2438 pPage->fMonitored = false;
2439
2440 /*
2441 * Remove it from the list of modified pages (if in it).
2442 */
2443 pgmPoolMonitorModifiedRemove(pPool, pPage);
2444
2445 return rc;
2446}
2447
2448
2449/**
2450 * Inserts the page into the list of modified pages.
2451 *
2452 * @param pPool The pool.
2453 * @param pPage The page.
2454 */
2455void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2456{
2457 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2458 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2459 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2460 && pPool->iModifiedHead != pPage->idx,
2461 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2462 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2463 pPool->iModifiedHead, pPool->cModifiedPages));
2464
2465 pPage->iModifiedNext = pPool->iModifiedHead;
2466 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2467 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2468 pPool->iModifiedHead = pPage->idx;
2469 pPool->cModifiedPages++;
2470#ifdef VBOX_WITH_STATISTICS
2471 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2472 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2473#endif
2474}
2475
2476
2477/**
2478 * Removes the page from the list of modified pages and resets the
2479 * modification counter.
2480 *
2481 * @param pPool The pool.
2482 * @param pPage The page which is believed to be in the list of modified pages.
2483 */
2484static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2485{
2486 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2487 if (pPool->iModifiedHead == pPage->idx)
2488 {
2489 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2490 pPool->iModifiedHead = pPage->iModifiedNext;
2491 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2492 {
2493 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2494 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2495 }
2496 pPool->cModifiedPages--;
2497 }
2498 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2499 {
2500 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2501 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2502 {
2503 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2504 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2505 }
2506 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2507 pPool->cModifiedPages--;
2508 }
2509 else
2510 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2511 pPage->cModifications = 0;
2512}
2513
2514
2515/**
2516 * Zaps the list of modified pages, resetting their modification counters in the process.
2517 *
2518 * @param pVM The VM handle.
2519 */
2520static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2521{
2522 pgmLock(pVM);
2523 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2524 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2525
2526 unsigned cPages = 0; NOREF(cPages);
2527
2528#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2529 pgmPoolResetDirtyPages(pVM);
2530#endif
2531
2532 uint16_t idx = pPool->iModifiedHead;
2533 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2534 while (idx != NIL_PGMPOOL_IDX)
2535 {
2536 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2537 idx = pPage->iModifiedNext;
2538 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2539 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2540 pPage->cModifications = 0;
2541 Assert(++cPages);
2542 }
2543 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2544 pPool->cModifiedPages = 0;
2545 pgmUnlock(pVM);
2546}
2547
2548
2549/**
2550 * Handle SyncCR3 pool tasks
2551 *
2552 * @returns VBox status code.
2553 * @retval VINF_SUCCESS on success.
2554 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2555 * @param pVCpu The VMCPU handle.
2556 * @remark Should only be used when monitoring is available, thus placed in
2557 * the PGMPOOL_WITH_MONITORING #ifdef.
2558 */
2559int pgmPoolSyncCR3(PVMCPU pVCpu)
2560{
2561 PVM pVM = pVCpu->CTX_SUFF(pVM);
2562 LogFlow(("pgmPoolSyncCR3\n"));
2563
2564 /*
2565 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2566 * Occasionally we will have to clear all the shadow page tables because we wanted
2567 * to monitor a page which was mapped by too many shadowed page tables. This operation
2568 * is sometimes referred to as a 'lightweight flush'.
2569 */
2570# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2571 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2572 pgmR3PoolClearAll(pVM);
2573# else /* !IN_RING3 */
2574 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2575 {
2576 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2577 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2578
2579 /* Make sure all other VCPUs return to ring 3. */
2580 if (pVM->cCpus > 1)
2581 {
2582 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2583 PGM_INVL_ALL_VCPU_TLBS(pVM);
2584 }
2585 return VINF_PGM_SYNC_CR3;
2586 }
2587# endif /* !IN_RING3 */
2588 else
2589 {
2590 pgmPoolMonitorModifiedClearAll(pVM);
2591
2592 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2593 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2594 {
2595 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2596 return pgmPoolSyncCR3(pVCpu);
2597 }
2598 }
2599 return VINF_SUCCESS;
2600}
2601
2602
2603/**
2604 * Frees up at least one user entry.
2605 *
2606 * @returns VBox status code.
2607 * @retval VINF_SUCCESS on success.
2608 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2609 * @param pPool The pool.
2610 * @param iUser The user index.
2611 */
2612static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2613{
2614 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2615 /*
2616 * Just free cached pages in a braindead fashion.
2617 */
2618 /** @todo walk the age list backwards and free the first with usage. */
2619 int rc = VINF_SUCCESS;
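    /* Evict cached pages one at a time until at least one user record has been returned to the free list. */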
2620 do
2621 {
2622 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2623 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2624 rc = rc2;
2625 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2626 return rc;
2627}
2628
2629
2630/**
2631 * Inserts a page into the cache.
2632 *
2633 * This will create user node for the page, insert it into the GCPhys
2634 * hash, and insert it into the age list.
2635 *
2636 * @returns VBox status code.
2637 * @retval VINF_SUCCESS if successfully added.
2638 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2639 * @param pPool The pool.
2640 * @param pPage The cached page.
2641 * @param GCPhys The GC physical address of the page we're gonna shadow.
2642 * @param iUser The user index.
2643 * @param iUserTable The user table index.
2644 */
2645DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2646{
2647 int rc = VINF_SUCCESS;
2648 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2649
2650 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2651
2652#ifdef VBOX_STRICT
2653 /*
2654 * Check that the entry doesn't already exist.
2655 */
2656 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2657 {
2658 uint16_t i = pPage->iUserHead;
2659 do
2660 {
2661 Assert(i < pPool->cMaxUsers);
2662 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2663 i = paUsers[i].iNext;
2664 } while (i != NIL_PGMPOOL_USER_INDEX);
2665 }
2666#endif
2667
2668 /*
2669 * Find a free user node.
2670 */
2671 uint16_t i = pPool->iUserFreeHead;
2672 if (i == NIL_PGMPOOL_USER_INDEX)
2673 {
2674 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2675 if (RT_FAILURE(rc))
2676 return rc;
2677 i = pPool->iUserFreeHead;
2678 }
2679
2680 /*
2681 * Unlink the user node from the free list,
2682 * initialize and insert it into the user list.
2683 */
2684 pPool->iUserFreeHead = paUsers[i].iNext;
2685 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2686 paUsers[i].iUser = iUser;
2687 paUsers[i].iUserTable = iUserTable;
2688 pPage->iUserHead = i;
2689
2690 /*
2691 * Insert into cache and enable monitoring of the guest page if enabled.
2692 *
2693 * Until we implement caching of all levels, including the CR3 one, we'll
2694 * have to make sure we don't try to monitor & cache any recursive reuse of
2695 * a monitored CR3 page. Because all Windows versions are doing this we'll
2696 * have to be able to do combined access monitoring, CR3 + PT and
2697 * PD + PT (guest PAE).
2698 *
2699 * Update:
2700 * We're now cooperating with the CR3 monitor if an uncacheable page is found.
2701 */
2702 const bool fCanBeMonitored = true;
2703 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2704 if (fCanBeMonitored)
2705 {
2706 rc = pgmPoolMonitorInsert(pPool, pPage);
2707 AssertRC(rc);
2708 }
2709 return rc;
2710}
2711
2712
2713/**
2714 * Adds a user reference to a page.
2715 *
2716 * This will move the page to the head of the age list.
2717 *
2718 * @returns VBox status code.
2719 * @retval VINF_SUCCESS if successfully added.
2720 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2721 * @param pPool The pool.
2722 * @param pPage The cached page.
2723 * @param iUser The user index.
2724 * @param iUserTable The user table.
2725 */
2726static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2727{
2728 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2729
2730 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2731
2732# ifdef VBOX_STRICT
2733 /*
2734 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2735 */
2736 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2737 {
2738 uint16_t i = pPage->iUserHead;
2739 do
2740 {
2741 Assert(i < pPool->cMaxUsers);
2742 AssertMsg(iUser != PGMPOOL_IDX_PD || iUser != PGMPOOL_IDX_PDPT || iUser != PGMPOOL_IDX_NESTED_ROOT || iUser != PGMPOOL_IDX_AMD64_CR3 ||
2743 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2744 i = paUsers[i].iNext;
2745 } while (i != NIL_PGMPOOL_USER_INDEX);
2746 }
2747# endif
2748
2749 /*
2750 * Allocate a user node.
2751 */
2752 uint16_t i = pPool->iUserFreeHead;
2753 if (i == NIL_PGMPOOL_USER_INDEX)
2754 {
2755 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2756 if (RT_FAILURE(rc))
2757 return rc;
2758 i = pPool->iUserFreeHead;
2759 }
2760 pPool->iUserFreeHead = paUsers[i].iNext;
2761
2762 /*
2763 * Initialize the user node and insert it.
2764 */
2765 paUsers[i].iNext = pPage->iUserHead;
2766 paUsers[i].iUser = iUser;
2767 paUsers[i].iUserTable = iUserTable;
2768 pPage->iUserHead = i;
2769
2770# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2771 if (pPage->fDirty)
2772 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2773# endif
2774
2775 /*
2776 * Tell the cache to update its replacement stats for this page.
2777 */
2778 pgmPoolCacheUsed(pPool, pPage);
2779 return VINF_SUCCESS;
2780}
2781
2782
2783/**
2784 * Frees a user record associated with a page.
2785 *
2786 * This does not clear the entry in the user table, it simply returns the
2787 * user record to the chain of free records.
2788 *
2789 * @param pPool The pool.
2790 * @param pPage The pool page.
2791 * @param iUser The shadow page pool index of the user table.
2792 * @param iUserTable The index into the user table (shadowed).
2793 */
2794static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2795{
2796 /*
2797 * Unlink and free the specified user entry.
2798 */
2799 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2800
2801 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2802 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2803 uint16_t i = pPage->iUserHead;
2804 if ( i != NIL_PGMPOOL_USER_INDEX
2805 && paUsers[i].iUser == iUser
2806 && paUsers[i].iUserTable == iUserTable)
2807 {
2808 pPage->iUserHead = paUsers[i].iNext;
2809
2810 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2811 paUsers[i].iNext = pPool->iUserFreeHead;
2812 pPool->iUserFreeHead = i;
2813 return;
2814 }
2815
2816 /* General: Linear search. */
2817 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2818 while (i != NIL_PGMPOOL_USER_INDEX)
2819 {
2820 if ( paUsers[i].iUser == iUser
2821 && paUsers[i].iUserTable == iUserTable)
2822 {
2823 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2824 paUsers[iPrev].iNext = paUsers[i].iNext;
2825 else
2826 pPage->iUserHead = paUsers[i].iNext;
2827
2828 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2829 paUsers[i].iNext = pPool->iUserFreeHead;
2830 pPool->iUserFreeHead = i;
2831 return;
2832 }
2833 iPrev = i;
2834 i = paUsers[i].iNext;
2835 }
2836
2837 /* Fatal: didn't find it */
2838 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2839 iUser, iUserTable, pPage->GCPhys));
2840}
2841
2842
2843/**
2844 * Gets the entry size of a shadow table.
2845 *
2846 * @param enmKind The kind of page.
2847 *
2848 * @returns The size of the entry in bytes. That is, 4 or 8.
2849 * @returns If the kind is not for a table, an assertion is raised and 0 is
2850 * returned.
2851 */
2852DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2853{
2854 switch (enmKind)
2855 {
2856 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2857 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2858 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2859 case PGMPOOLKIND_32BIT_PD:
2860 case PGMPOOLKIND_32BIT_PD_PHYS:
2861 return 4;
2862
2863 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2864 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2865 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2866 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2867 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2868 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2869 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2870 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2871 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2872 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2873 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2874 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2875 case PGMPOOLKIND_64BIT_PML4:
2876 case PGMPOOLKIND_PAE_PDPT:
2877 case PGMPOOLKIND_ROOT_NESTED:
2878 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2879 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2880 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2881 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2882 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2883 case PGMPOOLKIND_PAE_PD_PHYS:
2884 case PGMPOOLKIND_PAE_PDPT_PHYS:
2885 return 8;
2886
2887 default:
2888 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2889 }
2890}
2891
2892
2893/**
2894 * Gets the entry size of a guest table.
2895 *
2896 * @param enmKind The kind of page.
2897 *
2898 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2899 * @returns If the kind is not for a table, an assertion is raised and 0 is
2900 * returned.
2901 */
2902DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2903{
2904 switch (enmKind)
2905 {
2906 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2907 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2908 case PGMPOOLKIND_32BIT_PD:
2909 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2910 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2911 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2912 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2913 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2914 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2915 return 4;
2916
2917 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2918 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2919 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2920 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2921 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2922 case PGMPOOLKIND_64BIT_PML4:
2923 case PGMPOOLKIND_PAE_PDPT:
2924 return 8;
2925
2926 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2927 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2928 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2929 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2930 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2931 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2932 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2933 case PGMPOOLKIND_ROOT_NESTED:
2934 case PGMPOOLKIND_PAE_PD_PHYS:
2935 case PGMPOOLKIND_PAE_PDPT_PHYS:
2936 case PGMPOOLKIND_32BIT_PD_PHYS:
2937 /** @todo can we return 0? (nobody is calling this...) */
2938 AssertFailed();
2939 return 0;
2940
2941 default:
2942 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2943 }
2944}
2945
2946
2947/**
2948 * Scans one shadow page table for mappings of a physical page.
2949 *
2950 * @returns true/false indicating removal of all relevant PTEs
2951 * @param pVM The VM handle.
2952 * @param pPhysPage The guest page in question.
2953 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
2954 * @param iShw The shadow page table.
2955 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
2956 * @param cRefs The number of references made in that PT.
2957 */
2958static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte, uint16_t cRefs)
2959{
2960 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d iPte=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte, cRefs));
2961 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2962 bool bRet = false;
2963
2964 /*
2965 * Assert sanity.
2966 */
2967 Assert(cRefs == 1);
2968 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
2969 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2970 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2971
2972 /*
2973 * Then, clear the actual mappings to the page in the shadow PT.
2974 */
2975 switch (pPage->enmKind)
2976 {
2977 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2978 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2979 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2980 {
2981 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2982 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2983 uint32_t u32AndMask, u32OrMask;
2984
2985 u32AndMask = 0;
2986 u32OrMask = 0;
2987
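            /* When allowed to keep the PTE, either restore write access (no active handler) or clear only the RW bit (write handler); otherwise the entry is zapped below. */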
2988 if (!fFlushPTEs)
2989 {
2990 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
2991 {
2992 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
2993 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
2994 u32OrMask = X86_PTE_RW;
2995 u32AndMask = UINT32_MAX;
2996 bRet = true;
2997 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
2998 break;
2999
3000 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3001 u32OrMask = 0;
3002 u32AndMask = ~X86_PTE_RW;
3003 bRet = true;
3004 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3005 break;
3006 default:
3007 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3008 break;
3009 }
3010 }
3011 else
3012 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3013
3014 /* Update the counter if we're removing references. */
3015 if (!u32AndMask)
3016 {
3017 Assert(pPage->cPresent >= cRefs);
3018 Assert(pPool->cPresent >= cRefs);
3019 pPage->cPresent -= cRefs;
3020 pPool->cPresent -= cRefs;
3021 }
3022
3023 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3024 {
3025 X86PTE Pte;
3026
3027 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", iPte, pPT->a[iPte], cRefs));
3028 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3029 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3030 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3031
3032 ASMAtomicWriteSize(&pPT->a[iPte].u, Pte.u);
3033 return bRet;
3034 }
3035#ifdef LOG_ENABLED
3036 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3037 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3038 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3039 {
3040 Log(("i=%d cRefs=%d\n", i, cRefs--));
3041 }
3042#endif
3043 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3044 break;
3045 }
3046
3047 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3048 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3049 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3050 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3051 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3052 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3053 {
3054 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3055 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3056 uint64_t u64AndMask, u64OrMask;
3057
3058 u64OrMask = 0;
3059 u64AndMask = 0;
3060 if (!fFlushPTEs)
3061 {
3062 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3063 {
3064 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3065 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3066 u64OrMask = X86_PTE_RW;
3067 u64AndMask = UINT64_MAX;
3068 bRet = true;
3069 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3070 break;
3071
3072 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3073 u64OrMask = 0;
3074 u64AndMask = ~((uint64_t)X86_PTE_RW);
3075 bRet = true;
3076 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3077 break;
3078
3079 default:
3080 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3081 break;
3082 }
3083 }
3084 else
3085 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3086
3087 /* Update the counter if we're removing references. */
3088 if (!u64AndMask)
3089 {
3090 Assert(pPage->cPresent >= cRefs);
3091 Assert(pPool->cPresent >= cRefs);
3092 pPage->cPresent -= cRefs;
3093 pPool->cPresent -= cRefs;
3094 }
3095
3096 if ((pPT->a[iPte].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3097 {
3098 X86PTEPAE Pte;
3099
3100 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", iPte, pPT->a[iPte], cRefs));
3101 Pte.u = (pPT->a[iPte].u & u64AndMask) | u64OrMask;
3102 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3103 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3104
3105 ASMAtomicWriteSize(&pPT->a[iPte].u, Pte.u);
3106 return bRet;
3107 }
3108#ifdef LOG_ENABLED
3109 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3110 Log(("Found %RX64 expected %RX64\n", pPT->a[iPte].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P), u64));
3111 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3112 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3113 {
3114 Log(("i=%d cRefs=%d\n", i, cRefs--));
3115 }
3116#endif
3117 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind));
3118 break;
3119 }
3120
3121#ifdef PGM_WITH_LARGE_PAGES
3122 /* Large page case only. */
3123 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3124 {
3125 Assert(HWACCMIsNestedPagingActive(pVM));
3126
3127 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3128 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3129
3130 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3131 {
3132 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64 cRefs=%#x\n", iPte, pPD->a[iPte], cRefs));
3133 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3134 pPD->a[iPte].u = 0;
3135
3136 /* Update the counter as we're removing references. */
3137 Assert(pPage->cPresent);
3138 Assert(pPool->cPresent);
3139 pPage->cPresent--;
3140 pPool->cPresent--;
3141
3142 return bRet;
3143 }
3144# ifdef LOG_ENABLED
3145 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3146 for (unsigned i = 0; i < RT_ELEMENTS(pPD->a); i++)
3147 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3148 {
3149 Log(("i=%d cRefs=%d\n", i, cRefs--));
3150 }
3151# endif
3152 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3153 break;
3154 }
3155
3156 /* AMD-V nested paging - @todo merge with EPT as we only check the parts that are identical. */
3157 case PGMPOOLKIND_PAE_PD_PHYS:
3158 {
3159 Assert(HWACCMIsNestedPagingActive(pVM));
3160
3161 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3162 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3163
3164 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3165 {
3166 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64 cRefs=%#x\n", iPte, pPD->a[iPte], cRefs));
3167 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3168 pPD->a[iPte].u = 0;
3169
3170 /* Update the counter as we're removing references. */
3171 Assert(pPage->cPresent);
3172 Assert(pPool->cPresent);
3173 pPage->cPresent--;
3174 pPool->cPresent--;
3175 return bRet;
3176 }
3177# ifdef LOG_ENABLED
3178 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3179 for (unsigned i = 0; i < RT_ELEMENTS(pPD->a); i++)
3180 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3181 {
3182 Log(("i=%d cRefs=%d\n", i, cRefs--));
3183 }
3184# endif
3185 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3186 break;
3187 }
3188#endif /* PGM_WITH_LARGE_PAGES */
3189
3190 default:
3191 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3192 }
3193 return bRet;
3194}
3195
3196
3197/**
3198 * Scans one shadow page table for mappings of a physical page.
3199 *
3200 * @param pVM The VM handle.
3201 * @param pPhysPage The guest page in question.
3202 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3203 * @param iShw The shadow page table.
3204 * @param cRefs The number of references made in that PT.
3205 */
3206static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
3207{
3208 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3209
3210    /* We should only come here when there's only one reference to this physical page. */
3211 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3212 Assert(cRefs == 1);
3213
3214 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
3215 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3216 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage), cRefs);
3217 if (!fKeptPTEs)
3218 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3219 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3220}
3221
3222
3223/**
3224 * Flushes a list of shadow page tables mapping the same physical page.
3225 *
3226 * @param pVM The VM handle.
3227 * @param pPhysPage The guest page in question.
3228 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3229 * @param iPhysExt The physical cross reference extent list to flush.
3230 */
3231static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3232{
3233 Assert(PGMIsLockOwner(pVM));
3234 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3235 bool fKeepList = false;
3236
3237 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3238    Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3239
3240 const uint16_t iPhysExtStart = iPhysExt;
3241 PPGMPOOLPHYSEXT pPhysExt;
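    /* Walk the chain of physical cross-reference extents, clearing each referenced shadow PTE; extents that keep any PTE keep the whole list alive. */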
3242 do
3243 {
3244 Assert(iPhysExt < pPool->cMaxPhysExts);
3245 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3246 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3247 {
3248 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3249 {
3250 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i], 1);
3251 if (!fKeptPTEs)
3252 {
3253 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3254 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3255 }
3256 else
3257 fKeepList = true;
3258 }
3259 }
3260 /* next */
3261 iPhysExt = pPhysExt->iNext;
3262 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3263
3264 if (!fKeepList)
3265 {
3266 /* insert the list into the free list and clear the ram range entry. */
3267 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3268 pPool->iPhysExtFreeHead = iPhysExtStart;
3269 /* Invalidate the tracking data. */
3270 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3271 }
3272
3273 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3274}
3275
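/* Note (descriptive summary of the function above): fKeepList only ends up true
 * when pgmPoolTrackFlushGCPhysPTInt chose to keep a PTE, i.e. fFlushPTEs was
 * false and updating the entry (e.g. an RW bit change) was enough.  In that case
 * the extent chain and the page's tracking word are left intact so the surviving
 * references stay accounted for; otherwise the whole chain goes back onto the
 * free list and the tracking word is cleared. */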
3276
3277/**
3278 * Flushes all shadow page table mappings of the given guest page.
3279 *
3280 * This is typically called when the host page backing the guest one has been
3281 * replaced or when the page protection was changed due to an access handler.
3282 *
3283 * @returns VBox status code.
3284 * @retval VINF_SUCCESS if all references have been successfully cleared.
3285 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3286 * pool cleaning. FF and sync flags are set.
3287 *
3288 * @param pVM The VM handle.
3289 * @param GCPhysPage GC physical address of the page in question
3290 * @param pPhysPage The guest page in question.
3291 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3292 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3293 * flushed, it is NOT touched if this isn't necessary.
3294 *                      The caller MUST initialize this to @a false.
3295 */
3296int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3297{
3298 PVMCPU pVCpu = VMMGetCpu(pVM);
3299 pgmLock(pVM);
3300 int rc = VINF_SUCCESS;
3301
3302#ifdef PGM_WITH_LARGE_PAGES
3303 /* Is this page part of a large page? */
3304 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3305 {
3306 PPGMPAGE pPhysBase;
3307 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3308
3309 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3310
3311 /* Fetch the large page base. */
3312 if (GCPhysBase != GCPhysPage)
3313 {
3314 pPhysBase = pgmPhysGetPage(&pVM->pgm.s, GCPhysBase);
3315 AssertFatal(pPhysBase);
3316 }
3317 else
3318 pPhysBase = pPhysPage;
3319
3320 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3321
3322 if (PGM_PAGE_GET_PDE_TYPE(pPhysBase) == PGM_PAGE_PDE_TYPE_PDE)
3323 {
3324 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3325 PGM_PAGE_SET_PDE_TYPE(pPhysBase, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3326
3327            /* Update the base; only that one has a reference and there's only one PDE to clear. */
3328 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pPhysBase, fFlushPTEs, pfFlushTLBs);
3329
3330 *pfFlushTLBs = true;
3331 pgmUnlock(pVM);
3332 return rc;
3333 }
3334 }
3335#else
3336 NOREF(GCPhysPage);
3337#endif /* PGM_WITH_LARGE_PAGES */
3338
3339 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3340 if (u16)
3341 {
3342 /*
3343 * The zero page is currently screwing up the tracking and we'll
3344 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3345 * is defined, zero pages won't normally be mapped. Some kind of solution
3346 * will be needed for this problem of course, but it will have to wait...
3347 */
3348 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3349 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3350 rc = VINF_PGM_GCPHYS_ALIASED;
3351 else
3352 {
3353# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3354 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3355 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3356 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3357# endif
3358
3359 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3360 pgmPoolTrackFlushGCPhysPT(pVM,
3361 pPhysPage,
3362 fFlushPTEs,
3363 PGMPOOL_TD_GET_IDX(u16),
3364 PGMPOOL_TD_GET_CREFS(u16));
3365 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3366 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3367 else
3368 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3369 *pfFlushTLBs = true;
3370
3371# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3372 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3373# endif
3374 }
3375 }
3376
3377 if (rc == VINF_PGM_GCPHYS_ALIASED)
3378 {
3379 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3380 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3381 rc = VINF_PGM_SYNC_CR3;
3382 }
3383 pgmUnlock(pVM);
3384 return rc;
3385}
3386
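/* Illustrative caller sketch (hypothetical local names, not lifted from any
 * particular call site): the TLB flush indicator must be initialized to false
 * by the caller and acted upon afterwards, roughly like this:
 *
 *     bool fFlushTLBs = false;
 *     int  rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysPage, pPhysPage,
 *                                        true, &fFlushTLBs);
 *     if (fFlushTLBs)
 *         PGM_INVL_ALL_VCPU_TLBS(pVM);
 *
 * A VINF_PGM_SYNC_CR3 return means a full pool clear is pending; the FF and
 * sync flags have already been set above, so the caller just propagates rc. */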
3387
3388/**
3389 * Scans all shadow page tables for mappings of a physical page.
3390 *
3391 * This may be slow, but it's most likely more efficient than cleaning
3392 * out the entire page pool / cache.
3393 *
3394 * @returns VBox status code.
3395 * @retval VINF_SUCCESS if all references have been successfully cleared.
3396 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3397 * a page pool cleaning.
3398 *
3399 * @param pVM The VM handle.
3400 * @param pPhysPage The guest page in question.
3401 */
3402int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3403{
3404 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3405 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3406 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3407 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3408
3409#if 1
3410 /*
3411 * There is a limit to what makes sense.
3412 */
3413 if (pPool->cPresent > 1024)
3414 {
3415 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3416 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3417 return VINF_PGM_GCPHYS_ALIASED;
3418 }
3419#endif
3420
3421 /*
3422     * Iterate all the pages until we've encountered all those in use.
3423     * This is a simple but not quite optimal solution.
3424 */
3425 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3426 const uint32_t u32 = u64;
3427 unsigned cLeft = pPool->cUsedPages;
3428 unsigned iPage = pPool->cCurPages;
3429 while (--iPage >= PGMPOOL_IDX_FIRST)
3430 {
3431 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3432 if ( pPage->GCPhys != NIL_RTGCPHYS
3433 && pPage->cPresent)
3434 {
3435 switch (pPage->enmKind)
3436 {
3437 /*
3438 * We only care about shadow page tables.
3439 */
3440 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3441 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3442 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3443 {
3444 unsigned cPresent = pPage->cPresent;
3445 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3446 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3447 if (pPT->a[i].n.u1Present)
3448 {
3449 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3450 {
3451 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3452 pPT->a[i].u = 0;
3453
3454 /* Update the counter as we're removing references. */
3455 Assert(pPage->cPresent);
3456 Assert(pPool->cPresent);
3457 pPage->cPresent--;
3458 pPool->cPresent--;
3459 }
3460 if (!--cPresent)
3461 break;
3462 }
3463 break;
3464 }
3465
3466 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3467 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3468 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3469 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3470 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3471 {
3472 unsigned cPresent = pPage->cPresent;
3473 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3474 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3475 if (pPT->a[i].n.u1Present)
3476 {
3477 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3478 {
3479 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3480 pPT->a[i].u = 0;
3481
3482 /* Update the counter as we're removing references. */
3483 Assert(pPage->cPresent);
3484 Assert(pPool->cPresent);
3485 pPage->cPresent--;
3486 pPool->cPresent--;
3487 }
3488 if (!--cPresent)
3489 break;
3490 }
3491 break;
3492 }
3493#ifndef IN_RC
3494 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3495 {
3496 unsigned cPresent = pPage->cPresent;
3497 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3498 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3499 if (pPT->a[i].n.u1Present)
3500 {
3501 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3502 {
3503 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3504 pPT->a[i].u = 0;
3505
3506 /* Update the counter as we're removing references. */
3507 Assert(pPage->cPresent);
3508 Assert(pPool->cPresent);
3509 pPage->cPresent--;
3510 pPool->cPresent--;
3511 }
3512 if (!--cPresent)
3513 break;
3514 }
3515 break;
3516 }
3517#endif
3518 }
3519 if (!--cLeft)
3520 break;
3521 }
3522 }
3523
3524 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3525 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3526 return VINF_SUCCESS;
3527}
3528
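/* Worked example for the compare values above (illustrative): for a host page
 * at HCPhys 0x00000001234de000 the function builds
 *
 *     u64 = 0x00000001234de000 | X86_PTE_P = 0x00000001234de001
 *
 * and a shadow PTE matches when (pte & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64,
 * i.e. any present PTE pointing at that host page regardless of RW/US/A/D bits.
 * The truncated u32 value plays the same role for the 32-bit page table kinds. */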
3529
3530/**
3531 * Clears the user entry in a user table.
3532 *
3533 * This is used to remove all references to a page when flushing it.
3534 */
3535static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3536{
3537 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3538 Assert(pUser->iUser < pPool->cCurPages);
3539 uint32_t iUserTable = pUser->iUserTable;
3540
3541 /*
3542 * Map the user page.
3543 */
3544 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3545 union
3546 {
3547 uint64_t *pau64;
3548 uint32_t *pau32;
3549 } u;
3550 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3551
3552 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3553
3554 /* Safety precaution in case we change the paging for other modes too in the future. */
3555 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3556
3557#ifdef VBOX_STRICT
3558 /*
3559 * Some sanity checks.
3560 */
3561 switch (pUserPage->enmKind)
3562 {
3563 case PGMPOOLKIND_32BIT_PD:
3564 case PGMPOOLKIND_32BIT_PD_PHYS:
3565 Assert(iUserTable < X86_PG_ENTRIES);
3566 break;
3567 case PGMPOOLKIND_PAE_PDPT:
3568 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3569 case PGMPOOLKIND_PAE_PDPT_PHYS:
3570 Assert(iUserTable < 4);
3571 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3572 break;
3573 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3574 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3575 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3576 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3577 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3578 case PGMPOOLKIND_PAE_PD_PHYS:
3579 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3580 break;
3581 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3582 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3583 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3584 break;
3585 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3586 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3587 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3588 break;
3589 case PGMPOOLKIND_64BIT_PML4:
3590 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3591 /* GCPhys >> PAGE_SHIFT is the index here */
3592 break;
3593 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3594 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3595 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3596 break;
3597
3598 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3599 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3600 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3601 break;
3602
3603 case PGMPOOLKIND_ROOT_NESTED:
3604 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3605 break;
3606
3607 default:
3608 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3609 break;
3610 }
3611#endif /* VBOX_STRICT */
3612
3613 /*
3614 * Clear the entry in the user page.
3615 */
3616 switch (pUserPage->enmKind)
3617 {
3618 /* 32-bit entries */
3619 case PGMPOOLKIND_32BIT_PD:
3620 case PGMPOOLKIND_32BIT_PD_PHYS:
3621 ASMAtomicWriteSize(&u.pau32[iUserTable], 0);
3622 break;
3623
3624 /* 64-bit entries */
3625 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3626 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3627 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3628 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3629 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3630#if defined(IN_RC)
3631            /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during CR3 load, so any
3632             * non-present PDPT will continue to cause page faults.
3633 */
3634 ASMReloadCR3();
3635#endif
3636 /* no break */
3637 case PGMPOOLKIND_PAE_PD_PHYS:
3638 case PGMPOOLKIND_PAE_PDPT_PHYS:
3639 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3640 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3641 case PGMPOOLKIND_64BIT_PML4:
3642 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3643 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3644 case PGMPOOLKIND_PAE_PDPT:
3645 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3646 case PGMPOOLKIND_ROOT_NESTED:
3647 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3648 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3649 ASMAtomicWriteSize(&u.pau64[iUserTable], 0);
3650 break;
3651
3652 default:
3653 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3654 }
3655}
3656
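/* Note on the entry size handling above (illustrative): the pau32/pau64 union
 * lets the same mapping be indexed in the user table's native entry size.  A
 * PAE or long mode user page clears a 64-bit entry, e.g.
 *
 *     ASMAtomicWriteSize(&u.pau64[iUserTable], 0);
 *
 * while a 32-bit PD user clears u.pau32[iUserTable]; iUserTable always counts
 * entries of that native size, never bytes. */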
3657
3658/**
3659 * Clears all users of a page.
3660 */
3661static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3662{
3663 /*
3664 * Free all the user records.
3665 */
3666 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3667
3668 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3669 uint16_t i = pPage->iUserHead;
3670 while (i != NIL_PGMPOOL_USER_INDEX)
3671 {
3672        /* Clear entry in user table. */
3673 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3674
3675 /* Free it. */
3676 const uint16_t iNext = paUsers[i].iNext;
3677 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3678 paUsers[i].iNext = pPool->iUserFreeHead;
3679 pPool->iUserFreeHead = i;
3680
3681 /* Next. */
3682 i = iNext;
3683 }
3684 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3685}
3686
3687
3688/**
3689 * Allocates a new physical cross reference extent.
3690 *
3691 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3692 * @param pVM The VM handle.
3693 * @param piPhysExt Where to store the phys ext index.
3694 */
3695PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3696{
3697 Assert(PGMIsLockOwner(pVM));
3698 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3699 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3700 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3701 {
3702 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3703 return NULL;
3704 }
3705 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3706 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3707 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3708 *piPhysExt = iPhysExt;
3709 return pPhysExt;
3710}
3711
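/* Typical usage sketch (hypothetical variables; compare with
 * pgmPoolTrackPhysExtAddref further down): allocate an extent, record the first
 * shadow PT reference in slot 0 and encode the new tracking word:
 *
 *     uint16_t        iPhysExt;
 *     PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
 *     if (pPhysExt)
 *     {
 *         pPhysExt->aidx[0] = iShwPT;
 *         pPhysExt->apte[0] = iPte;
 *         u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
 *     }
 *     else
 *         u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
 */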
3712
3713/**
3714 * Frees a physical cross reference extent.
3715 *
3716 * @param pVM The VM handle.
3717 * @param iPhysExt The extent to free.
3718 */
3719void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3720{
3721 Assert(PGMIsLockOwner(pVM));
3722 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3723 Assert(iPhysExt < pPool->cMaxPhysExts);
3724 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3725 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3726 {
3727 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3728 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3729 }
3730 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3731 pPool->iPhysExtFreeHead = iPhysExt;
3732}
3733
3734
3735/**
3736 * Frees a list of physical cross reference extents.
3737 *
3738 * @param pVM The VM handle.
3739 * @param iPhysExt The first extent of the list to free.
3740 */
3741void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3742{
3743 Assert(PGMIsLockOwner(pVM));
3744 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3745
3746 const uint16_t iPhysExtStart = iPhysExt;
3747 PPGMPOOLPHYSEXT pPhysExt;
3748 do
3749 {
3750 Assert(iPhysExt < pPool->cMaxPhysExts);
3751 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3752 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3753 {
3754 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3755 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3756 }
3757
3758 /* next */
3759 iPhysExt = pPhysExt->iNext;
3760 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3761
3762 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3763 pPool->iPhysExtFreeHead = iPhysExtStart;
3764}
3765
3766
3767/**
3768 * Insert a reference into a list of physical cross reference extents.
3769 *
3770 * @returns The new tracking data for PGMPAGE.
3771 *
3772 * @param pVM The VM handle.
3773 * @param iPhysExt The physical extent index of the list head.
3774 * @param iShwPT The shadow page table index.
3775 * @param iPte Page table entry
3776 *
3777 */
3778static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3779{
3780 Assert(PGMIsLockOwner(pVM));
3781 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3782 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3783
3784 /* special common case. */
3785 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3786 {
3787 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3788 paPhysExts[iPhysExt].apte[2] = iPte;
3789 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3790 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3791 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3792 }
3793
3794 /* general treatment. */
3795 const uint16_t iPhysExtStart = iPhysExt;
3796 unsigned cMax = 15;
3797 for (;;)
3798 {
3799 Assert(iPhysExt < pPool->cMaxPhysExts);
3800 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3801 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3802 {
3803 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3804 paPhysExts[iPhysExt].apte[i] = iPte;
3805 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3806 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3807 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3808 }
3809 if (!--cMax)
3810 {
3811 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3812 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3813 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3814 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3815 }
3816 }
3817
3818 /* add another extent to the list. */
3819 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3820 if (!pNew)
3821 {
3822 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackNoExtentsLeft);
3823 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3824 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3825 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3826 }
3827 pNew->iNext = iPhysExtStart;
3828 pNew->aidx[0] = iShwPT;
3829 pNew->apte[0] = iPte;
3830 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
3831 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3832}
3833
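/* Summary of the return encodings above (informal): on success the tracking
 * word keeps pointing at the head of the (possibly extended) extent list, i.e.
 * PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtHead); when no free slot or
 * extent can be found the whole list is released and the overflow marker
 * PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED) is
 * returned, i.e. from then on the aliases are only counted, not tracked. */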
3834
3835/**
3836 * Add a reference to a guest physical page where extents are in use.
3837 *
3838 * @returns The new tracking data for PGMPAGE.
3839 *
3840 * @param pVM The VM handle.
3841 * @param pPhysPage Pointer to the aPages entry in the ram range.
3842 * @param u16 The ram range flags (top 16-bits).
3843 * @param iShwPT The shadow page table index.
3844 * @param iPte Page table entry
3845 */
3846uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
3847{
3848 pgmLock(pVM);
3849 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3850 {
3851 /*
3852 * Convert to extent list.
3853 */
3854 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3855 uint16_t iPhysExt;
3856 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3857 if (pPhysExt)
3858 {
3859 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3860 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3861 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3862 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
3863 pPhysExt->aidx[1] = iShwPT;
3864 pPhysExt->apte[1] = iPte;
3865 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3866 }
3867 else
3868 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3869 }
3870 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3871 {
3872 /*
3873 * Insert into the extent list.
3874 */
3875 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
3876 }
3877 else
3878 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3879 pgmUnlock(pVM);
3880 return u16;
3881}
3882
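/* Rough state diagram of the tracking word handled above (descriptive only;
 * CREFS_PHYSEXT / IDX_OVERFLOWED stand for the PGMPOOL_TD_* constants):
 *
 *     cRefs == 1                     -> one shadow PT, its index stored inline
 *     cRefs == CREFS_PHYSEXT         -> index points to a PGMPOOLPHYSEXT chain
 *     CREFS_PHYSEXT + IDX_OVERFLOWED -> too many references; they are only
 *                                       counted and a flush must fall back to
 *                                       pgmPoolTrackFlushGCPhysPTsSlow. */
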
3883/**
3884 * Clear references to guest physical memory.
3885 *
3886 * @param pPool The pool.
3887 * @param pPage The page.
3888 * @param pPhysPage Pointer to the aPages entry in the ram range.
3889 * @param iPte Shadow PTE index
3890 */
3891void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
3892{
3893 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3894 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3895
3896 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3897 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3898 {
3899 PVM pVM = pPool->CTX_SUFF(pVM);
3900 pgmLock(pVM);
3901
3902 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3903 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3904 do
3905 {
3906 Assert(iPhysExt < pPool->cMaxPhysExts);
3907
3908 /*
3909 * Look for the shadow page and check if it's all freed.
3910 */
3911 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3912 {
3913 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
3914 && paPhysExts[iPhysExt].apte[i] == iPte)
3915 {
3916 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3917 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3918
3919 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3920 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3921 {
3922 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3923 pgmUnlock(pVM);
3924 return;
3925 }
3926
3927 /* we can free the node. */
3928 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3929 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3930 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3931 {
3932 /* lonely node */
3933 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3934 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3935 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3936 }
3937 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3938 {
3939 /* head */
3940 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3941 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3942 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3943 }
3944 else
3945 {
3946 /* in list */
3947 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
3948 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3949 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3950 }
3951 iPhysExt = iPhysExtNext;
3952 pgmUnlock(pVM);
3953 return;
3954 }
3955 }
3956
3957 /* next */
3958 iPhysExtPrev = iPhysExt;
3959 iPhysExt = paPhysExts[iPhysExt].iNext;
3960 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3961
3962 pgmUnlock(pVM);
3963 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3964 }
3965 else /* nothing to do */
3966 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3967}
3968
3969/**
3970 * Clear references to guest physical memory.
3971 *
3972 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3973 * is assumed to be correct, so the linear search can be skipped and we can assert
3974 * at an earlier point.
3975 *
3976 * @param pPool The pool.
3977 * @param pPage The page.
3978 * @param HCPhys The host physical address corresponding to the guest page.
3979 * @param GCPhys The guest physical address corresponding to HCPhys.
3980 * @param iPte Shadow PTE index
3981 */
3982static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
3983{
3984 /*
3985 * Walk range list.
3986 */
3987 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3988 while (pRam)
3989 {
3990 RTGCPHYS off = GCPhys - pRam->GCPhys;
3991 if (off < pRam->cb)
3992 {
3993 /* does it match? */
3994 const unsigned iPage = off >> PAGE_SHIFT;
3995 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3996#ifdef LOG_ENABLED
3997 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3998 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3999#endif
4000 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4001 {
4002 Assert(pPage->cPresent);
4003 Assert(pPool->cPresent);
4004 pPage->cPresent--;
4005 pPool->cPresent--;
4006 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4007 return;
4008 }
4009 break;
4010 }
4011 pRam = pRam->CTX_SUFF(pNext);
4012 }
4013 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4014}
4015
4016
4017/**
4018 * Clear references to guest physical memory.
4019 *
4020 * @param pPool The pool.
4021 * @param pPage The page.
4022 * @param HCPhys The host physical address corresponding to the guest page.
4023 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4024 * @param iPte Shadow pte index
4025 */
4026void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4027{
4028 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4029
4030 /*
4031 * Walk range list.
4032 */
4033 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4034 while (pRam)
4035 {
4036 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
4037 if (off < pRam->cb)
4038 {
4039 /* does it match? */
4040 const unsigned iPage = off >> PAGE_SHIFT;
4041 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4042 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4043 {
4044 Assert(pPage->cPresent);
4045 Assert(pPool->cPresent);
4046 pPage->cPresent--;
4047 pPool->cPresent--;
4048 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4049 return;
4050 }
4051 break;
4052 }
4053 pRam = pRam->CTX_SUFF(pNext);
4054 }
4055
4056 /*
4057 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4058 */
4059 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4060 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4061 while (pRam)
4062 {
4063 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4064 while (iPage-- > 0)
4065 {
4066 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4067 {
4068 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4069 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4070 Assert(pPage->cPresent);
4071 Assert(pPool->cPresent);
4072 pPage->cPresent--;
4073 pPool->cPresent--;
4074 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4075 return;
4076 }
4077 }
4078 pRam = pRam->CTX_SUFF(pNext);
4079 }
4080
4081 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
4082}
4083
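/* Cost note (informal): the hint lookup above is a single RAM range probe; only
 * when the hinted guest address no longer maps to HCPhys (the guest may have
 * repointed its PTE in the meantime) does the code fall back to scanning every
 * RAM page, which is what StatTrackLinearRamSearches counts. */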
4084
4085/**
4086 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4087 *
4088 * @param pPool The pool.
4089 * @param pPage The page.
4090 * @param pShwPT The shadow page table (mapping of the page).
4091 * @param pGstPT The guest page table.
4092 */
4093DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4094{
4095 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4096 if (pShwPT->a[i].n.u1Present)
4097 {
4098 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4099 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4100 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4101 if (!pPage->cPresent)
4102 break;
4103 }
4104}
4105
4106
4107/**
4108 * Clear references to guest physical memory in a PAE / 32-bit page table.
4109 *
4110 * @param pPool The pool.
4111 * @param pPage The page.
4112 * @param pShwPT The shadow page table (mapping of the page).
4113 * @param pGstPT The guest page table (just a half one).
4114 */
4115DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
4116{
4117 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4118 if (pShwPT->a[i].n.u1Present)
4119 {
4120 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4121 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4122 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4123 if (!pPage->cPresent)
4124 break;
4125 }
4126}
4127
4128
4129/**
4130 * Clear references to guest physical memory in a PAE / PAE page table.
4131 *
4132 * @param pPool The pool.
4133 * @param pPage The page.
4134 * @param pShwPT The shadow page table (mapping of the page).
4135 * @param pGstPT The guest page table.
4136 */
4137DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
4138{
4139 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4140 if (pShwPT->a[i].n.u1Present)
4141 {
4142            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4143 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4144 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
4145 if (!pPage->cPresent)
4146 break;
4147 }
4148}
4149
4150
4151/**
4152 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4153 *
4154 * @param pPool The pool.
4155 * @param pPage The page.
4156 * @param pShwPT The shadow page table (mapping of the page).
4157 */
4158DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4159{
4160 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4161 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4162 if (pShwPT->a[i].n.u1Present)
4163 {
4164 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4165 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4166 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys, i);
4167 if (!pPage->cPresent)
4168 break;
4169 }
4170}
4171
4172
4173/**
4174 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4175 *
4176 * @param pPool The pool.
4177 * @param pPage The page.
4178 * @param pShwPT The shadow page table (mapping of the page).
4179 */
4180DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
4181{
4182 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4183 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4184 if (pShwPT->a[i].n.u1Present)
4185 {
4186 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4187 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
4188 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys, i);
4189 if (!pPage->cPresent)
4190 break;
4191 }
4192}
4193
4194
4195/**
4196 * Clear references to shadowed pages in an EPT page table.
4197 *
4198 * @param pPool The pool.
4199 * @param pPage The page.
4200 * @param pShwPT The shadow page table (mapping of the page).
4201 */
4202DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4203{
4204 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4205 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4206 if (pShwPT->a[i].n.u1Present)
4207 {
4208 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4209 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4210 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys, i);
4211 if (!pPage->cPresent)
4212 break;
4213 }
4214}
4215
4216
4217
4218/**
4219 * Clear references to shadowed pages in a 32-bit page directory.
4220 *
4221 * @param pPool The pool.
4222 * @param pPage The page.
4223 * @param pShwPD The shadow page directory (mapping of the page).
4224 */
4225DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4226{
4227 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4228 {
4229 if ( pShwPD->a[i].n.u1Present
4230 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4231 )
4232 {
4233 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4234 if (pSubPage)
4235 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4236 else
4237 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4238 }
4239 }
4240}
4241
4242/**
4243 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4244 *
4245 * @param pPool The pool.
4246 * @param pPage The page.
4247 * @param pShwPD The shadow page directory (mapping of the page).
4248 */
4249DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4250{
4251 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4252 {
4253 if ( pShwPD->a[i].n.u1Present
4254 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4255 )
4256 {
4257#ifdef PGM_WITH_LARGE_PAGES
4258 if (pShwPD->a[i].b.u1Size)
4259 {
4260 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4261 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4262 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */, i);
4263 }
4264 else
4265#endif
4266 {
4267 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4268 if (pSubPage)
4269 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4270 else
4271 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4272 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4273 }
4274 }
4275 }
4276}
4277
4278/**
4279 * Clear references to shadowed pages in a PAE page directory pointer table.
4280 *
4281 * @param pPool The pool.
4282 * @param pPage The page.
4283 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4284 */
4285DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4286{
4287 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4288 {
4289 if ( pShwPDPT->a[i].n.u1Present
4290 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4291 )
4292 {
4293 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4294 if (pSubPage)
4295 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4296 else
4297 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4298 }
4299 }
4300}
4301
4302
4303/**
4304 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4305 *
4306 * @param pPool The pool.
4307 * @param pPage The page.
4308 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4309 */
4310DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4311{
4312 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4313 {
4314 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
4315 if (pShwPDPT->a[i].n.u1Present)
4316 {
4317 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4318 if (pSubPage)
4319 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4320 else
4321 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4322 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4323 }
4324 }
4325}
4326
4327
4328/**
4329 * Clear references to shadowed pages in a 64-bit level 4 page table.
4330 *
4331 * @param pPool The pool.
4332 * @param pPage The page.
4333 * @param pShwPML4 The shadow level 4 page table (mapping of the page).
4334 */
4335DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4336{
4337 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4338 {
4339 if (pShwPML4->a[i].n.u1Present)
4340 {
4341 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4342 if (pSubPage)
4343 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4344 else
4345 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4346 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4347 }
4348 }
4349}
4350
4351
4352/**
4353 * Clear references to shadowed pages in an EPT page directory.
4354 *
4355 * @param pPool The pool.
4356 * @param pPage The page.
4357 * @param pShwPD The shadow page directory (mapping of the page).
4358 */
4359DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4360{
4361 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4362 {
4363 if (pShwPD->a[i].n.u1Present)
4364 {
4365#ifdef PGM_WITH_LARGE_PAGES
4366 if (pShwPD->a[i].b.u1Size)
4367 {
4368 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4369 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4370 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */, i);
4371 }
4372 else
4373#endif
4374 {
4375 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4376 if (pSubPage)
4377 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4378 else
4379 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4380 }
4381 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4382 }
4383 }
4384}
4385
4386
4387/**
4388 * Clear references to shadowed pages in an EPT page directory pointer table.
4389 *
4390 * @param pPool The pool.
4391 * @param pPage The page.
4392 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4393 */
4394DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4395{
4396 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4397 {
4398 if (pShwPDPT->a[i].n.u1Present)
4399 {
4400 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4401 if (pSubPage)
4402 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4403 else
4404 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4405 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4406 }
4407 }
4408}
4409
4410
4411/**
4412 * Clears all references made by this page.
4413 *
4414 * This includes other shadow pages and GC physical addresses.
4415 *
4416 * @param pPool The pool.
4417 * @param pPage The page.
4418 */
4419static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4420{
4421 /*
4422 * Map the shadow page and take action according to the page kind.
4423 */
4424 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
4425 switch (pPage->enmKind)
4426 {
4427 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4428 {
4429 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4430 void *pvGst;
4431 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4432 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4433 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4434 break;
4435 }
4436
4437 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4438 {
4439 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4440 void *pvGst;
4441 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4442 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
4443 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4444 break;
4445 }
4446
4447 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4448 {
4449 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4450 void *pvGst;
4451 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4452 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
4453 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4454 break;
4455 }
4456
4457 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4458 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4459 {
4460 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4461 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4462 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4463 break;
4464 }
4465
4466 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4467 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4468 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4469 {
4470 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4471 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4472 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4473 break;
4474 }
4475
4476 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4477 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4478 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4479 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4480 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4481 case PGMPOOLKIND_PAE_PD_PHYS:
4482 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4483 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4484 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4485 break;
4486
4487 case PGMPOOLKIND_32BIT_PD_PHYS:
4488 case PGMPOOLKIND_32BIT_PD:
4489 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4490 break;
4491
4492 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4493 case PGMPOOLKIND_PAE_PDPT:
4494 case PGMPOOLKIND_PAE_PDPT_PHYS:
4495 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4496 break;
4497
4498 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4499 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4500 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4501 break;
4502
4503 case PGMPOOLKIND_64BIT_PML4:
4504 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4505 break;
4506
4507 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4508 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4509 break;
4510
4511 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4512 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4513 break;
4514
4515 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4516 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4517 break;
4518
4519 default:
4520 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4521 }
4522
4523    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4524 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4525 ASMMemZeroPage(pvShw);
4526 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4527 pPage->fZeroed = true;
4528 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4529 Assert(!pPage->cPresent);
4530}
4531
4532/**
4533 * Flushes a pool page.
4534 *
4535 * This moves the page to the free list after removing all user references to it.
4536 *
4537 * @returns VBox status code.
4538 * @retval VINF_SUCCESS on success.
4539 * @param pPool The pool.
4540 * @param pPage The shadow page to flush.
4541 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4542 */
4543int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4544{
4545 PVM pVM = pPool->CTX_SUFF(pVM);
4546 bool fFlushRequired = false;
4547
4548 int rc = VINF_SUCCESS;
4549 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4550 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4551 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4552
4553 /*
4554 * Quietly reject any attempts at flushing any of the special root pages.
4555 */
4556 if (pPage->idx < PGMPOOL_IDX_FIRST)
4557 {
4558 AssertFailed(); /* can no longer happen */
4559 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4560 return VINF_SUCCESS;
4561 }
4562
4563 pgmLock(pVM);
4564
4565 /*
4566 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4567 */
4568 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4569 {
4570 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4571 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4572 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4573 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4574 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4575 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4576 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4577 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4578 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4579 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4580 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4581 pgmUnlock(pVM);
4582 return VINF_SUCCESS;
4583 }
4584
4585#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4586 /* Start a subset so we won't run out of mapping space. */
4587 PVMCPU pVCpu = VMMGetCpu(pVM);
4588 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4589#endif
4590
4591 /*
4592 * Mark the page as being in need of an ASMMemZeroPage().
4593 */
4594 pPage->fZeroed = false;
4595
4596#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4597 if (pPage->fDirty)
4598 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4599#endif
4600
4601 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4602 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4603 fFlushRequired = true;
4604
4605 /*
4606 * Clear the page.
4607 */
4608 pgmPoolTrackClearPageUsers(pPool, pPage);
4609 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4610 pgmPoolTrackDeref(pPool, pPage);
4611 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4612
4613 /*
4614 * Flush it from the cache.
4615 */
4616 pgmPoolCacheFlushPage(pPool, pPage);
4617
4618#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4619 /* Heavy stuff done. */
4620 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4621#endif
4622
4623 /*
4624     * Deregister the monitoring.
4625 */
4626 if (pPage->fMonitored)
4627 rc = pgmPoolMonitorFlush(pPool, pPage);
4628
4629 /*
4630 * Free the page.
4631 */
4632 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4633 pPage->iNext = pPool->iFreeHead;
4634 pPool->iFreeHead = pPage->idx;
4635 pPage->enmKind = PGMPOOLKIND_FREE;
4636 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4637 pPage->GCPhys = NIL_RTGCPHYS;
4638 pPage->fReusedFlushPending = false;
4639
4640 pPool->cUsedPages--;
4641
4642 /* Flush the TLBs of all VCPUs if required. */
4643 if ( fFlushRequired
4644 && fFlush)
4645 {
4646 PGM_INVL_ALL_VCPU_TLBS(pVM);
4647 }
4648
4649 pgmUnlock(pVM);
4650 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4651 return rc;
4652}
4653
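/* Ordering note (descriptive): the flush above first drops all user table
 * entries referencing the page (pgmPoolTrackClearPageUsers), then the references
 * the page itself holds (pgmPoolTrackDeref), then evicts it from the GCPhys
 * cache and removes write monitoring before linking it onto the free list.  A
 * TLB flush of all VCPUs is only forced when the page still had users and the
 * caller asked for it via fFlush. */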
4654
4655/**
4656 * Frees a usage of a pool page.
4657 *
4658 * The caller is responsible for updating the user table so that it no longer
4659 * references the shadow page.
4660 *
4661 * @param pPool The pool.
4662 * @param pPage The shadow page.
4663 * @param iUser The shadow page pool index of the user table.
4664 * @param iUserTable The index into the user table (shadowed).
4665 */
4666void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4667{
4668 PVM pVM = pPool->CTX_SUFF(pVM);
4669
4670 STAM_PROFILE_START(&pPool->StatFree, a);
4671 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4672 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4673 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4674 pgmLock(pVM);
4675 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4676 if (!pPage->fCached)
4677 pgmPoolFlushPage(pPool, pPage);
4678 pgmUnlock(pVM);
4679 STAM_PROFILE_STOP(&pPool->StatFree, a);
4680}
4681
4682
4683/**
4684 * Makes one or more pages free.
4685 *
4686 * @returns VBox status code.
4687 * @retval VINF_SUCCESS on success.
4688 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4689 *
4690 * @param pPool The pool.
4691 * @param enmKind Page table kind
4692 * @param iUser The user of the page.
4693 */
4694static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4695{
4696 PVM pVM = pPool->CTX_SUFF(pVM);
4697
4698 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4699
4700 /*
4701 * If the pool isn't full grown yet, expand it.
4702 */
4703 if ( pPool->cCurPages < pPool->cMaxPages
4704#if defined(IN_RC)
4705 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4706 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4707 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4708#endif
4709 )
4710 {
4711 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4712#ifdef IN_RING3
4713 int rc = PGMR3PoolGrow(pVM);
4714#else
4715 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4716#endif
4717 if (RT_FAILURE(rc))
4718 return rc;
4719 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4720 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4721 return VINF_SUCCESS;
4722 }
4723
4724 /*
4725 * Free one cached page.
4726 */
4727 return pgmPoolCacheFreeOne(pPool, iUser);
4728}
4729
4730/**
4731 * Allocates a page from the pool.
4732 *
4733 * This page may actually be a cached page and not in need of any processing
4734 * on the caller's part.
4735 *
4736 * @returns VBox status code.
4737 * @retval VINF_SUCCESS if a NEW page was allocated.
4738 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4739 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4740 * @param pVM The VM handle.
4741 * @param GCPhys The GC physical address of the page we're gonna shadow.
4742 * For 4MB and 2MB PD entries, it's the first address the
4743 * shadow PT is covering.
4744 * @param enmKind The kind of mapping.
4745 * @param enmAccess Access type for the mapping (only relevant for big pages)
4746 * @param iUser The shadow page pool index of the user table.
4747 * @param iUserTable The index into the user table (shadowed).
4748 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4749 * @param fLockPage Lock the page
4750 */
4751int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4752{
4753 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4754 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4755 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4756 *ppPage = NULL;
4757 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4758 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4759 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4760
4761 pgmLock(pVM);
4762
4763 if (pPool->fCacheEnabled)
4764 {
4765 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4766 if (RT_SUCCESS(rc2))
4767 {
4768 if (fLockPage)
4769 pgmPoolLockPage(pPool, *ppPage);
4770 pgmUnlock(pVM);
4771 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4772 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4773 return rc2;
4774 }
4775 }
4776
4777 /*
4778 * Allocate a new one.
4779 */
4780 int rc = VINF_SUCCESS;
4781 uint16_t iNew = pPool->iFreeHead;
4782 if (iNew == NIL_PGMPOOL_IDX)
4783 {
4784 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4785 if (RT_FAILURE(rc))
4786 {
4787 pgmUnlock(pVM);
4788 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4789 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4790 return rc;
4791 }
4792 iNew = pPool->iFreeHead;
4793 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4794 }
4795
4796 /* unlink the free head */
4797 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4798 pPool->iFreeHead = pPage->iNext;
4799 pPage->iNext = NIL_PGMPOOL_IDX;
4800
4801 /*
4802 * Initialize it.
4803 */
4804 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4805 pPage->enmKind = enmKind;
4806 pPage->enmAccess = enmAccess;
4807 pPage->GCPhys = GCPhys;
4808 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4809 pPage->fMonitored = false;
4810 pPage->fCached = false;
4811#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4812 pPage->fDirty = false;
4813#endif
4814 pPage->fReusedFlushPending = false;
4815 pPage->cModifications = 0;
4816 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4817 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4818 pPage->cLocked = 0;
4819 pPage->cPresent = 0;
4820 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
4821 pPage->pvLastAccessHandlerFault = 0;
4822 pPage->cLastAccessHandlerCount = 0;
4823 pPage->pvLastAccessHandlerRip = 0;
4824
4825 /*
4826 * Insert into the tracking and cache. If this fails, free the page.
4827 */
4828 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4829 if (RT_FAILURE(rc3))
4830 {
4831 pPool->cUsedPages--;
4832 pPage->enmKind = PGMPOOLKIND_FREE;
4833 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4834 pPage->GCPhys = NIL_RTGCPHYS;
4835 pPage->iNext = pPool->iFreeHead;
4836 pPool->iFreeHead = pPage->idx;
4837 pgmUnlock(pVM);
4838 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4839 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4840 return rc3;
4841 }
4842
4843 /*
4844 * Commit the allocation, clear the page and return.
4845 */
4846#ifdef VBOX_WITH_STATISTICS
4847 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4848 pPool->cUsedPagesHigh = pPool->cUsedPages;
4849#endif
4850
4851 if (!pPage->fZeroed)
4852 {
4853 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4854 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4855 ASMMemZeroPage(pv);
4856 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4857 }
4858
4859 *ppPage = pPage;
4860 if (fLockPage)
4861 pgmPoolLockPage(pPool, pPage);
4862 pgmUnlock(pVM);
4863 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4864 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4865 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4866 return rc;
4867}
4868
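/* Illustrative allocation sketch (hypothetical call site; iUserPD/iPDE are
 * placeholders for the caller's own pool page index and the entry that will
 * point to the new page):
 *
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAllocEx(pVM, GCPhys, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                             PGMPOOLACCESS_DONTCARE, iUserPD, iPDE,
 *                             &pShwPage, false);
 *
 * rc == VINF_PGM_CACHED_PAGE means an existing shadow page was reused and its
 * contents are already in place; VINF_SUCCESS means pShwPage is a fresh, zeroed
 * page the caller still has to fill in.  Either way the caller then hooks the
 * returned page up to its own table entry. */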
4869
4870/**
4871 * Frees a usage of a pool page.
4872 *
4873 * @param pVM The VM handle.
4874 * @param HCPhys The HC physical address of the shadow page.
4875 * @param iUser The shadow page pool index of the user table.
4876 * @param iUserTable The index into the user table (shadowed).
4877 */
4878void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4879{
4880 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4881 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4882 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4883}
4884
4885/**
4886 * Internal worker for finding an 'in-use' shadow page given its physical address.
4887 *
4888 * @returns Pointer to the shadow page structure.
4889 * @param pPool The pool.
4890 * @param HCPhys The HC physical address of the shadow page.
4891 */
4892PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4893{
4894 PVM pVM = pPool->CTX_SUFF(pVM);
4895
4896 Assert(PGMIsLockOwner(pVM));
4897
4898 /*
4899 * Look up the page.
4900 */
4901 pgmLock(pVM);
4902 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4903 pgmUnlock(pVM);
4904
4905 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4906 return pPage;
4907}
4908
4909#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
4910/**
4911 * Flushes the specified page if present.
4912 *
4913 * @param pVM The VM handle.
4914 * @param GCPhys Guest physical address of the page to flush.
4915 */
4916void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
4917{
4918 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4919
4920 VM_ASSERT_EMT(pVM);
4921
4922 /*
4923 * Look up the GCPhys in the hash.
4924 */
4925 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
4926 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
4927 if (i == NIL_PGMPOOL_IDX)
4928 return;
4929
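    /* Walk the hash chain (linked through iNext) looking for shadow tables that map this guest page. */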
4930 do
4931 {
4932 PPGMPOOLPAGE pPage = &pPool->aPages[i];
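        /* Unsigned compare: true iff pPage->GCPhys lies within [GCPhys, GCPhys + PAGE_SIZE), i.e. this shadow page derives from the guest page being flushed. */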
4933 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
4934 {
4935 switch (pPage->enmKind)
4936 {
4937 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4938 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4939 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4940 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4941 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4942 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4943 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4944 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4945 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4946 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4947 case PGMPOOLKIND_64BIT_PML4:
4948 case PGMPOOLKIND_32BIT_PD:
4949 case PGMPOOLKIND_PAE_PDPT:
4950 {
4951 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
4952#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4953 if (pPage->fDirty)
4954 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
4955 else
4956#endif
4957 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
4958 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
4959 pgmPoolMonitorChainFlush(pPool, pPage);
4960 return;
4961 }
4962
4963 /* ignore, no monitoring. */
4964 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4965 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4966 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4967 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4968 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4969 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4970 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4971 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4972 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4973 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4974 case PGMPOOLKIND_ROOT_NESTED:
4975 case PGMPOOLKIND_PAE_PD_PHYS:
4976 case PGMPOOLKIND_PAE_PDPT_PHYS:
4977 case PGMPOOLKIND_32BIT_PD_PHYS:
4978 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4979 break;
4980
4981 default:
4982 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4983 }
4984 }
4985
4986 /* next */
4987 i = pPage->iNext;
4988 } while (i != NIL_PGMPOOL_IDX);
4989 return;
4990}
4991#endif /* IN_RING3 */
4992
4993#ifdef IN_RING3
4994
4995
4996/**
4997 * Resets the shadow paging state of an unplugged CPU (CPU hot plugging).
4998 *
4999 * @param pVM The VM handle.
5000 * @param pVCpu The virtual CPU.
5001 */
5002void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5003{
5004 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
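    /* Note: nothing is flushed from the pool here; we only tear down and recreate the shadow mode state for this VCPU and force a full CR3 sync and TLB flush below. */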
5005
5006 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5007 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5008 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5009}
5010
5011
5012/**
5013 * Flushes the entire cache.
5014 *
5015 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5016 * this and will execute the CR3 flush.
5017 *
5018 * @param pVM The VM handle.
5019 */
5020void pgmR3PoolReset(PVM pVM)
5021{
5022 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5023
5024 Assert(PGMIsLockOwner(pVM));
5025 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5026 LogFlow(("pgmR3PoolReset:\n"));
5027
5028 /*
5029 * If there are no pages in the pool, there is nothing to do.
5030 */
5031 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5032 {
5033 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5034 return;
5035 }
5036
5037 /*
5038 * Exit the shadow mode since we're going to clear everything,
5039 * including the root page.
5040 */
5041 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5042 {
5043 PVMCPU pVCpu = &pVM->aCpus[i];
5044 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5045 }
5046
5047 /*
5048 * Nuke the free list and reinsert all pages into it.
5049 */
5050 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5051 {
5052 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5053
5054 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5055 if (pPage->fMonitored)
5056 pgmPoolMonitorFlush(pPool, pPage);
5057 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5058 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5059 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5060 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5061 pPage->cModifications = 0;
5062 pPage->GCPhys = NIL_RTGCPHYS;
5063 pPage->enmKind = PGMPOOLKIND_FREE;
5064 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5065 Assert(pPage->idx == i);
5066 pPage->iNext = i + 1;
5067 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5068 pPage->fSeenNonGlobal = false;
5069 pPage->fMonitored = false;
5070#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5071 pPage->fDirty = false;
5072#endif
5073 pPage->fCached = false;
5074 pPage->fReusedFlushPending = false;
5075 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5076 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5077 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5078 pPage->cLocked = 0;
5079 }
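    /* Each page now links to its successor by index; terminate the chain at the last page and point the free list head at the first dynamic page. */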
5080 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5081 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5082 pPool->cUsedPages = 0;
5083
5084 /*
5085 * Zap and reinitialize the user records.
5086 */
5087 pPool->cPresent = 0;
5088 pPool->iUserFreeHead = 0;
5089 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5090 const unsigned cMaxUsers = pPool->cMaxUsers;
5091 for (unsigned i = 0; i < cMaxUsers; i++)
5092 {
5093 paUsers[i].iNext = i + 1;
5094 paUsers[i].iUser = NIL_PGMPOOL_IDX;
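        /* Presumably a poison value; an obviously invalid table index makes stale user records easy to spot. */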
5095 paUsers[i].iUserTable = 0xfffffffe;
5096 }
5097 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5098
5099 /*
5100 * Clear all the GCPhys links and rebuild the phys ext free list.
5101 */
5102 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
5103 pRam;
5104 pRam = pRam->CTX_SUFF(pNext))
5105 {
5106 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5107 while (iPage-- > 0)
5108 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
5109 }
5110
5111 pPool->iPhysExtFreeHead = 0;
5112 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5113 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
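    /* Chain all physical cross-reference extents into a free list and invalidate their three (pool index, PTE index) tracking slots. */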
5114 for (unsigned i = 0; i < cMaxPhysExts; i++)
5115 {
5116 paPhysExts[i].iNext = i + 1;
5117 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5118 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5119 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5120 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5121 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5122 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5123 }
5124 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5125
5126 /*
5127 * Just zap the modified list.
5128 */
5129 pPool->cModifiedPages = 0;
5130 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5131
5132 /*
5133 * Clear the GCPhys hash and the age list.
5134 */
5135 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5136 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5137 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5138 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5139
5140#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5141 /* Clear all dirty pages. */
5142 pPool->idxFreeDirtyPage = 0;
5143 pPool->cDirtyPages = 0;
5144 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
5145 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
5146#endif
5147
5148 /*
5149 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5150 */
5151 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
5152 {
5153 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5154 pPage->iNext = NIL_PGMPOOL_IDX;
5155 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5156 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5157 pPage->cModifications = 0;
5158 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
5159 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5160 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5161 if (pPage->fMonitored)
5162 {
5163 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
5164 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
5165 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
5166 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5167 pPool->pszAccessHandler);
5168 AssertFatalRCSuccess(rc);
5169 pgmPoolHashInsert(pPool, pPage);
5170 }
5171 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5172 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5173 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5174 }
5175
5176 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5177 {
5178 /*
5179 * Re-enter the shadowing mode and assert Sync CR3 FF.
5180 */
5181 PVMCPU pVCpu = &pVM->aCpus[i];
5182 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5183 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5184 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5185 }
5186
5187 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5188}
5189#endif /* IN_RING3 */
5190
5191#ifdef LOG_ENABLED
5192static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5193{
5194 switch(enmKind)
5195 {
5196 case PGMPOOLKIND_INVALID:
5197 return "PGMPOOLKIND_INVALID";
5198 case PGMPOOLKIND_FREE:
5199 return "PGMPOOLKIND_FREE";
5200 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5201 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5202 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5203 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5204 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5205 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5206 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5207 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5208 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5209 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5210 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5211 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5212 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5213 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5214 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5215 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5216 case PGMPOOLKIND_32BIT_PD:
5217 return "PGMPOOLKIND_32BIT_PD";
5218 case PGMPOOLKIND_32BIT_PD_PHYS:
5219 return "PGMPOOLKIND_32BIT_PD_PHYS";
5220 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5221 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5222 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5223 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5224 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5225 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5226 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5227 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5228 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5229 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5230 case PGMPOOLKIND_PAE_PD_PHYS:
5231 return "PGMPOOLKIND_PAE_PD_PHYS";
5232 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5233 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5234 case PGMPOOLKIND_PAE_PDPT:
5235 return "PGMPOOLKIND_PAE_PDPT";
5236 case PGMPOOLKIND_PAE_PDPT_PHYS:
5237 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5238 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5239 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5240 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5241 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5242 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5243 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5244 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5245 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5246 case PGMPOOLKIND_64BIT_PML4:
5247 return "PGMPOOLKIND_64BIT_PML4";
5248 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5249 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5250 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5251 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5252 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5253 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5254 case PGMPOOLKIND_ROOT_NESTED:
5255 return "PGMPOOLKIND_ROOT_NESTED";
5256 }
5257 return "Unknown kind!";
5258}
5259#endif /* LOG_ENABLED */