VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp @ 28875

Last change on this file since 28875 was 28862 (2010-04-28), checked in by vboxsync:

Test fix for pgm pool gurus with smp guests

1/* $Id: PGMAllPool.cpp 28862 2010-04-28 11:56:59Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/pgm.h>
24#include <VBox/mm.h>
25#include <VBox/em.h>
26#include <VBox/cpum.h>
27#ifdef IN_RC
28# include <VBox/patm.h>
29#endif
30#include "../PGMInternal.h"
31#include <VBox/vm.h>
32#include "../PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/hwacc_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/string.h>
40
41
42/*******************************************************************************
43* Internal Functions *
44*******************************************************************************/
45RT_C_DECLS_BEGIN
46static void pgmPoolFlushAllInt(PPGMPOOL pPool);
47DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
48DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
49static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
51static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#ifndef IN_RING3
53DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
54#endif
55#ifdef LOG_ENABLED
56static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
57#endif
58#if defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)
59static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT);
60#endif
61
62int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
63PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
64void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
65void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
66
67RT_C_DECLS_END
68
69
70/**
71 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
72 *
73 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
74 * @param enmKind The page kind.
75 */
76DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
77{
78 switch (enmKind)
79 {
80 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
82 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
83 return true;
84 default:
85 return false;
86 }
87}
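/* Example: pgmPoolIsBigPage(PGMPOOLKIND_PAE_PT_FOR_PAE_2MB) and the two 4MB
   kinds listed above return true; every other kind returns false. */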
88
89/** @def PGMPOOL_PAGE_2_LOCKED_PTR
90 * Maps a pool page into the current context and locks it (RC only).
91 *
92 * @returns Pointer to the mapped page.
93 * @param pVM The VM handle.
94 * @param pPage The pool page.
95 *
96 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
97 * small page window employed by that function. Be careful.
98 * @remark There is no need to assert on the result.
99 */
100#if defined(IN_RC)
101DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
102{
103 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
104
105 /* Make sure the dynamic mapping will not be reused. */
106 if (pv)
107 PGMDynLockHCPage(pVM, (uint8_t *)pv);
108
109 return pv;
110}
111#else
112# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
113#endif
114
115/** @def PGMPOOL_UNLOCK_PTR
116 * Unlocks a previously locked dynamic cache mapping (RC only).
117 *
118 * @returns nothing.
119 * @param pVM The VM handle.
120 * @param pPage The pool page.
121 *
122 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
123 * small page window employed by that function. Be careful.
124 * @remark There is no need to assert on the result.
125 */
126#if defined(IN_RC)
127DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
128{
129 if (pvPage)
130 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
131}
132#else
133# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
134#endif
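/*
 * Typical usage of the two macros above (a sketch; see pgmPoolMonitorChainChanging
 * below for the real thing):
 *
 *     void *pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
 *     ... read or modify the shadow page ...
 *     PGMPOOL_UNLOCK_PTR(pVM, pv);
 */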
135
136
137/**
138 * Flushes a chain of pages sharing the same access monitor.
139 *
140 * @returns VBox status code suitable for scheduling.
141 * @param pPool The pool.
142 * @param pPage A page in the chain.
143 */
144int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
145{
146 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
147
148 /*
149 * Find the list head.
150 */
151 uint16_t idx = pPage->idx;
152 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
153 {
154 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
155 {
156 idx = pPage->iMonitoredPrev;
157 Assert(idx != pPage->idx);
158 pPage = &pPool->aPages[idx];
159 }
160 }
161
162 /*
163 * Iterate the list flushing each shadow page.
164 */
165 int rc = VINF_SUCCESS;
166 for (;;)
167 {
168 idx = pPage->iMonitoredNext;
169 Assert(idx != pPage->idx);
170 if (pPage->idx >= PGMPOOL_IDX_FIRST)
171 {
172 int rc2 = pgmPoolFlushPage(pPool, pPage);
173 AssertRC(rc2);
174 }
175 /* next */
176 if (idx == NIL_PGMPOOL_IDX)
177 break;
178 pPage = &pPool->aPages[idx];
179 }
180 return rc;
181}
182
183
184/**
185 * Wrapper for getting the current context pointer to the entry being modified.
186 *
187 * @returns VBox status code suitable for scheduling.
188 * @param pVM VM Handle.
189 * @param pvDst Destination address
190 * @param pvSrc Source guest virtual address.
191 * @param GCPhysSrc The source guest physical address.
192 * @param cb Size of data to read
193 */
194DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
195{
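    /*
     * Note: both paths below round the source down to an entry-sized boundary
     * (callers pass cb = sizeof(X86PTE) or sizeof(X86PTEPAE), i.e. 4 or 8), so
     * e.g. a fault address of 0x1006 with cb=8 reads the whole entry at 0x1000
     * rather than a partially updated tail.
     */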
196#if defined(IN_RING3)
197 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
198 return VINF_SUCCESS;
199#else
200 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
201 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
202#endif
203}
204
205/**
206 * Process shadow entries before they are changed by the guest.
207 *
208 * For PT entries we will clear them. For PD entries, we'll simply check
209 * for mapping conflicts and set the SyncCR3 FF if found.
210 *
211 * @param pVCpu VMCPU handle
212 * @param pPool The pool.
213 * @param pPage The head page.
214 * @param GCPhysFault The guest physical fault address.
215 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
216 * In R3 this is the host context 'fault' address.
217 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
218 */
219void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
220{
221 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
222 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
223 PVM pVM = pPool->CTX_SUFF(pVM);
224
225 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
226
227 for (;;)
228 {
229 union
230 {
231 void *pv;
232 PX86PT pPT;
233 PX86PTPAE pPTPae;
234 PX86PD pPD;
235 PX86PDPAE pPDPae;
236 PX86PDPT pPDPT;
237 PX86PML4 pPML4;
238 } uShw;
239
240 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s cbWrite=%d\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
241
242 uShw.pv = NULL;
243 switch (pPage->enmKind)
244 {
245 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
246 {
247 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
248 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
249 const unsigned iShw = off / sizeof(X86PTE);
250 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
251 if (uShw.pPT->a[iShw].n.u1Present)
252 {
253 X86PTE GstPte;
254
255 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
256 AssertRC(rc);
257 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
258 pgmPoolTracDerefGCPhysHint(pPool, pPage,
259 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
260 GstPte.u & X86_PTE_PG_MASK,
261 iShw);
262 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
263 }
264 break;
265 }
266
267 /* page/2 sized */
268 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
269 {
270 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
271 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
272 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
273 {
274 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
275 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
276 if (uShw.pPTPae->a[iShw].n.u1Present)
277 {
278 X86PTE GstPte;
279 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
280 AssertRC(rc);
281
282 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
283 pgmPoolTracDerefGCPhysHint(pPool, pPage,
284 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
285 GstPte.u & X86_PTE_PG_MASK,
286 iShw);
287 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
288 }
289 }
290 break;
291 }
292
293 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
294 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
295 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
296 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
297 {
298 unsigned iGst = off / sizeof(X86PDE);
299 unsigned iShwPdpt = iGst / 256;
300 unsigned iShw = (iGst % 256) * 2;
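            /*
             * Index math sketch: the 1024-entry 32-bit guest PD is shadowed by
             * four 512-entry PAE PDs (the PD0..PD3 kinds), and each 4MB guest
             * PDE corresponds to two 2MB PAE PDEs. So iShwPdpt picks the
             * affected shadow PD and iShw its first entry, e.g. iGst=600 gives
             * iShwPdpt=2 and iShw=176 (+177).
             */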
301 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
302
303 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
304 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
305 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
306 {
307 for (unsigned i = 0; i < 2; i++)
308 {
309# ifndef IN_RING0
310 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
311 {
312 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
313 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
314 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
315 break;
316 }
317 else
318# endif /* !IN_RING0 */
319 if (uShw.pPDPae->a[iShw+i].n.u1Present)
320 {
321 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
322 pgmPoolFree(pVM,
323 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
324 pPage->idx,
325 iShw + i);
326 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
327 }
328
329 /* paranoia / a bit assumptive. */
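                    /* The write may straddle two 4-byte guest PDEs, e.g. a dword
                       write at offset 2 into one PDE also touches the next one,
                       whose pair of shadow entries starts at iShw + 2. */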
330 if ( (off & 3)
331 && (off & 3) + cbWrite > 4)
332 {
333 const unsigned iShw2 = iShw + 2 + i;
334 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
335 {
336# ifndef IN_RING0
337 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
338 {
339 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
340 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
341 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
342 break;
343 }
344 else
345# endif /* !IN_RING0 */
346 if (uShw.pPDPae->a[iShw2].n.u1Present)
347 {
348 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
349 pgmPoolFree(pVM,
350 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
351 pPage->idx,
352 iShw2);
353 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
354 }
355 }
356 }
357 }
358 }
359 break;
360 }
361
362 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
363 {
364 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
365 const unsigned iShw = off / sizeof(X86PTEPAE);
366 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
367 if (uShw.pPTPae->a[iShw].n.u1Present)
368 {
369 X86PTEPAE GstPte;
370 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
371 AssertRC(rc);
372
373 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
374 pgmPoolTracDerefGCPhysHint(pPool, pPage,
375 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
376 GstPte.u & X86_PTE_PAE_PG_MASK,
377 iShw);
378 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
379 }
380
381 /* paranoia / a bit assumptive. */
382 if ( (off & 7)
383 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
384 {
385 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
386 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
387
388 if (uShw.pPTPae->a[iShw2].n.u1Present)
389 {
390 X86PTEPAE GstPte;
391# ifdef IN_RING3
392 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
393# else
394 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
395# endif
396 AssertRC(rc);
397 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
398 pgmPoolTracDerefGCPhysHint(pPool, pPage,
399 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
400 GstPte.u & X86_PTE_PAE_PG_MASK,
401 iShw2);
402 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u ,0);
403 }
404 }
405 break;
406 }
407
408 case PGMPOOLKIND_32BIT_PD:
409 {
410 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
411 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
412
413 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
414 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
415# ifndef IN_RING0
416 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
417 {
418 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
419 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
420 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
421 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
422 break;
423 }
424# endif /* !IN_RING0 */
425# ifndef IN_RING0
426 else
427# endif /* !IN_RING0 */
428 {
429 if (uShw.pPD->a[iShw].n.u1Present)
430 {
431 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
432 pgmPoolFree(pVM,
433 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
434 pPage->idx,
435 iShw);
436 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
437 }
438 }
439 /* paranoia / a bit assumptive. */
440 if ( (off & 3)
441 && (off & 3) + cbWrite > sizeof(X86PTE))
442 {
443 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
444 if ( iShw2 != iShw
445 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
446 {
447# ifndef IN_RING0
448 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
449 {
450 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
451 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
452 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
453 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
454 break;
455 }
456# endif /* !IN_RING0 */
457# ifndef IN_RING0
458 else
459# endif /* !IN_RING0 */
460 {
461 if (uShw.pPD->a[iShw2].n.u1Present)
462 {
463 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
464 pgmPoolFree(pVM,
465 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
466 pPage->idx,
467 iShw2);
468 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
469 }
470 }
471 }
472 }
473#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
474 if ( uShw.pPD->a[iShw].n.u1Present
475 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
476 {
477 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
478# ifdef IN_RC /* TLB load - we're pushing things a bit... */
479 ASMProbeReadByte(pvAddress);
480# endif
481 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
482 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
483 }
484#endif
485 break;
486 }
487
488 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
489 {
490 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
491 const unsigned iShw = off / sizeof(X86PDEPAE);
492 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
493#ifndef IN_RING0
494 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
495 {
496 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
497 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
498 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
499 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
500 break;
501 }
502#endif /* !IN_RING0 */
503 /*
504 * Causes trouble when the guest uses a PDE to refer to the whole page table level
505 * structure. (Invalidate here; faults later on when it tries to change the page
506 * table entries -> recheck; probably only applies to the RC case.)
507 */
508# ifndef IN_RING0
509 else
510# endif /* !IN_RING0 */
511 {
512 if (uShw.pPDPae->a[iShw].n.u1Present)
513 {
514 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
515 pgmPoolFree(pVM,
516 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
517 pPage->idx,
518 iShw);
519 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
520 }
521 }
522 /* paranoia / a bit assumptive. */
523 if ( (off & 7)
524 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
525 {
526 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
527 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
528
529#ifndef IN_RING0
530 if ( iShw2 != iShw
531 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
532 {
533 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
534 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
535 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
536 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
537 break;
538 }
539#endif /* !IN_RING0 */
540# ifndef IN_RING0
541 else
542# endif /* !IN_RING0 */
543 if (uShw.pPDPae->a[iShw2].n.u1Present)
544 {
545 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
546 pgmPoolFree(pVM,
547 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
548 pPage->idx,
549 iShw2);
550 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
551 }
552 }
553 break;
554 }
555
556 case PGMPOOLKIND_PAE_PDPT:
557 {
558 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
559 /*
560 * Hopefully this doesn't happen very often:
561 * - touching unused parts of the page
562 * - messing with the bits of pd pointers without changing the physical address
563 */
564 /* PDPT roots are not page aligned; 32 byte only! */
565 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
566
567 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
568 const unsigned iShw = offPdpt / sizeof(X86PDPE);
569 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
570 {
571# ifndef IN_RING0
572 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
573 {
574 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
575 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
576 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
577 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
578 break;
579 }
580# endif /* !IN_RING0 */
581# ifndef IN_RING0
582 else
583# endif /* !IN_RING0 */
584 if (uShw.pPDPT->a[iShw].n.u1Present)
585 {
586 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
587 pgmPoolFree(pVM,
588 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
589 pPage->idx,
590 iShw);
591 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
592 }
593
594 /* paranoia / a bit assumptive. */
595 if ( (offPdpt & 7)
596 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
597 {
598 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
599 if ( iShw2 != iShw
600 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
601 {
602# ifndef IN_RING0
603 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
604 {
605 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
606 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
607 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
608 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
609 break;
610 }
611# endif /* !IN_RING0 */
612# ifndef IN_RING0
613 else
614# endif /* !IN_RING0 */
615 if (uShw.pPDPT->a[iShw2].n.u1Present)
616 {
617 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
618 pgmPoolFree(pVM,
619 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
620 pPage->idx,
621 iShw2);
622 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
623 }
624 }
625 }
626 }
627 break;
628 }
629
630#ifndef IN_RC
631 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
632 {
633 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
634 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
635 const unsigned iShw = off / sizeof(X86PDEPAE);
636 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
637 if (uShw.pPDPae->a[iShw].n.u1Present)
638 {
639 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
640 pgmPoolFree(pVM,
641 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
642 pPage->idx,
643 iShw);
644 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
645 }
646 /* paranoia / a bit assumptive. */
647 if ( (off & 7)
648 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
649 {
650 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
651 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
652
653 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
654 if (uShw.pPDPae->a[iShw2].n.u1Present)
655 {
656 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
657 pgmPoolFree(pVM,
658 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
659 pPage->idx,
660 iShw2);
661 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
662 }
663 }
664 break;
665 }
666
667 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
668 {
669 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
670 /*
671 * Hopefully this doesn't happen very often:
672 * - messing with the bits of pd pointers without changing the physical address
673 */
674 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
675 const unsigned iShw = off / sizeof(X86PDPE);
676 if (uShw.pPDPT->a[iShw].n.u1Present)
677 {
678 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
679 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
680 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
681 }
682 /* paranoia / a bit assumptive. */
683 if ( (off & 7)
684 && (off & 7) + cbWrite > sizeof(X86PDPE))
685 {
686 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
687 if (uShw.pPDPT->a[iShw2].n.u1Present)
688 {
689 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
690 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
691 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
692 }
693 }
694 break;
695 }
696
697 case PGMPOOLKIND_64BIT_PML4:
698 {
699 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
700 /*
701 * Hopefully this doesn't happen very often:
702 * - messing with the bits of pd pointers without changing the physical address
703 */
704 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
705 const unsigned iShw = off / sizeof(X86PDPE);
706 if (uShw.pPML4->a[iShw].n.u1Present)
707 {
708 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
709 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
710 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
711 }
712 /* paranoia / a bit assumptive. */
713 if ( (off & 7)
714 && (off & 7) + cbWrite > sizeof(X86PDPE))
715 {
716 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
717 if (uShw.pPML4->a[iShw2].n.u1Present)
718 {
719 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
720 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
721 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
722 }
723 }
724 break;
725 }
726#endif /* !IN_RC */
727
728 default:
729 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
730 }
731 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
732
733 /* next */
734 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
735 return;
736 pPage = &pPool->aPages[pPage->iMonitoredNext];
737 }
738}
739
740# ifndef IN_RING3
741/**
742 * Checks if an access could be a fork operation in progress.
743 *
744 * Meaning that the guest is setting up the parent process for Copy-On-Write.
745 *
746 * @returns true if it's likely that we're forking, otherwise false.
747 * @param pPool The pool.
748 * @param pDis The disassembled instruction.
749 * @param offFault The access offset.
750 */
751DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
752{
753 /*
754 * i386 Linux uses btr to clear X86_PTE_RW.
755 * The functions involved are (2.6.16 source inspection):
756 * clear_bit
757 * ptep_set_wrprotect
758 * copy_one_pte
759 * copy_pte_range
760 * copy_pmd_range
761 * copy_pud_range
762 * copy_page_range
763 * dup_mmap
764 * dup_mm
765 * copy_mm
766 * copy_process
767 * do_fork
768 */
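    /*
     * The !(offFault & 4) test below presumably checks that the write hits the
     * low dword of the (PAE) PTE, which is where X86_PTE_RW lives; a btr that
     * clears R/W never touches the high dword.
     */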
769 if ( pDis->pCurInstr->opcode == OP_BTR
770 && !(offFault & 4)
771 /** @todo Validate that the bit index is X86_PTE_RW. */
772 )
773 {
774 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
775 return true;
776 }
777 return false;
778}
779
780
781/**
782 * Determine whether the page is likely to have been reused.
783 *
784 * @returns true if we consider the page as being reused for a different purpose.
785 * @returns false if we consider it to still be a paging page.
786 * @param pVM VM Handle.
787 * @param pVCpu VMCPU Handle.
788 * @param pRegFrame Trap register frame.
789 * @param pDis The disassembly info for the faulting instruction.
790 * @param pvFault The fault address.
791 *
792 * @remark The REP prefix check is left to the caller because of STOSD/W.
793 */
794DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
795{
796#ifndef IN_RC
797 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
798 if ( HWACCMHasPendingIrq(pVM)
799 && (pRegFrame->rsp - pvFault) < 32)
800 {
801 /* Fault caused by stack writes while trying to inject an interrupt event. */
802 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
803 return true;
804 }
805#else
806 NOREF(pVM); NOREF(pvFault);
807#endif
808
809 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
810
811 /* Non-supervisor mode write means it's used for something else. */
812 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
813 return true;
814
815 switch (pDis->pCurInstr->opcode)
816 {
817 /* call implies the actual push of the return address faulted */
818 case OP_CALL:
819 Log4(("pgmPoolMonitorIsReused: CALL\n"));
820 return true;
821 case OP_PUSH:
822 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
823 return true;
824 case OP_PUSHF:
825 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
826 return true;
827 case OP_PUSHA:
828 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
829 return true;
830 case OP_FXSAVE:
831 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
832 return true;
833 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
834 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
835 return true;
836 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
837 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
838 return true;
839 case OP_MOVSWD:
840 case OP_STOSWD:
841 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
842 && pRegFrame->rcx >= 0x40
843 )
844 {
845 Assert(pDis->mode == CPUMODE_64BIT);
846
847 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
848 return true;
849 }
850 return false;
851 }
852 if ( ( (pDis->param1.flags & USE_REG_GEN32)
853 || (pDis->param1.flags & USE_REG_GEN64))
854 && (pDis->param1.base.reg_gen == USE_REG_ESP))
855 {
856 Log4(("pgmPoolMonitorIsReused: ESP\n"));
857 return true;
858 }
859
860 return false;
861}
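/* (Typical reuse pattern caught above: a page that used to be a page table is
   recycled by the guest as, say, a kernel stack, so the faulting writes become
   esp-relative stores or push/call return addresses.) */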
862
863/**
864 * Flushes the page being accessed.
865 *
866 * @returns VBox status code suitable for scheduling.
867 * @param pVM The VM handle.
868 * @param pVCpu The VMCPU handle.
869 * @param pPool The pool.
870 * @param pPage The pool page (head).
871 * @param pDis The disassembly of the write instruction.
872 * @param pRegFrame The trap register frame.
873 * @param GCPhysFault The fault address as guest physical address.
874 * @param pvFault The fault address.
875 */
876static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
877 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
878{
879 /*
880 * First, do the flushing.
881 */
882 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
883
884 /*
885 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection). Must do this in raw mode (!); XP boot will fail otherwise
886 */
887 uint32_t cbWritten;
888 int rc2 = EMInterpretInstructionCPUEx(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten, EMCODETYPE_ALL);
889 if (RT_SUCCESS(rc2))
890 pRegFrame->rip += pDis->opsize;
891 else if (rc2 == VERR_EM_INTERPRETER)
892 {
893#ifdef IN_RC
894 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
895 {
896 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
897 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
898 rc = VINF_SUCCESS;
899 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
900 }
901 else
902#endif
903 {
904 rc = VINF_EM_RAW_EMULATE_INSTR;
905 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
906 }
907 }
908 else
909 rc = rc2;
910
911 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
912 return rc;
913}
914
915/**
916 * Handles the STOSD write accesses.
917 *
918 * @returns VBox status code suitable for scheduling.
919 * @param pVM The VM handle.
920 * @param pPool The pool.
921 * @param pPage The pool page (head).
922 * @param pDis The disassembly of the write instruction.
923 * @param pRegFrame The trap register frame.
924 * @param GCPhysFault The fault address as guest physical address.
925 * @param pvFault The fault address.
926 */
927DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
928 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
929{
930 unsigned uIncrement = pDis->param1.size;
931
932 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
933 Assert(pRegFrame->rcx <= 0x20);
934
935#ifdef VBOX_STRICT
936 if (pDis->opmode == CPUMODE_32BIT)
937 Assert(uIncrement == 4);
938 else
939 Assert(uIncrement == 8);
940#endif
941
942 Log3(("pgmPoolAccessHandlerSTOSD\n"));
943
944 /*
945 * Increment the modification counter and insert it into the list
946 * of modified pages the first time.
947 */
948 if (!pPage->cModifications++)
949 pgmPoolMonitorModifiedInsert(pPool, pPage);
950
951 /*
952 * Execute REP STOSD.
953 *
954 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
955 * write situation, meaning that it's safe to write here.
956 */
957 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
958 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
959 while (pRegFrame->rcx)
960 {
961#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
962 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
963 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
964 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
965#else
966 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
967#endif
968#ifdef IN_RC
969 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
970#else
971 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
972#endif
973 pu32 += uIncrement;
974 GCPhysFault += uIncrement;
975 pRegFrame->rdi += uIncrement;
976 pRegFrame->rcx--;
977 }
978 pRegFrame->rip += pDis->opsize;
979
980 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
981 return VINF_SUCCESS;
982}
983
984
985/**
986 * Handles the simple write accesses.
987 *
988 * @returns VBox status code suitable for scheduling.
989 * @param pVM The VM handle.
990 * @param pVCpu The VMCPU handle.
991 * @param pPool The pool.
992 * @param pPage The pool page (head).
993 * @param pDis The disassembly of the write instruction.
994 * @param pRegFrame The trap register frame.
995 * @param GCPhysFault The fault address as guest physical address.
996 * @param pvFault The fault address.
997 * @param pfReused Reused state (out)
998 */
999DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1000 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
1001{
1002 Log3(("pgmPoolAccessHandlerSimple\n"));
1003 /*
1004 * Increment the modification counter and insert it into the list
1005 * of modified pages the first time.
1006 */
1007 if (!pPage->cModifications++)
1008 pgmPoolMonitorModifiedInsert(pPool, pPage);
1009
1010 /*
1011 * Clear all the pages. ASSUMES that pvFault is readable.
1012 */
1013#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1014 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1015 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1016 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1017#else
1018 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1019#endif
1020
1021 /*
1022 * Interpret the instruction.
1023 */
1024 uint32_t cb;
1025 int rc = EMInterpretInstructionCPUEx(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb, EMCODETYPE_ALL);
1026 if (RT_SUCCESS(rc))
1027 pRegFrame->rip += pDis->opsize;
1028 else if (rc == VERR_EM_INTERPRETER)
1029 {
1030 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1031 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1032 rc = VINF_EM_RAW_EMULATE_INSTR;
1033 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1034 }
1035
1036#if 0 /* experimental code */
1037 if (rc == VINF_SUCCESS)
1038 {
1039 switch (pPage->enmKind)
1040 {
1041 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1042 {
1043 X86PTEPAE GstPte;
1044 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1045 AssertRC(rc);
1046
1047 /* Check the new value written by the guest. If present and with a bogus physical address, then
1048 * it's fairly safe to assume the guest is reusing the PT.
1049 */
1050 if (GstPte.n.u1Present)
1051 {
1052 RTHCPHYS HCPhys = -1;
1053 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1054 if (rc != VINF_SUCCESS)
1055 {
1056 *pfReused = true;
1057 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1058 }
1059 }
1060 break;
1061 }
1062 }
1063 }
1064#endif
1065
1066 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1067 return rc;
1068}
1069
1070/**
1071 * \#PF Handler callback for PT write accesses.
1072 *
1073 * @returns VBox status code (appropriate for GC return).
1074 * @param pVM VM Handle.
1075 * @param uErrorCode CPU Error code.
1076 * @param pRegFrame Trap register frame.
1077 * NULL on DMA and other non CPU access.
1078 * @param pvFault The fault address (cr2).
1079 * @param GCPhysFault The GC physical address corresponding to pvFault.
1080 * @param pvUser User argument.
1081 */
1082DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1083{
1084 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1085 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1086 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1087 PVMCPU pVCpu = VMMGetCpu(pVM);
1088 unsigned cMaxModifications;
1089 bool fForcedFlush = false;
1090
1091 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1092
1093 pgmLock(pVM);
1094 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1095 {
1096 /* Pool page changed while we were waiting for the lock; ignore. */
1097 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1098 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1099 pgmUnlock(pVM);
1100 return VINF_SUCCESS;
1101 }
1102#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1103 if (pPage->fDirty)
1104 {
1105 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1106 pgmUnlock(pVM);
1107 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1108 }
1109#endif
1110
1111#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1112 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1113 {
1114 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1115 void *pvGst;
1116 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1117 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1118 }
1119#endif
1120
1121 /*
1122 * Disassemble the faulting instruction.
1123 */
1124 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1125 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1126 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1127 {
1128 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1129 pgmUnlock(pVM);
1130 return rc;
1131 }
1132
1133 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1134
1135 /*
1136 * We should ALWAYS have the list head as user parameter. This
1137 * is because we use that page to record the changes.
1138 */
1139 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1140
1141#ifdef IN_RING0
1142 /* Maximum number of modifications depends on the page type. */
1143 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1144 cMaxModifications = 4;
1145 else
1146 cMaxModifications = 24;
1147#else
1148 cMaxModifications = 48;
1149#endif
1150
1151 /*
1152 * Incremental page table updates should weigh more than random ones.
1153 * (Only applies when started from offset 0)
1154 */
1155 pVCpu->pgm.s.cPoolAccessHandler++;
1156 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1157 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1158 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1159 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1160 {
1161 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1162 Assert(pPage->cModifications < 32000);
1163 pPage->cModifications = pPage->cModifications * 2;
1164 pPage->pvLastAccessHandlerFault = pvFault;
1165 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1166 if (pPage->cModifications >= cMaxModifications)
1167 {
1168 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1169 fForcedFlush = true;
1170 }
1171 }
1172
1173 if (pPage->cModifications >= cMaxModifications)
1174 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1175
1176 /*
1177 * Check if it's worth dealing with.
1178 */
1179 bool fReused = false;
1180 bool fNotReusedNotForking = false;
1181 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1182 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1183 )
1184 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1185 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1186 {
1187 /*
1188 * Simple instructions, no REP prefix.
1189 */
1190 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1191 {
1192 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1193 if (fReused)
1194 goto flushPage;
1195
1196 /* A mov instruction changing the first page table entry will be remembered so we can detect
1197 * full page table changes early on. This reduces the number of unnecessary traps we'll take.
1198 */
1199 if ( rc == VINF_SUCCESS
1200 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1201 && pDis->pCurInstr->opcode == OP_MOV
1202 && (pvFault & PAGE_OFFSET_MASK) == 0)
1203 {
1204 pPage->pvLastAccessHandlerFault = pvFault;
1205 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1206 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1207 /* Make sure we don't kick out a page too quickly. */
1208 if (pPage->cModifications > 8)
1209 pPage->cModifications = 2;
1210 }
1211 else
1212 if (pPage->pvLastAccessHandlerFault == pvFault)
1213 {
1214 /* ignore the 2nd write to this page table entry. */
1215 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1216 }
1217 else
1218 {
1219 pPage->pvLastAccessHandlerFault = 0;
1220 pPage->pvLastAccessHandlerRip = 0;
1221 }
1222
1223 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1224 pgmUnlock(pVM);
1225 return rc;
1226 }
1227
1228 /*
1229 * Windows frequently does small memset() operations (netio test 4k+).
1230 * We have to deal with these or we'll kill the cache and performance.
1231 */
1232 if ( pDis->pCurInstr->opcode == OP_STOSWD
1233 && !pRegFrame->eflags.Bits.u1DF
1234 && pDis->opmode == pDis->mode
1235 && pDis->addrmode == pDis->mode)
1236 {
1237 bool fValidStosd = false;
1238
1239 if ( pDis->mode == CPUMODE_32BIT
1240 && pDis->prefix == PREFIX_REP
1241 && pRegFrame->ecx <= 0x20
1242 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1243 && !((uintptr_t)pvFault & 3)
1244 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1245 )
1246 {
1247 fValidStosd = true;
1248 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1249 }
1250 else
1251 if ( pDis->mode == CPUMODE_64BIT
1252 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1253 && pRegFrame->rcx <= 0x20
1254 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1255 && !((uintptr_t)pvFault & 7)
1256 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1257 )
1258 {
1259 fValidStosd = true;
1260 }
1261
1262 if (fValidStosd)
1263 {
1264 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1265 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1266 pgmUnlock(pVM);
1267 return rc;
1268 }
1269 }
1270
1271 /* REP prefix, don't bother. */
1272 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1273 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1274 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1275 fNotReusedNotForking = true;
1276 }
1277
1278#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1279 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1280 * leads to pgm pool thrashing and an excessive number of write faults due to page monitoring.
1281 */
1282 if ( pPage->cModifications >= cMaxModifications
1283 && !fForcedFlush
1284 && pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1285 && ( fNotReusedNotForking
1286 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1287 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1288 )
1289 )
1290 {
1291 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1292 Assert(pPage->fDirty == false);
1293
1294 /* Flush any monitored duplicates as we will disable write protection. */
1295 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1296 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1297 {
1298 PPGMPOOLPAGE pPageHead = pPage;
1299
1300 /* Find the monitor head. */
1301 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1302 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1303
1304 while (pPageHead)
1305 {
1306 unsigned idxNext = pPageHead->iMonitoredNext;
1307
1308 if (pPageHead != pPage)
1309 {
1310 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1311 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1312 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1313 AssertRC(rc2);
1314 }
1315
1316 if (idxNext == NIL_PGMPOOL_IDX)
1317 break;
1318
1319 pPageHead = &pPool->aPages[idxNext];
1320 }
1321 }
1322
1323 /* The flushing above might fail for locked pages, so double check. */
1324 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1325 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1326 {
1327 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1328
1329 /* Temporarily allow write access to the page table again. */
1330 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
1331 if (rc == VINF_SUCCESS)
1332 {
1333 rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
1334 AssertMsg(rc == VINF_SUCCESS
1335 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1336 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1337 || rc == VERR_PAGE_NOT_PRESENT,
1338 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1339
1340 pPage->pvDirtyFault = pvFault;
1341
1342 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1343 pgmUnlock(pVM);
1344 return rc;
1345 }
1346 }
1347 }
1348#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1349
1350 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1351flushPage:
1352 /*
1353 * Not worth it, so flush it.
1354 *
1355 * If we considered it to be reused, don't go back to ring-3
1356 * to emulate failed instructions since we usually cannot
1357 * interpret them. This may be a bit risky, in which case
1358 * the reuse detection must be fixed.
1359 */
1360 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1361 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1362 && fReused)
1363 {
1364 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1365 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1366 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1367 }
1368 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1369 pgmUnlock(pVM);
1370 return rc;
1371}
1372
1373# endif /* !IN_RING3 */
1374
1375# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1376
1377# ifdef VBOX_STRICT
1378/**
1379 * Check references to guest physical memory in a PAE / PAE page table.
1380 *
1381 * @param pPool The pool.
1382 * @param pPage The page.
1383 * @param pShwPT The shadow page table (mapping of the page).
1384 * @param pGstPT The guest page table.
1385 */
1386static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
1387{
1388 unsigned cErrors = 0;
1389 int LastRc = -1; /* initialized to shut up gcc */
1390 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1391 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1392
1393#ifdef VBOX_STRICT
1394 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1395 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1396#endif
1397 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1398 {
1399 if (pShwPT->a[i].n.u1Present)
1400 {
1401 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1402 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1403 if ( rc != VINF_SUCCESS
1404 || (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) != HCPhys)
1405 {
1406 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1407 LastPTE = i;
1408 LastRc = rc;
1409 LastHCPhys = HCPhys;
1410 cErrors++;
1411
1412 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1413 rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pPage->GCPhys, &HCPhysPT);
1414 AssertRC(rc);
1415
1416 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1417 {
1418 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1419
1420 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1421 {
1422 PX86PTPAE pShwPT2 = (PX86PTPAE)PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pTempPage);
1423
1424 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1425 {
1426 if ( pShwPT2->a[j].n.u1Present
1427 && pShwPT2->a[j].n.u1Write
1428 && ((pShwPT2->a[j].u & X86_PTE_PAE_PG_MASK) == HCPhysPT))
1429 {
1430 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, pShwPT->a[j].u, pShwPT2->a[j].u));
1431 }
1432 }
1433 }
1434 }
1435 }
1436 }
1437 }
1438 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, pShwPT->a[LastPTE].u, LastHCPhys));
1439}
1440# endif /* VBOX_STRICT */
1441
1442/**
1443 * Clear references to guest physical memory in a PAE / PAE page table.
1444 *
1445 * @returns Number of changed PTEs.
1446 * @param pPool The pool.
1447 * @param pPage The page.
1448 * @param pShwPT The shadow page table (mapping of the page).
1449 * @param pGstPT The guest page table.
1450 * @param pOldGstPT The old cached guest page table.
1451 * @param fAllowRemoval Allow bailing out as soon as we encounter an invalid (reused) PTE.
1452 * @param pfFlush Flush reused page table (out)
1453 */
1454DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT, PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1455{
1456 unsigned cChanged = 0;
1457
1458#ifdef VBOX_STRICT
1459 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1460 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1461#endif
1462 *pfFlush = false;
1463
1464 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1465 {
1466 /* Check the new value written by the guest. If present and with a bogus physical address, then
1467 * it's fairly safe to assume the guest is reusing the PT.
1468 */
1469 if ( fAllowRemoval
1470 && pGstPT->a[i].n.u1Present)
1471 {
1472 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1473 {
1474 *pfFlush = true;
1475 return ++cChanged;
1476 }
1477 }
1478 if (pShwPT->a[i].n.u1Present)
1479 {
1480 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1481 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1482 {
1483#ifdef VBOX_STRICT
1484 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1485 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1486 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1487#endif
1488 uint64_t uHostAttr = pShwPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1489 bool fHostRW = !!(pShwPT->a[i].u & X86_PTE_RW);
1490 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1491 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1492
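            /* The shadow R/W bit may legitimately be clear while the guest bit
               is set (write monitoring / dirty tracking), hence the
               fHostRW <= fGuestRW test below instead of strict equality. */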
1493 if ( uHostAttr == uGuestAttr
1494 && fHostRW <= fGuestRW)
1495 continue;
1496 }
1497 cChanged++;
1498 /* Something was changed, so flush it. */
1499 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1500 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1501 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1502 ASMAtomicWriteSize(&pShwPT->a[i].u, 0);
1503 }
1504 }
1505 return cChanged;
1506}
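/* (pgmPoolTrackFlushPTPaePae is driven by pgmPoolFlushDirtyPage below, which
   passes the guest PT copy saved by pgmPoolAddDirtyPage as pOldGstPT.) */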
1507
1508
1509/**
1510 * Flush a dirty page
1511 *
1512 * @param pVM VM Handle.
1513 * @param pPool The pool.
1514 * @param idxSlot Dirty array slot index
1515 * @param fAllowRemoval Allow a reused page table to be removed
1516 */
1517static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1518{
1519 PPGMPOOLPAGE pPage;
1520 unsigned idxPage;
1521
1522 Assert(idxSlot < RT_ELEMENTS(pPool->aIdxDirtyPages));
1523 if (pPool->aIdxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
1524 return;
1525
1526 idxPage = pPool->aIdxDirtyPages[idxSlot];
1527 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1528 pPage = &pPool->aPages[idxPage];
1529 Assert(pPage->idx == idxPage);
1530 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1531
1532 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1533 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1534
1535 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1536 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys);
1537 Assert(rc == VINF_SUCCESS);
1538 pPage->fDirty = false;
1539
1540#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1541 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(VMMGetCpu(pVM));
1542#endif
1543
1544#ifdef VBOX_STRICT
1545 uint64_t fFlags = 0;
1546 RTHCPHYS HCPhys;
1547 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1548 AssertMsg( ( rc == VINF_SUCCESS
1549 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1550 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1551 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1552 || rc == VERR_PAGE_NOT_PRESENT,
1553 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1554#endif
1555
1556 /* Flush those PTEs that have changed. */
1557 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1558 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1559 void *pvGst;
1560 bool fFlush;
1561 rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1562 unsigned cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst, (PCX86PTPAE)&pPool->aDirtyPages[idxSlot][0], fAllowRemoval, &fFlush);
1563 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1564 /** Note: we might want to consider keeping the dirty page active in case there were many changes. */
1565
1566 /* This page is likely to be modified again, so reduce the number of modifications just a bit here. */
1567 Assert(pPage->cModifications);
1568 if (cChanges < 4)
1569 pPage->cModifications = 1; /* must use > 0 here */
1570 else
1571 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
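    /* Example of the back-off above (informal): a table that had cModifications=32
       and 4 or more changed PTEs drops to 16, then 8, and so on on subsequent
       flushes, whereas a table with fewer than 4 changes restarts at 1 right away. */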
1572
1573 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1574 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages))
1575 pPool->idxFreeDirtyPage = idxSlot;
1576
1577 pPool->cDirtyPages--;
1578 pPool->aIdxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1579 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1580 if (fFlush)
1581 {
1582 Assert(fAllowRemoval);
1583 Log(("Flush reused page table!\n"));
1584 pgmPoolFlushPage(pPool, pPage);
1585 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1586 }
1587 else
1588 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1589
1590#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1591 PGMDynMapPopAutoSubset(VMMGetCpu(pVM), iPrevSubset);
1592#endif
1593}
1594
1595# ifndef IN_RING3
1596/**
1597 * Adds a new dirty page.
1598 *
1599 * @param pVM VM Handle.
1600 * @param pPool The pool.
1601 * @param pPage The page.
1602 */
1603void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1604{
1605 unsigned idxFree;
1606
1607 Assert(PGMIsLocked(pVM));
1608 AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
1609 Assert(!pPage->fDirty);
1610
1611 idxFree = pPool->idxFreeDirtyPage;
1612 Assert(idxFree < RT_ELEMENTS(pPool->aIdxDirtyPages));
1613 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1614
1615 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aIdxDirtyPages))
1616 {
1617 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1618 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1619 }
1620 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages));
1621 AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1622
1623 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1624
1625 /* Make a copy of the guest page table as we require valid GCPhys addresses when removing
1626 * references to physical pages. (the HCPhys linear lookup is *extremely* expensive!)
1627 */
1628 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1629 void *pvGst;
1630 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1631 memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
1632#ifdef VBOX_STRICT
1633 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1634#endif
1635
1636 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1637 pPage->fDirty = true;
1638 pPage->idxDirty = idxFree;
1639 pPool->aIdxDirtyPages[idxFree] = pPage->idx;
1640 pPool->cDirtyPages++;
1641
1642 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
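    /* The mask above relies on RT_ELEMENTS(pPool->aIdxDirtyPages) being a power of
       two (8 or 16, see the AssertCompile at the top of this function); e.g. with
       16 slots, (15 + 1) & 15 wraps the free index back to slot 0. */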
1643 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages)
1644 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1645 {
1646 unsigned i;
1647 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1648 {
1649 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1650 if (pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1651 {
1652 pPool->idxFreeDirtyPage = idxFree;
1653 break;
1654 }
1655 }
1656 Assert(i != RT_ELEMENTS(pPool->aIdxDirtyPages));
1657 }
1658
1659 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages) || pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1660 return;
1661}
1662# endif /* !IN_RING3 */
1663
1664/**
1665 * Checks if the specified page is dirty (not write monitored).
1666 *
1667 * @returns true if the page is dirty, false otherwise.
1668 * @param pVM VM Handle.
1669 * @param GCPhys Guest physical address
1670 */
1671bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1672{
1673 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1674 Assert(PGMIsLocked(pVM));
1675 if (!pPool->cDirtyPages)
1676 return false;
1677
1678 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
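    /* Mask off the page offset so the compare below works on page boundaries,
       e.g. 0x00123456 & ~0xfff -> 0x00123000 (assuming the usual 4 KB PAGE_SIZE). */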
1679
1680 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1681 {
1682 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1683 {
1684 PPGMPOOLPAGE pPage;
1685 unsigned idxPage = pPool->aIdxDirtyPages[i];
1686
1687 pPage = &pPool->aPages[idxPage];
1688 if (pPage->GCPhys == GCPhys)
1689 return true;
1690 }
1691 }
1692 return false;
1693}
1694
1695/**
1696 * Reset all dirty pages by reinstating page monitoring.
1697 *
1698 * @param pVM VM Handle.
1699 */
1700void pgmPoolResetDirtyPages(PVM pVM)
1701{
1702 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1703 Assert(PGMIsLocked(pVM));
1704 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1705
1706 if (!pPool->cDirtyPages)
1707 return;
1708
1709 Log(("pgmPoolResetDirtyPages\n"));
1710 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1711 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1712
1713 pPool->idxFreeDirtyPage = 0;
1714 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1715 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1716 {
1717 unsigned i;
1718 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1719 {
1720 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1721 {
1722 pPool->idxFreeDirtyPage = i;
1723 break;
1724 }
1725 }
1726 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1727 }
1728
1729 Assert(pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages));
1730 return;
1731}
1732
1733/**
1734 * Invalidates the dirty state of the specified page table, flushing it if it is in the dirty set.
1735 *
1736 * @param pVM VM Handle.
1737 * @param GCPhysPT Physical address of the page table
1738 */
1739void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1740{
1741 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1742 Assert(PGMIsLocked(pVM));
1743 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1744 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aIdxDirtyPages);
1745
1746 if (!pPool->cDirtyPages)
1747 return;
1748
1749 GCPhysPT = GCPhysPT & ~(RTGCPHYS)(PAGE_SIZE - 1);
1750
1751 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1752 {
1753 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1754 {
1755 unsigned idxPage = pPool->aIdxDirtyPages[i];
1756
1757 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1758 if (pPage->GCPhys == GCPhysPT)
1759 {
1760 idxDirtyPage = i;
1761 break;
1762 }
1763 }
1764 }
1765
1766 if (idxDirtyPage != RT_ELEMENTS(pPool->aIdxDirtyPages))
1767 {
1768 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1769 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1770 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1771 {
1772 unsigned i;
1773 for (i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1774 {
1775 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1776 {
1777 pPool->idxFreeDirtyPage = i;
1778 break;
1779 }
1780 }
1781 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1782 }
1783 }
1784}
1785
1786# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1787
1788/**
1789 * Inserts a page into the GCPhys hash table.
1790 *
1791 * @param pPool The pool.
1792 * @param pPage The page.
1793 */
1794DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1795{
1796 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1797 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1798 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1799 pPage->iNext = pPool->aiHash[iHash];
1800 pPool->aiHash[iHash] = pPage->idx;
1801}
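
/*
 * Informal sketch of the hash bucket layout maintained by pgmPoolHashInsert and
 * pgmPoolHashRemove (assuming PGMPOOL_HASH simply folds the page-aligned GCPhys
 * into an index of pPool->aiHash):
 *
 *   aiHash[iHash] -> idx A -> aPages[A].iNext -> idx B -> ... -> NIL_PGMPOOL_IDX
 *
 * New pages are pushed at the head of the chain, so the most recently shadowed
 * page for a given bucket is found first by pgmPoolCacheAlloc and
 * pgmPoolMonitorGetPageByGCPhys below.
 */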
1802
1803
1804/**
1805 * Removes a page from the GCPhys hash table.
1806 *
1807 * @param pPool The pool.
1808 * @param pPage The page.
1809 */
1810DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1811{
1812 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1813 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1814 if (pPool->aiHash[iHash] == pPage->idx)
1815 pPool->aiHash[iHash] = pPage->iNext;
1816 else
1817 {
1818 uint16_t iPrev = pPool->aiHash[iHash];
1819 for (;;)
1820 {
1821 const int16_t i = pPool->aPages[iPrev].iNext;
1822 if (i == pPage->idx)
1823 {
1824 pPool->aPages[iPrev].iNext = pPage->iNext;
1825 break;
1826 }
1827 if (i == NIL_PGMPOOL_IDX)
1828 {
1829 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1830 break;
1831 }
1832 iPrev = i;
1833 }
1834 }
1835 pPage->iNext = NIL_PGMPOOL_IDX;
1836}
1837
1838
1839/**
1840 * Frees up one cache page.
1841 *
1842 * @returns VBox status code.
1843 * @retval VINF_SUCCESS on success.
1844 * @param pPool The pool.
1845 * @param iUser The user index.
1846 */
1847static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1848{
1849#ifndef IN_RC
1850 const PVM pVM = pPool->CTX_SUFF(pVM);
1851#endif
1852 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1853 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1854
1855 /*
1856 * Select one page from the tail of the age list.
1857 */
1858 PPGMPOOLPAGE pPage;
1859 for (unsigned iLoop = 0; ; iLoop++)
1860 {
1861 uint16_t iToFree = pPool->iAgeTail;
1862 if (iToFree == iUser)
1863 iToFree = pPool->aPages[iToFree].iAgePrev;
1864/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1865 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1866 {
1867 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1868 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1869 {
1870 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1871 continue;
1872 iToFree = i;
1873 break;
1874 }
1875 }
1876*/
1877 Assert(iToFree != iUser);
1878 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1879 pPage = &pPool->aPages[iToFree];
1880
1881 /*
1882 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1883 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1884 */
1885 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1886 break;
1887 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1888 pgmPoolCacheUsed(pPool, pPage);
1889 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1890 }
1891
1892 /*
1893 * Found a usable page, flush it and return.
1894 */
1895 int rc = pgmPoolFlushPage(pPool, pPage);
1896 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1897 /* todo: find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
1898 if (rc == VINF_SUCCESS)
1899 PGM_INVL_ALL_VCPU_TLBS(pVM);
1900 return rc;
1901}
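
/*
 * Rough outline of the eviction above: the age list is kept in LRU order with
 * pPool->iAgeHead as the most recently used page and pPool->iAgeTail as the
 * oldest, so pgmPoolCacheFreeOne walks from the tail, skips the requesting user
 * and any locked CR3 root (moving those back to the head via pgmPoolCacheUsed),
 * and flushes the first page it may legally reclaim.
 */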
1902
1903
1904/**
1905 * Checks if a kind mismatch is really a page being reused
1906 * or if it's just normal remappings.
1907 *
1908 * @returns true if reused and the cached page (enmKind1) should be flushed
1909 * @returns false if not reused.
1910 * @param enmKind1 The kind of the cached page.
1911 * @param enmKind2 The kind of the requested page.
1912 */
1913static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1914{
1915 switch (enmKind1)
1916 {
1917 /*
1918 * Never reuse them. There is no remapping in non-paging mode.
1919 */
1920 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1921 case PGMPOOLKIND_32BIT_PD_PHYS:
1922 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1923 case PGMPOOLKIND_PAE_PD_PHYS:
1924 case PGMPOOLKIND_PAE_PDPT_PHYS:
1925 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1926 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1927 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1928 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1929 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1930 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1931 return false;
1932
1933 /*
1934 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1935 */
1936 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1937 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1938 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1939 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1940 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1941 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1942 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1943 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1944 case PGMPOOLKIND_32BIT_PD:
1945 case PGMPOOLKIND_PAE_PDPT:
1946 switch (enmKind2)
1947 {
1948 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1949 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1950 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1951 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1952 case PGMPOOLKIND_64BIT_PML4:
1953 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1954 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1955 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1956 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1957 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1958 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1959 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1960 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1961 return true;
1962 default:
1963 return false;
1964 }
1965
1966 /*
1967 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1968 */
1969 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1970 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1971 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1972 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1973 case PGMPOOLKIND_64BIT_PML4:
1974 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1975 switch (enmKind2)
1976 {
1977 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1978 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1979 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1980 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1981 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1982 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1983 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1984 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1985 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1986 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1987 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1988 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1989 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1990 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1991 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1992 return true;
1993 default:
1994 return false;
1995 }
1996
1997 /*
1998 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1999 */
2000 case PGMPOOLKIND_ROOT_NESTED:
2001 return false;
2002
2003 default:
2004 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2005 }
2006}
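
/*
 * Example of the distinction made above: a page last shadowed as
 * PGMPOOLKIND_32BIT_PD that is now requested as PGMPOOLKIND_PAE_PT_FOR_PAE_PT
 * indicates the guest has switched paging modes and reused the physical page,
 * so the cached shadow must be flushed; a PGMPOOLKIND_*_FOR_PHYS page, on the
 * other hand, is never considered reused since there is no remapping in
 * non-paging mode.
 */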
2007
2008
2009/**
2010 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2011 *
2012 * @returns VBox status code.
2013 * @retval VINF_PGM_CACHED_PAGE on success.
2014 * @retval VERR_FILE_NOT_FOUND if not found.
2015 * @param pPool The pool.
2016 * @param GCPhys The GC physical address of the page we're gonna shadow.
2017 * @param enmKind The kind of mapping.
2018 * @param enmAccess Access type for the mapping (only relevant for big pages)
2019 * @param iUser The shadow page pool index of the user table.
2020 * @param iUserTable The index into the user table (shadowed).
2021 * @param ppPage Where to store the pointer to the page.
2022 */
2023static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2024{
2025#ifndef IN_RC
2026 const PVM pVM = pPool->CTX_SUFF(pVM);
2027#endif
2028 /*
2029 * Look up the GCPhys in the hash.
2030 */
2031 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2032 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2033 if (i != NIL_PGMPOOL_IDX)
2034 {
2035 do
2036 {
2037 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2038 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2039 if (pPage->GCPhys == GCPhys)
2040 {
2041 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2042 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2043 {
2044 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2045 * doesn't flush it in case there are no more free use records.
2046 */
2047 pgmPoolCacheUsed(pPool, pPage);
2048
2049 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2050 if (RT_SUCCESS(rc))
2051 {
2052 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2053 *ppPage = pPage;
2054 if (pPage->cModifications)
2055 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2056 STAM_COUNTER_INC(&pPool->StatCacheHits);
2057 return VINF_PGM_CACHED_PAGE;
2058 }
2059 return rc;
2060 }
2061
2062 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2063 {
2064 /*
2065 * The kind is different. In some cases we should now flush the page
2066 * as it has been reused, but in most cases this is normal remapping
2067 * of PDs as PT or big pages using the GCPhys field in a slightly
2068 * different way than the other kinds.
2069 */
2070 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2071 {
2072 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2073 pgmPoolFlushPage(pPool, pPage);
2074 break;
2075 }
2076 }
2077 }
2078
2079 /* next */
2080 i = pPage->iNext;
2081 } while (i != NIL_PGMPOOL_IDX);
2082 }
2083
2084 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2085 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2086 return VERR_FILE_NOT_FOUND;
2087}
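
/*
 * Illustrative caller sketch (disabled, simplified): how the two interesting
 * return codes of pgmPoolCacheAlloc are typically consumed. The fall-back helper
 * named here, pgmPoolAllocNewPage, is hypothetical and only stands in for the
 * regular allocation path.
 */
#if 0
    PPGMPOOLPAGE pPage;
    int rc = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, &pPage);
    if (rc == VINF_PGM_CACHED_PAGE)
        return rc;                              /* reuse the cached shadow page. */
    if (RT_FAILURE(rc) && rc != VERR_FILE_NOT_FOUND)
        return rc;                              /* adding the user reference failed. */
    /* Not cached: allocate and initialize a fresh pool page instead. */
    rc = pgmPoolAllocNewPage(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, &pPage);
#endif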
2088
2089
2090/**
2091 * Inserts a page into the cache.
2092 *
2093 * @param pPool The pool.
2094 * @param pPage The cached page.
2095 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2096 */
2097static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2098{
2099 /*
2100 * Insert into the GCPhys hash if the page is fit for that.
2101 */
2102 Assert(!pPage->fCached);
2103 if (fCanBeCached)
2104 {
2105 pPage->fCached = true;
2106 pgmPoolHashInsert(pPool, pPage);
2107 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2108 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2109 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2110 }
2111 else
2112 {
2113 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2114 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2115 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2116 }
2117
2118 /*
2119 * Insert at the head of the age list.
2120 */
2121 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2122 pPage->iAgeNext = pPool->iAgeHead;
2123 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2124 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2125 else
2126 pPool->iAgeTail = pPage->idx;
2127 pPool->iAgeHead = pPage->idx;
2128}
2129
2130
2131/**
2132 * Flushes a cached page.
2133 *
2134 * @param pPool The pool.
2135 * @param pPage The cached page.
2136 */
2137static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2138{
2139 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2140
2141 /*
2142 * Remove the page from the hash.
2143 */
2144 if (pPage->fCached)
2145 {
2146 pPage->fCached = false;
2147 pgmPoolHashRemove(pPool, pPage);
2148 }
2149 else
2150 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2151
2152 /*
2153 * Remove it from the age list.
2154 */
2155 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2156 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2157 else
2158 pPool->iAgeTail = pPage->iAgePrev;
2159 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2160 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2161 else
2162 pPool->iAgeHead = pPage->iAgeNext;
2163 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2164 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2165}
2166
2167
2168/**
2169 * Looks for pages sharing the monitor.
2170 *
2171 * @returns Pointer to the head page.
2172 * @returns NULL if not found.
2173 * @param pPool The Pool
2174 * @param pNewPage The page which is going to be monitored.
2175 */
2176static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2177{
2178 /*
2179 * Look up the GCPhys in the hash.
2180 */
2181 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2182 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2183 if (i == NIL_PGMPOOL_IDX)
2184 return NULL;
2185 do
2186 {
2187 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2188 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2189 && pPage != pNewPage)
2190 {
2191 switch (pPage->enmKind)
2192 {
2193 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2194 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2195 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2196 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2197 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2198 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2199 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2200 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2201 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2202 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2203 case PGMPOOLKIND_64BIT_PML4:
2204 case PGMPOOLKIND_32BIT_PD:
2205 case PGMPOOLKIND_PAE_PDPT:
2206 {
2207 /* find the head */
2208 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2209 {
2210 Assert(pPage->iMonitoredPrev != pPage->idx);
2211 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2212 }
2213 return pPage;
2214 }
2215
2216 /* ignore, no monitoring. */
2217 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2218 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2219 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2220 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2221 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2222 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2223 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2224 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2225 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2226 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2227 case PGMPOOLKIND_ROOT_NESTED:
2228 case PGMPOOLKIND_PAE_PD_PHYS:
2229 case PGMPOOLKIND_PAE_PDPT_PHYS:
2230 case PGMPOOLKIND_32BIT_PD_PHYS:
2231 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2232 break;
2233 default:
2234 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2235 }
2236 }
2237
2238 /* next */
2239 i = pPage->iNext;
2240 } while (i != NIL_PGMPOOL_IDX);
2241 return NULL;
2242}
2243
2244
2245/**
2246 * Enables write monitoring of a guest page.
2247 *
2248 * @returns VBox status code.
2249 * @retval VINF_SUCCESS on success.
2250 * @param pPool The pool.
2251 * @param pPage The cached page.
2252 */
2253static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2254{
2255 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
2256
2257 /*
2258 * Filter out the relevant kinds.
2259 */
2260 switch (pPage->enmKind)
2261 {
2262 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2263 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2264 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2265 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2266 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2267 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2268 case PGMPOOLKIND_64BIT_PML4:
2269 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2270 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2271 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2272 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2273 case PGMPOOLKIND_32BIT_PD:
2274 case PGMPOOLKIND_PAE_PDPT:
2275 break;
2276
2277 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2278 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2279 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2280 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2281 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2282 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2283 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2284 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2285 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2286 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2287 case PGMPOOLKIND_ROOT_NESTED:
2288 /* Nothing to monitor here. */
2289 return VINF_SUCCESS;
2290
2291 case PGMPOOLKIND_32BIT_PD_PHYS:
2292 case PGMPOOLKIND_PAE_PDPT_PHYS:
2293 case PGMPOOLKIND_PAE_PD_PHYS:
2294 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2295 /* Nothing to monitor here. */
2296 return VINF_SUCCESS;
2297 default:
2298 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2299 }
2300
2301 /*
2302 * Install handler.
2303 */
2304 int rc;
2305 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2306 if (pPageHead)
2307 {
2308 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2309 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2310
2311#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2312 if (pPageHead->fDirty)
2313 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2314#endif
2315
2316 pPage->iMonitoredPrev = pPageHead->idx;
2317 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2318 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2319 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2320 pPageHead->iMonitoredNext = pPage->idx;
2321 rc = VINF_SUCCESS;
2322 }
2323 else
2324 {
2325 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2326 PVM pVM = pPool->CTX_SUFF(pVM);
2327 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2328 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2329 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
2330 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2331 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2332 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2333 pPool->pszAccessHandler);
2334 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2335 * the heap size should suffice. */
2336 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2337 PVMCPU pVCpu = VMMGetCpu(pVM);
2338 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2339 }
2340 pPage->fMonitored = true;
2341 return rc;
2342}
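
/*
 * Informal picture of the monitor sharing set up above: all shadow pages derived
 * from the same guest physical page share one physical write handler, linked
 * through iMonitoredPrev/iMonitoredNext with the page that actually registered
 * the handler at the head:
 *
 *   head (owns the handler) <-> pPage <-> ... <-> NIL_PGMPOOL_IDX
 *
 * pgmPoolMonitorFlush below unlinks a page again and either moves the handler
 * callbacks to the new head or deregisters the handler when the chain becomes
 * empty.
 */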
2343
2344
2345/**
2346 * Disables write monitoring of a guest page.
2347 *
2348 * @returns VBox status code.
2349 * @retval VINF_SUCCESS on success.
2350 * @param pPool The pool.
2351 * @param pPage The cached page.
2352 */
2353static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2354{
2355 /*
2356 * Filter out the relevant kinds.
2357 */
2358 switch (pPage->enmKind)
2359 {
2360 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2361 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2362 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2363 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2364 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2365 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2366 case PGMPOOLKIND_64BIT_PML4:
2367 case PGMPOOLKIND_32BIT_PD:
2368 case PGMPOOLKIND_PAE_PDPT:
2369 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2370 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2371 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2372 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2373 break;
2374
2375 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2376 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2377 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2378 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2379 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2380 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2381 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2382 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2383 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2384 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2385 case PGMPOOLKIND_ROOT_NESTED:
2386 case PGMPOOLKIND_PAE_PD_PHYS:
2387 case PGMPOOLKIND_PAE_PDPT_PHYS:
2388 case PGMPOOLKIND_32BIT_PD_PHYS:
2389 /* Nothing to monitor here. */
2390 Assert(!pPage->fMonitored);
2391 return VINF_SUCCESS;
2392
2393 default:
2394 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2395 }
2396 Assert(pPage->fMonitored);
2397
2398 /*
2399 * Remove the page from the monitored list or uninstall it if last.
2400 */
2401 const PVM pVM = pPool->CTX_SUFF(pVM);
2402 int rc;
2403 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2404 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2405 {
2406 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2407 {
2408 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2409 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2410 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2411 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2412 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2413 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2414 pPool->pszAccessHandler);
2415 AssertFatalRCSuccess(rc);
2416 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2417 }
2418 else
2419 {
2420 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2421 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2422 {
2423 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2424 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2425 }
2426 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2427 rc = VINF_SUCCESS;
2428 }
2429 }
2430 else
2431 {
2432 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2433 AssertFatalRC(rc);
2434 PVMCPU pVCpu = VMMGetCpu(pVM);
2435 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2436 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2437 }
2438 pPage->fMonitored = false;
2439
2440 /*
2441 * Remove it from the list of modified pages (if in it).
2442 */
2443 pgmPoolMonitorModifiedRemove(pPool, pPage);
2444
2445 return rc;
2446}
2447
2448
2449/**
2450 * Inserts the page into the list of modified pages.
2451 *
2452 * @param pPool The pool.
2453 * @param pPage The page.
2454 */
2455void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2456{
2457 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2458 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2459 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2460 && pPool->iModifiedHead != pPage->idx,
2461 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2462 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2463 pPool->iModifiedHead, pPool->cModifiedPages));
2464
2465 pPage->iModifiedNext = pPool->iModifiedHead;
2466 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2467 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2468 pPool->iModifiedHead = pPage->idx;
2469 pPool->cModifiedPages++;
2470#ifdef VBOX_WITH_STATISTICS
2471 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2472 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2473#endif
2474}
2475
2476
2477/**
2478 * Removes the page from the list of modified pages and resets the
2479 * modification counter.
2480 *
2481 * @param pPool The pool.
2482 * @param pPage The page which is believed to be in the list of modified pages.
2483 */
2484static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2485{
2486 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2487 if (pPool->iModifiedHead == pPage->idx)
2488 {
2489 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2490 pPool->iModifiedHead = pPage->iModifiedNext;
2491 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2492 {
2493 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2494 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2495 }
2496 pPool->cModifiedPages--;
2497 }
2498 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2499 {
2500 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2501 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2502 {
2503 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2504 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2505 }
2506 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2507 pPool->cModifiedPages--;
2508 }
2509 else
2510 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2511 pPage->cModifications = 0;
2512}
2513
2514
2515/**
2516 * Zaps the list of modified pages, resetting their modification counters in the process.
2517 *
2518 * @param pVM The VM handle.
2519 */
2520static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2521{
2522 pgmLock(pVM);
2523 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2524 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2525
2526 unsigned cPages = 0; NOREF(cPages);
2527
2528#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2529 pgmPoolResetDirtyPages(pVM);
2530#endif
2531
2532 uint16_t idx = pPool->iModifiedHead;
2533 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2534 while (idx != NIL_PGMPOOL_IDX)
2535 {
2536 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2537 idx = pPage->iModifiedNext;
2538 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2539 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2540 pPage->cModifications = 0;
2541 Assert(++cPages);
2542 }
2543 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2544 pPool->cModifiedPages = 0;
2545 pgmUnlock(pVM);
2546}
2547
2548
2549/**
2550 * Handles SyncCR3 pool tasks.
2551 *
2552 * @returns VBox status code.
2553 * @retval VINF_SUCCESS if successfully added.
2554 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2555 * @param pVCpu The VMCPU handle.
2556 * @remark Should only be used when monitoring is available, thus placed in
2557 * the PGMPOOL_WITH_MONITORING #ifdef.
2558 */
2559int pgmPoolSyncCR3(PVMCPU pVCpu)
2560{
2561 PVM pVM = pVCpu->CTX_SUFF(pVM);
2562 LogFlow(("pgmPoolSyncCR3\n"));
2563
2564 /*
2565 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2566 * Occasionally we will have to clear all the shadow page tables because we wanted
2567 * to monitor a page which was mapped by too many shadowed page tables. This operation
2568 * is sometimes referred to as a 'lightweight flush'.
2569 */
2570# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2571 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2572 pgmR3PoolClearAll(pVM);
2573# else /* !IN_RING3 */
2574 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2575 {
2576 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2577 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2578
2579 /* Make sure all other VCPUs return to ring 3. */
2580 if (pVM->cCpus > 1)
2581 {
2582 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2583 PGM_INVL_ALL_VCPU_TLBS(pVM);
2584 }
2585 return VINF_PGM_SYNC_CR3;
2586 }
2587# endif /* !IN_RING3 */
2588 else
2589 {
2590 pgmPoolMonitorModifiedClearAll(pVM);
2591
2592 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2593 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2594 {
2595 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2596 return pgmPoolSyncCR3(pVCpu);
2597 }
2598 }
2599 return VINF_SUCCESS;
2600}
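
/*
 * Rough flow of pgmPoolSyncCR3 (informal): in ring-3 a pending
 * PGM_SYNC_CLEAR_PGM_POOL flag triggers a full pool clear, while in ring-0/RC the
 * same flag defers the work by returning VINF_PGM_SYNC_CR3 and kicking the other
 * VCPUs back to ring-3; otherwise only the modification counters are reset via
 * pgmPoolMonitorModifiedClearAll, and if that itself sets the clear-pool flag,
 * pgmPoolSyncCR3 simply calls itself again.
 */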
2601
2602
2603/**
2604 * Frees up at least one user entry.
2605 *
2606 * @returns VBox status code.
2607 * @retval VINF_SUCCESS if successfully freed.
2608 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2609 * @param pPool The pool.
2610 * @param iUser The user index.
2611 */
2612static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2613{
2614 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2615 /*
2616 * Just free cached pages in a braindead fashion.
2617 */
2618 /** @todo walk the age list backwards and free the first with usage. */
2619 int rc = VINF_SUCCESS;
2620 do
2621 {
2622 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2623 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2624 rc = rc2;
2625 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2626 return rc;
2627}
2628
2629
2630/**
2631 * Inserts a page into the cache.
2632 *
2633 * This will create a user node for the page, insert it into the GCPhys
2634 * hash, and insert it into the age list.
2635 *
2636 * @returns VBox status code.
2637 * @retval VINF_SUCCESS if successfully added.
2638 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2639 * @param pPool The pool.
2640 * @param pPage The cached page.
2641 * @param GCPhys The GC physical address of the page we're gonna shadow.
2642 * @param iUser The user index.
2643 * @param iUserTable The user table index.
2644 */
2645DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2646{
2647 int rc = VINF_SUCCESS;
2648 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2649
2650 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2651
2652#ifdef VBOX_STRICT
2653 /*
2654 * Check that the entry doesn't already exist.
2655 */
2656 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2657 {
2658 uint16_t i = pPage->iUserHead;
2659 do
2660 {
2661 Assert(i < pPool->cMaxUsers);
2662 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2663 i = paUsers[i].iNext;
2664 } while (i != NIL_PGMPOOL_USER_INDEX);
2665 }
2666#endif
2667
2668 /*
2669 * Find a free user node.
2670 */
2671 uint16_t i = pPool->iUserFreeHead;
2672 if (i == NIL_PGMPOOL_USER_INDEX)
2673 {
2674 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2675 if (RT_FAILURE(rc))
2676 return rc;
2677 i = pPool->iUserFreeHead;
2678 }
2679
2680 /*
2681 * Unlink the user node from the free list,
2682 * initialize and insert it into the user list.
2683 */
2684 pPool->iUserFreeHead = paUsers[i].iNext;
2685 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2686 paUsers[i].iUser = iUser;
2687 paUsers[i].iUserTable = iUserTable;
2688 pPage->iUserHead = i;
2689
2690 /*
2691 * Insert into cache and enable monitoring of the guest page if enabled.
2692 *
2693 * Until we implement caching of all levels, including the CR3 one, we'll
2694 * have to make sure we don't try to monitor & cache any recursive reuse of
2695 * a monitored CR3 page. Because all Windows versions do this, we'll
2696 * have to be able to do combined access monitoring, CR3 + PT and
2697 * PD + PT (guest PAE).
2698 *
2699 * Update:
2700 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2701 */
2702 const bool fCanBeMonitored = true;
2703 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2704 if (fCanBeMonitored)
2705 {
2706 rc = pgmPoolMonitorInsert(pPool, pPage);
2707 AssertRC(rc);
2708 }
2709 return rc;
2710}
2711
2712
2713/**
2714 * Adds a user reference to a page.
2715 *
2716 * This will move the page to the head of the
2717 *
2718 * @returns VBox status code.
2719 * @retval VINF_SUCCESS if successfully added.
2720 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2721 * @param pPool The pool.
2722 * @param pPage The cached page.
2723 * @param iUser The user index.
2724 * @param iUserTable The user table.
2725 */
2726static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2727{
2728 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2729
2730 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2731
2732# ifdef VBOX_STRICT
2733 /*
2734 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2735 */
2736 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2737 {
2738 uint16_t i = pPage->iUserHead;
2739 do
2740 {
2741 Assert(i < pPool->cMaxUsers);
2742 AssertMsg(iUser == PGMPOOL_IDX_PD || iUser == PGMPOOL_IDX_PDPT || iUser == PGMPOOL_IDX_NESTED_ROOT || iUser == PGMPOOL_IDX_AMD64_CR3 ||
2743 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2744 i = paUsers[i].iNext;
2745 } while (i != NIL_PGMPOOL_USER_INDEX);
2746 }
2747# endif
2748
2749 /*
2750 * Allocate a user node.
2751 */
2752 uint16_t i = pPool->iUserFreeHead;
2753 if (i == NIL_PGMPOOL_USER_INDEX)
2754 {
2755 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2756 if (RT_FAILURE(rc))
2757 return rc;
2758 i = pPool->iUserFreeHead;
2759 }
2760 pPool->iUserFreeHead = paUsers[i].iNext;
2761
2762 /*
2763 * Initialize the user node and insert it.
2764 */
2765 paUsers[i].iNext = pPage->iUserHead;
2766 paUsers[i].iUser = iUser;
2767 paUsers[i].iUserTable = iUserTable;
2768 pPage->iUserHead = i;
2769
2770# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2771 if (pPage->fDirty)
2772 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2773# endif
2774
2775 /*
2776 * Tell the cache to update its replacement stats for this page.
2777 */
2778 pgmPoolCacheUsed(pPool, pPage);
2779 return VINF_SUCCESS;
2780}
2781
2782
2783/**
2784 * Frees a user record associated with a page.
2785 *
2786 * This does not clear the entry in the user table, it simply returns the
2787 * user record to the chain of free records.
2788 *
2789 * @param pPool The pool.
2790 * @param pPage The shadow page.
2791 * @param iUser The shadow page pool index of the user table.
2792 * @param iUserTable The index into the user table (shadowed).
2793 */
2794static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2795{
2796 /*
2797 * Unlink and free the specified user entry.
2798 */
2799 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2800
2801 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2802 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2803 uint16_t i = pPage->iUserHead;
2804 if ( i != NIL_PGMPOOL_USER_INDEX
2805 && paUsers[i].iUser == iUser
2806 && paUsers[i].iUserTable == iUserTable)
2807 {
2808 pPage->iUserHead = paUsers[i].iNext;
2809
2810 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2811 paUsers[i].iNext = pPool->iUserFreeHead;
2812 pPool->iUserFreeHead = i;
2813 return;
2814 }
2815
2816 /* General: Linear search. */
2817 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2818 while (i != NIL_PGMPOOL_USER_INDEX)
2819 {
2820 if ( paUsers[i].iUser == iUser
2821 && paUsers[i].iUserTable == iUserTable)
2822 {
2823 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2824 paUsers[iPrev].iNext = paUsers[i].iNext;
2825 else
2826 pPage->iUserHead = paUsers[i].iNext;
2827
2828 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2829 paUsers[i].iNext = pPool->iUserFreeHead;
2830 pPool->iUserFreeHead = i;
2831 return;
2832 }
2833 iPrev = i;
2834 i = paUsers[i].iNext;
2835 }
2836
2837 /* Fatal: didn't find it */
2838 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2839 iUser, iUserTable, pPage->GCPhys));
2840}
2841
2842
2843/**
2844 * Gets the entry size of a shadow table.
2845 *
2846 * @param enmKind The kind of page.
2847 *
2848 * @returns The size of the entry in bytes. That is, 4 or 8.
2849 * @returns If the kind is not for a table, an assertion is raised and 0 is
2850 * returned.
2851 */
2852DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2853{
2854 switch (enmKind)
2855 {
2856 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2857 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2858 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2859 case PGMPOOLKIND_32BIT_PD:
2860 case PGMPOOLKIND_32BIT_PD_PHYS:
2861 return 4;
2862
2863 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2864 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2865 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2866 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2867 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2868 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2869 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2870 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2871 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2872 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2873 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2874 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2875 case PGMPOOLKIND_64BIT_PML4:
2876 case PGMPOOLKIND_PAE_PDPT:
2877 case PGMPOOLKIND_ROOT_NESTED:
2878 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2879 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2880 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2881 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2882 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2883 case PGMPOOLKIND_PAE_PD_PHYS:
2884 case PGMPOOLKIND_PAE_PDPT_PHYS:
2885 return 8;
2886
2887 default:
2888 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2889 }
2890}
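
/*
 * For reference: with 4 KB shadow pages this works out to 1024 entries per
 * 32-bit table (4096 / 4) and 512 entries per PAE/long-mode/EPT table
 * (4096 / 8).
 */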
2891
2892
2893/**
2894 * Gets the entry size of a guest table.
2895 *
2896 * @param enmKind The kind of page.
2897 *
2898 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2899 * @returns If the kind is not for a table, an assertion is raised and 0 is
2900 * returned.
2901 */
2902DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2903{
2904 switch (enmKind)
2905 {
2906 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2907 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2908 case PGMPOOLKIND_32BIT_PD:
2909 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2910 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2911 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2912 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2913 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2914 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2915 return 4;
2916
2917 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2918 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2919 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2920 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2921 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2922 case PGMPOOLKIND_64BIT_PML4:
2923 case PGMPOOLKIND_PAE_PDPT:
2924 return 8;
2925
2926 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2927 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2928 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2929 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2930 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2931 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2932 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2933 case PGMPOOLKIND_ROOT_NESTED:
2934 case PGMPOOLKIND_PAE_PD_PHYS:
2935 case PGMPOOLKIND_PAE_PDPT_PHYS:
2936 case PGMPOOLKIND_32BIT_PD_PHYS:
2937 /** @todo can we return 0? (nobody is calling this...) */
2938 AssertFailed();
2939 return 0;
2940
2941 default:
2942 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2943 }
2944}
2945
2946
2947/**
2948 * Scans one shadow page table for mappings of a physical page.
2949 *
2950 * @returns true/false indicating removal of all relevant PTEs
2951 * @param pVM The VM handle.
2952 * @param pPhysPage The guest page in question.
2953 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
2954 * @param iShw The shadow page table.
2955 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
2956 * @param cRefs The number of references made in that PT.
2957 */
2958static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte, uint16_t cRefs)
2959{
2960 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d iPte=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte, cRefs));
2961 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2962 bool bRet = false;
2963
2964 /*
2965 * Assert sanity.
2966 */
2967 Assert(cRefs == 1);
2968 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
2969 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2970 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2971
2972 /*
2973 * Then, clear the actual mappings to the page in the shadow PT.
2974 */
2975 switch (pPage->enmKind)
2976 {
2977 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2978 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2979 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2980 {
2981 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2982 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2983 uint32_t u32AndMask, u32OrMask;
2984
2985 u32AndMask = 0;
2986 u32OrMask = 0;
2987
2988 if (!fFlushPTEs)
2989 {
2990 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
2991 {
2992 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
2993 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
2994 u32OrMask = X86_PTE_RW;
2995 u32AndMask = UINT32_MAX;
2996 bRet = true;
2997 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
2998 break;
2999
3000 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3001 u32OrMask = 0;
3002 u32AndMask = ~X86_PTE_RW;
3003 bRet = true;
3004 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3005 break;
3006 default:
3007 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3008 break;
3009 }
3010 }
3011 else
3012 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3013
3014 /* Update the counter if we're removing references. */
3015 if (!u32AndMask)
3016 {
3017 Assert(pPage->cPresent >= cRefs);
3018 Assert(pPool->cPresent >= cRefs);
3019 pPage->cPresent -= cRefs;
3020 pPool->cPresent -= cRefs;
3021 }
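            /* Example of the masks at work (informal): for a write-monitored page
               (PGM_PAGE_HNDL_PHYS_STATE_WRITE) the PTE keeps its frame but loses
               X86_PTE_RW, i.e. new = (old & ~X86_PTE_RW) | 0; when monitoring is
               disabled the write bit is restored instead, new = old | X86_PTE_RW. */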
3022
3023 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3024 {
3025 X86PTE Pte;
3026
3027 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", iPte, pPT->a[iPte], cRefs));
3028 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3029 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3030 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3031
3032 ASMAtomicWriteSize(&pPT->a[iPte].u, Pte.u);
3033 return bRet;
3034 }
3035#ifdef LOG_ENABLED
3036 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3037 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3038 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3039 {
3040 Log(("i=%d cRefs=%d\n", i, cRefs--));
3041 }
3042#endif
3043 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3044 break;
3045 }
3046
3047 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3048 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3049 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3050 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3051 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3052 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3053 {
3054 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3055 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3056 uint64_t u64AndMask, u64OrMask;
3057
3058 u64OrMask = 0;
3059 u64AndMask = 0;
3060 if (!fFlushPTEs)
3061 {
3062 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3063 {
3064 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3065 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3066 u64OrMask = X86_PTE_RW;
3067 u64AndMask = UINT64_MAX;
3068 bRet = true;
3069 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3070 break;
3071
3072 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3073 u64OrMask = 0;
3074 u64AndMask = ~((uint64_t)X86_PTE_RW);
3075 bRet = true;
3076 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3077 break;
3078
3079 default:
3080 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3081 break;
3082 }
3083 }
3084 else
3085 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3086
3087 /* Update the counter if we're removing references. */
3088 if (!u64AndMask)
3089 {
3090 Assert(pPage->cPresent >= cRefs);
3091 Assert(pPool->cPresent >= cRefs);
3092 pPage->cPresent -= cRefs;
3093 pPool->cPresent -= cRefs;
3094 }
3095
3096 if ((pPT->a[iPte].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3097 {
3098 X86PTEPAE Pte;
3099
3100 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", iPte, pPT->a[iPte], cRefs));
3101 Pte.u = (pPT->a[iPte].u & u64AndMask) | u64OrMask;
3102 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3103 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3104
3105 ASMAtomicWriteSize(&pPT->a[iPte].u, Pte.u);
3106 return bRet;
3107 }
3108#ifdef LOG_ENABLED
3109 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3110 Log(("Found %RX64 expected %RX64\n", pPT->a[iPte].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P), u64));
3111 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3112 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3113 {
3114 Log(("i=%d cRefs=%d\n", i, cRefs--));
3115 }
3116#endif
3117 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind));
3118 break;
3119 }
3120
3121#ifdef PGM_WITH_LARGE_PAGES
3122 /* Large page case only. */
3123 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3124 {
3125 Assert(HWACCMIsNestedPagingActive(pVM));
3126
3127 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3128 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3129
3130 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3131 {
3132 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64 cRefs=%#x\n", iPte, pPD->a[iPte], cRefs));
3133 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3134 pPD->a[iPte].u = 0;
3135
3136 /* Update the counter as we're removing references. */
3137 Assert(pPage->cPresent);
3138 Assert(pPool->cPresent);
3139 pPage->cPresent--;
3140 pPool->cPresent--;
3141
3142 return bRet;
3143 }
3144# ifdef LOG_ENABLED
3145 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3146 for (unsigned i = 0; i < RT_ELEMENTS(pPD->a); i++)
3147 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3148 {
3149 Log(("i=%d cRefs=%d\n", i, cRefs--));
3150 }
3151# endif
3152 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3153 break;
3154 }
3155
3156 /* AMD-V nested paging - @todo merge with EPT as we only check the parts that are identical. */
3157 case PGMPOOLKIND_PAE_PD_PHYS:
3158 {
3159 Assert(HWACCMIsNestedPagingActive(pVM));
3160
3161 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3162 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3163
3164 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3165 {
3166 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64 cRefs=%#x\n", iPte, pPD->a[iPte], cRefs));
3167 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3168 pPD->a[iPte].u = 0;
3169
3170 /* Update the counter as we're removing references. */
3171 Assert(pPage->cPresent);
3172 Assert(pPool->cPresent);
3173 pPage->cPresent--;
3174 pPool->cPresent--;
3175 return bRet;
3176 }
3177# ifdef LOG_ENABLED
3178 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3179 for (unsigned i = 0; i < RT_ELEMENTS(pPD->a); i++)
3180 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3181 {
3182 Log(("i=%d cRefs=%d\n", i, cRefs--));
3183 }
3184# endif
3185 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3186 break;
3187 }
3188#endif /* PGM_WITH_LARGE_PAGES */
3189
3190 default:
3191 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3192 }
3193 return bRet;
3194}
3195
3196
3197/**
3198 * Scans one shadow page table for mappings of a physical page.
3199 *
3200 * @param pVM The VM handle.
3201 * @param pPhysPage The guest page in question.
3202 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3203 * @param iShw The shadow page table.
3204 * @param cRefs The number of references made in that PT.
3205 */
3206static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
3207{
3208 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3209
3210 /* We should only come here when there's only one reference to this physical page. */
3211 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3212 Assert(cRefs == 1);
3213
3214 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
3215 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3216 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage), cRefs);
3217 if (!fKeptPTEs)
3218 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3219 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3220}
3221
3222
3223/**
3224 * Flushes a list of shadow page tables mapping the same physical page.
3225 *
3226 * @param pVM The VM handle.
3227 * @param pPhysPage The guest page in question.
3228 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3229 * @param iPhysExt The physical cross reference extent list to flush.
3230 */
3231static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3232{
3233 Assert(PGMIsLockOwner(pVM));
3234 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3235 bool fKeepList = false;
3236
3237 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3238 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3239
3240 const uint16_t iPhysExtStart = iPhysExt;
3241 PPGMPOOLPHYSEXT pPhysExt;
3242 do
3243 {
3244 Assert(iPhysExt < pPool->cMaxPhysExts);
3245 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3246 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3247 {
3248 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3249 {
3250 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i], 1);
3251 if (!fKeptPTEs)
3252 {
3253 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3254 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3255 }
3256 else
3257 fKeepList = true;
3258 }
3259 }
3260 /* next */
3261 iPhysExt = pPhysExt->iNext;
3262 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3263
3264 if (!fKeepList)
3265 {
3266 /* insert the list into the free list and clear the ram range entry. */
3267 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3268 pPool->iPhysExtFreeHead = iPhysExtStart;
3269 /* Invalidate the tracking data. */
3270 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3271 }
3272
3273 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3274}
3275
3276
3277/**
3278 * Flushes all shadow page table mappings of the given guest page.
3279 *
3280 * This is typically called when the host page backing the guest one has been
3281 * replaced or when the page protection was changed due to an access handler.
3282 *
3283 * @returns VBox status code.
3284 * @retval VINF_SUCCESS if all references have been successfully cleared.
3285 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3286 * pool cleaning. FF and sync flags are set.
3287 *
3288 * @param pVM The VM handle.
3289 * @param GCPhysPage GC physical address of the page in question
3290 * @param pPhysPage The guest page in question.
3291 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3292 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3293 * flushed, it is NOT touched if this isn't necessary.
3294 * The caller MUST initialize this to @a false.
3295 */
3296int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3297{
3298 PVMCPU pVCpu = VMMGetCpu(pVM);
3299 pgmLock(pVM);
3300 int rc = VINF_SUCCESS;
3301
3302#ifdef PGM_WITH_LARGE_PAGES
3303 /* Is this page part of a large page? */
3304 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3305 {
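        /* Note: GCPhysBase below is the 2 MB aligned base of the large page, while GCPhysPage is reduced to the 4 KB page being updated. */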
3306 PPGMPAGE pPhysBase;
3307 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3308
3309 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3310
3311 /* Fetch the large page base. */
3312 if (GCPhysBase != GCPhysPage)
3313 {
3314 pPhysBase = pgmPhysGetPage(&pVM->pgm.s, GCPhysBase);
3315 AssertFatal(pPhysBase);
3316 }
3317 else
3318 pPhysBase = pPhysPage;
3319
3320 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3321
3322 if (PGM_PAGE_GET_PDE_TYPE(pPhysBase) == PGM_PAGE_PDE_TYPE_PDE)
3323 {
3324 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3325 PGM_PAGE_SET_PDE_TYPE(pPhysBase, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3326
3327 /* Update the base as *only* that one has a reference and there's only one PDE to clear. */
3328 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pPhysBase, fFlushPTEs, pfFlushTLBs);
3329
3330 *pfFlushTLBs = true;
3331 pgmUnlock(pVM);
3332 return rc;
3333 }
3334 }
3335#else
3336 NOREF(GCPhysPage);
3337#endif /* PGM_WITH_LARGE_PAGES */
3338
3339 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3340 if (u16)
3341 {
3342 /*
3343 * The zero page is currently screwing up the tracking and we'll
3344 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3345 * is defined, zero pages won't normally be mapped. Some kind of solution
3346 * will be needed for this problem of course, but it will have to wait...
3347 */
3348 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3349 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3350 rc = VINF_PGM_GCPHYS_ALIASED;
3351 else
3352 {
3353# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3354 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3355 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3356 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3357# endif
3358
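            /* Dispatch on the tracking word: a plain cRefs/index pair means a single shadow PT holds the reference(s);
               the PHYSEXT marker means an extent list must be walked, and the overflowed index forces the slow full scan. */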
3359 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3360 pgmPoolTrackFlushGCPhysPT(pVM,
3361 pPhysPage,
3362 fFlushPTEs,
3363 PGMPOOL_TD_GET_IDX(u16),
3364 PGMPOOL_TD_GET_CREFS(u16));
3365 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3366 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3367 else
3368 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3369 *pfFlushTLBs = true;
3370
3371# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3372 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3373# endif
3374 }
3375 }
3376
3377 if (rc == VINF_PGM_GCPHYS_ALIASED)
3378 {
3379 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3380 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3381 rc = VINF_PGM_SYNC_CR3;
3382 }
3383 pgmUnlock(pVM);
3384 return rc;
3385}
3386
3387
3388/**
3389 * Scans all shadow page tables for mappings of a physical page.
3390 *
3391 * This may be slow, but it's most likely more efficient than cleaning
3392 * out the entire page pool / cache.
3393 *
3394 * @returns VBox status code.
3395 * @retval VINF_SUCCESS if all references have been successfully cleared.
3396 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3397 * a page pool cleaning.
3398 *
3399 * @param pVM The VM handle.
3400 * @param pPhysPage The guest page in question.
3401 */
3402int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3403{
3404 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3405 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3406 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3407 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3408
3409 /*
3410 * There is a limit to what makes sense.
3411 */
3412 if ( pPool->cPresent > 1024
3413 && pVM->cCpus == 1)
3414 {
3415 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3416 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3417 return VINF_PGM_GCPHYS_ALIASED;
3418 }
3419
3420 /*
3421 * Iterate all the pages until we've encountered all those in use.
3422 * This is a simple but not quite optimal solution.
3423 */
3424 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3425 const uint32_t u32 = u64;
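    /* u64 is the HCPhys|P pattern compared against PAE/EPT PTEs below; u32 is its 32-bit truncation used for legacy 32-bit PTEs. */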
3426 unsigned cLeft = pPool->cUsedPages;
3427 unsigned iPage = pPool->cCurPages;
3428 while (--iPage >= PGMPOOL_IDX_FIRST)
3429 {
3430 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3431 if ( pPage->GCPhys != NIL_RTGCPHYS
3432 && pPage->cPresent)
3433 {
3434 switch (pPage->enmKind)
3435 {
3436 /*
3437 * We only care about shadow page tables.
3438 */
3439 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3440 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3441 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3442 {
3443 unsigned cPresent = pPage->cPresent;
3444 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3445 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3446 if (pPT->a[i].n.u1Present)
3447 {
3448 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3449 {
3450 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3451 pPT->a[i].u = 0;
3452
3453 /* Update the counter as we're removing references. */
3454 Assert(pPage->cPresent);
3455 Assert(pPool->cPresent);
3456 pPage->cPresent--;
3457 pPool->cPresent--;
3458 }
3459 if (!--cPresent)
3460 break;
3461 }
3462 break;
3463 }
3464
3465 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3466 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3467 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3468 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3469 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3470 {
3471 unsigned cPresent = pPage->cPresent;
3472 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3473 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3474 if (pPT->a[i].n.u1Present)
3475 {
3476 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3477 {
3478 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3479 pPT->a[i].u = 0;
3480
3481 /* Update the counter as we're removing references. */
3482 Assert(pPage->cPresent);
3483 Assert(pPool->cPresent);
3484 pPage->cPresent--;
3485 pPool->cPresent--;
3486 }
3487 if (!--cPresent)
3488 break;
3489 }
3490 break;
3491 }
3492#ifndef IN_RC
3493 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3494 {
3495 unsigned cPresent = pPage->cPresent;
3496 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3497 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3498 if (pPT->a[i].n.u1Present)
3499 {
3500 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3501 {
3502 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3503 pPT->a[i].u = 0;
3504
3505 /* Update the counter as we're removing references. */
3506 Assert(pPage->cPresent);
3507 Assert(pPool->cPresent);
3508 pPage->cPresent--;
3509 pPool->cPresent--;
3510 }
3511 if (!--cPresent)
3512 break;
3513 }
3514 break;
3515 }
3516#endif
3517 }
3518 if (!--cLeft)
3519 break;
3520 }
3521 }
3522
3523 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3524 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3525
3526 /*
3527 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3528 */
3529 if (pPool->cPresent > 1024)
3530 {
3531 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3532 return VINF_PGM_GCPHYS_ALIASED;
3533 }
3534
3535 return VINF_SUCCESS;
3536}
3537
3538
3539/**
3540 * Clears the user entry in a user table.
3541 *
3542 * This is used to remove all references to a page when flushing it.
3543 */
3544static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3545{
3546 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3547 Assert(pUser->iUser < pPool->cCurPages);
3548 uint32_t iUserTable = pUser->iUserTable;
3549
3550 /*
3551 * Map the user page.
3552 */
3553 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3554 union
3555 {
3556 uint64_t *pau64;
3557 uint32_t *pau32;
3558 } u;
3559 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3560
3561 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3562
3563 /* Safety precaution in case we change the paging for other modes too in the future. */
3564 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3565
3566#ifdef VBOX_STRICT
3567 /*
3568 * Some sanity checks.
3569 */
3570 switch (pUserPage->enmKind)
3571 {
3572 case PGMPOOLKIND_32BIT_PD:
3573 case PGMPOOLKIND_32BIT_PD_PHYS:
3574 Assert(iUserTable < X86_PG_ENTRIES);
3575 break;
3576 case PGMPOOLKIND_PAE_PDPT:
3577 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3578 case PGMPOOLKIND_PAE_PDPT_PHYS:
3579 Assert(iUserTable < 4);
3580 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3581 break;
3582 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3583 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3584 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3585 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3586 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3587 case PGMPOOLKIND_PAE_PD_PHYS:
3588 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3589 break;
3590 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3591 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3592 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3593 break;
3594 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3595 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3596 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3597 break;
3598 case PGMPOOLKIND_64BIT_PML4:
3599 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3600 /* GCPhys >> PAGE_SHIFT is the index here */
3601 break;
3602 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3603 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3604 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3605 break;
3606
3607 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3608 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3609 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3610 break;
3611
3612 case PGMPOOLKIND_ROOT_NESTED:
3613 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3614 break;
3615
3616 default:
3617 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3618 break;
3619 }
3620#endif /* VBOX_STRICT */
3621
3622 /*
3623 * Clear the entry in the user page.
3624 */
3625 switch (pUserPage->enmKind)
3626 {
3627 /* 32-bit entries */
3628 case PGMPOOLKIND_32BIT_PD:
3629 case PGMPOOLKIND_32BIT_PD_PHYS:
3630 ASMAtomicWriteSize(&u.pau32[iUserTable], 0);
3631 break;
3632
3633 /* 64-bit entries */
3634 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3635 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3636 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3637 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3638 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3639#if defined(IN_RC)
3640 /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during CR3 load, so any
3641 * non-present PDPT will continue to cause page faults.
3642 */
3643 ASMReloadCR3();
3644#endif
3645 /* no break */
3646 case PGMPOOLKIND_PAE_PD_PHYS:
3647 case PGMPOOLKIND_PAE_PDPT_PHYS:
3648 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3649 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3650 case PGMPOOLKIND_64BIT_PML4:
3651 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3652 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3653 case PGMPOOLKIND_PAE_PDPT:
3654 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3655 case PGMPOOLKIND_ROOT_NESTED:
3656 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3657 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3658 ASMAtomicWriteSize(&u.pau64[iUserTable], 0);
3659 break;
3660
3661 default:
3662 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3663 }
3664}
3665
3666
3667/**
3668 * Clears all users of a page.
3669 */
3670static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3671{
3672 /*
3673 * Free all the user records.
3674 */
3675 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3676
3677 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3678 uint16_t i = pPage->iUserHead;
3679 while (i != NIL_PGMPOOL_USER_INDEX)
3680 {
3681 /* Clear the entry in the user table. */
3682 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3683
3684 /* Free it. */
3685 const uint16_t iNext = paUsers[i].iNext;
3686 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3687 paUsers[i].iNext = pPool->iUserFreeHead;
3688 pPool->iUserFreeHead = i;
3689
3690 /* Next. */
3691 i = iNext;
3692 }
3693 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3694}
3695
3696
3697/**
3698 * Allocates a new physical cross reference extent.
3699 *
3700 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3701 * @param pVM The VM handle.
3702 * @param piPhysExt Where to store the phys ext index.
3703 */
3704PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3705{
3706 Assert(PGMIsLockOwner(pVM));
3707 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3708 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3709 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3710 {
3711 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3712 return NULL;
3713 }
3714 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3715 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3716 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3717 *piPhysExt = iPhysExt;
3718 return pPhysExt;
3719}
3720
3721
3722/**
3723 * Frees a physical cross reference extent.
3724 *
3725 * @param pVM The VM handle.
3726 * @param iPhysExt The extent to free.
3727 */
3728void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3729{
3730 Assert(PGMIsLockOwner(pVM));
3731 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3732 Assert(iPhysExt < pPool->cMaxPhysExts);
3733 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3734 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3735 {
3736 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3737 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3738 }
3739 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3740 pPool->iPhysExtFreeHead = iPhysExt;
3741}
3742
3743
3744/**
3745 * Frees a list of physical cross reference extents.
3746 *
3747 * @param pVM The VM handle.
3748 * @param iPhysExt The first extent in the list to free.
3749 */
3750void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3751{
3752 Assert(PGMIsLockOwner(pVM));
3753 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3754
3755 const uint16_t iPhysExtStart = iPhysExt;
3756 PPGMPOOLPHYSEXT pPhysExt;
3757 do
3758 {
3759 Assert(iPhysExt < pPool->cMaxPhysExts);
3760 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3761 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3762 {
3763 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3764 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3765 }
3766
3767 /* next */
3768 iPhysExt = pPhysExt->iNext;
3769 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3770
3771 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3772 pPool->iPhysExtFreeHead = iPhysExtStart;
3773}
3774
3775
3776/**
3777 * Insert a reference into a list of physical cross reference extents.
3778 *
3779 * @returns The new tracking data for PGMPAGE.
3780 *
3781 * @param pVM The VM handle.
3782 * @param iPhysExt The physical extent index of the list head.
3783 * @param iShwPT The shadow page table index.
3784 * @param iPte Page table entry
3785 *
3786 */
3787static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3788{
3789 Assert(PGMIsLockOwner(pVM));
3790 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3791 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3792
3793 /* special common case. */
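    /* (pgmPoolTrackPhysExtAddref fills slots 0 and 1 when converting from single-reference tracking, so slot 2 is typically the first free one.) */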
3794 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3795 {
3796 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3797 paPhysExts[iPhysExt].apte[2] = iPte;
3798 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3799 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3800 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3801 }
3802
3803 /* general treatment. */
3804 const uint16_t iPhysExtStart = iPhysExt;
3805 unsigned cMax = 15;
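    /* cMax bounds the search for a free slot; once it is exhausted the extent list is freed and the page falls back to the 'overflowed' tracking state. */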
3806 for (;;)
3807 {
3808 Assert(iPhysExt < pPool->cMaxPhysExts);
3809 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3810 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3811 {
3812 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3813 paPhysExts[iPhysExt].apte[i] = iPte;
3814 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3815 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3816 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3817 }
3818 if (!--cMax)
3819 {
3820 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3821 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3822 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3823 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3824 }
3825 }
3826
3827 /* add another extent to the list. */
3828 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3829 if (!pNew)
3830 {
3831 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackNoExtentsLeft);
3832 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3833 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3834 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3835 }
3836 pNew->iNext = iPhysExtStart;
3837 pNew->aidx[0] = iShwPT;
3838 pNew->apte[0] = iPte;
3839 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
3840 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3841}
3842
3843
3844/**
3845 * Adds a reference to a guest physical page where extents are in use.
3846 *
3847 * @returns The new tracking data for PGMPAGE.
3848 *
3849 * @param pVM The VM handle.
3850 * @param pPhysPage Pointer to the aPages entry in the ram range.
3851 * @param u16 The ram range flags (top 16-bits).
3852 * @param iShwPT The shadow page table index.
3853 * @param iPte Page table entry
3854 */
3855uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
3856{
3857 pgmLock(pVM);
3858 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3859 {
3860 /*
3861 * Convert to extent list.
3862 */
3863 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3864 uint16_t iPhysExt;
3865 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3866 if (pPhysExt)
3867 {
3868 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3869 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3870 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3871 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
3872 pPhysExt->aidx[1] = iShwPT;
3873 pPhysExt->apte[1] = iPte;
3874 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3875 }
3876 else
3877 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3878 }
3879 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3880 {
3881 /*
3882 * Insert into the extent list.
3883 */
3884 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
3885 }
3886 else
3887 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3888 pgmUnlock(pVM);
3889 return u16;
3890}
3891
3892/**
3893 * Clear references to guest physical memory.
3894 *
3895 * @param pPool The pool.
3896 * @param pPage The page.
3897 * @param pPhysPage Pointer to the aPages entry in the ram range.
3898 * @param iPte Shadow PTE index
3899 */
3900void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
3901{
3902 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3903 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3904
3905 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3906 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3907 {
3908 PVM pVM = pPool->CTX_SUFF(pVM);
3909 pgmLock(pVM);
3910
3911 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3912 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3913 do
3914 {
3915 Assert(iPhysExt < pPool->cMaxPhysExts);
3916
3917 /*
3918 * Look for the shadow page and check if it's all freed.
3919 */
3920 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3921 {
3922 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
3923 && paPhysExts[iPhysExt].apte[i] == iPte)
3924 {
3925 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3926 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3927
3928 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3929 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3930 {
3931 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3932 pgmUnlock(pVM);
3933 return;
3934 }
3935
3936 /* we can free the node. */
3937 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3938 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3939 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3940 {
3941 /* lonely node */
3942 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3943 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3944 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3945 }
3946 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3947 {
3948 /* head */
3949 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3950 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3951 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3952 }
3953 else
3954 {
3955 /* in list */
3956 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
3957 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3958 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3959 }
3960 iPhysExt = iPhysExtNext;
3961 pgmUnlock(pVM);
3962 return;
3963 }
3964 }
3965
3966 /* next */
3967 iPhysExtPrev = iPhysExt;
3968 iPhysExt = paPhysExts[iPhysExt].iNext;
3969 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3970
3971 pgmUnlock(pVM);
3972 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3973 }
3974 else /* nothing to do */
3975 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3976}
3977
3978/**
3979 * Clear references to guest physical memory.
3980 *
3981 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3982 * is assumed to be correct, so the linear search can be skipped and we can assert
3983 * at an earlier point.
3984 *
3985 * @param pPool The pool.
3986 * @param pPage The page.
3987 * @param HCPhys The host physical address corresponding to the guest page.
3988 * @param GCPhys The guest physical address corresponding to HCPhys.
3989 * @param iPte Shadow PTE index
3990 */
3991static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
3992{
3993 /*
3994 * Walk range list.
3995 */
3996 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3997 while (pRam)
3998 {
3999 RTGCPHYS off = GCPhys - pRam->GCPhys;
4000 if (off < pRam->cb)
4001 {
4002 /* does it match? */
4003 const unsigned iPage = off >> PAGE_SHIFT;
4004 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4005#ifdef LOG_ENABLED
4006 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
4007 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4008#endif
4009 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4010 {
4011 Assert(pPage->cPresent);
4012 Assert(pPool->cPresent);
4013 pPage->cPresent--;
4014 pPool->cPresent--;
4015 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4016 return;
4017 }
4018 break;
4019 }
4020 pRam = pRam->CTX_SUFF(pNext);
4021 }
4022 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4023}
4024
4025
4026/**
4027 * Clear references to guest physical memory.
4028 *
4029 * @param pPool The pool.
4030 * @param pPage The page.
4031 * @param HCPhys The host physical address corresponding to the guest page.
4032 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4033 * @param iPte Shadow pte index
4034 */
4035void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4036{
4037 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4038
4039 /*
4040 * Walk range list.
4041 */
4042 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4043 while (pRam)
4044 {
4045 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
4046 if (off < pRam->cb)
4047 {
4048 /* does it match? */
4049 const unsigned iPage = off >> PAGE_SHIFT;
4050 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4051 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4052 {
4053 Assert(pPage->cPresent);
4054 Assert(pPool->cPresent);
4055 pPage->cPresent--;
4056 pPool->cPresent--;
4057 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4058 return;
4059 }
4060 break;
4061 }
4062 pRam = pRam->CTX_SUFF(pNext);
4063 }
4064
4065 /*
4066 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4067 */
4068 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4069 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4070 while (pRam)
4071 {
4072 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4073 while (iPage-- > 0)
4074 {
4075 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4076 {
4077 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4078 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4079 Assert(pPage->cPresent);
4080 Assert(pPool->cPresent);
4081 pPage->cPresent--;
4082 pPool->cPresent--;
4083 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4084 return;
4085 }
4086 }
4087 pRam = pRam->CTX_SUFF(pNext);
4088 }
4089
4090 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
4091}
4092
4093
4094/**
4095 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4096 *
4097 * @param pPool The pool.
4098 * @param pPage The page.
4099 * @param pShwPT The shadow page table (mapping of the page).
4100 * @param pGstPT The guest page table.
4101 */
4102DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4103{
4104 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4105 if (pShwPT->a[i].n.u1Present)
4106 {
4107 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4108 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4109 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4110 if (!pPage->cPresent)
4111 break;
4112 }
4113}
4114
4115
4116/**
4117 * Clear references to guest physical memory in a PAE / 32-bit page table.
4118 *
4119 * @param pPool The pool.
4120 * @param pPage The page.
4121 * @param pShwPT The shadow page table (mapping of the page).
4122 * @param pGstPT The guest page table (just a half one).
4123 */
4124DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
4125{
4126 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4127 if (pShwPT->a[i].n.u1Present)
4128 {
4129 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4130 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4131 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4132 if (!pPage->cPresent)
4133 break;
4134 }
4135}
4136
4137
4138/**
4139 * Clear references to guest physical memory in a PAE / PAE page table.
4140 *
4141 * @param pPool The pool.
4142 * @param pPage The page.
4143 * @param pShwPT The shadow page table (mapping of the page).
4144 * @param pGstPT The guest page table.
4145 */
4146DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
4147{
4148 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4149 if (pShwPT->a[i].n.u1Present)
4150 {
4151 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4152 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4153 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
4154 if (!pPage->cPresent)
4155 break;
4156 }
4157}
4158
4159
4160/**
4161 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4162 *
4163 * @param pPool The pool.
4164 * @param pPage The page.
4165 * @param pShwPT The shadow page table (mapping of the page).
4166 */
4167DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4168{
4169 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4170 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4171 if (pShwPT->a[i].n.u1Present)
4172 {
4173 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4174 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4175 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys, i);
4176 if (!pPage->cPresent)
4177 break;
4178 }
4179}
4180
4181
4182/**
4183 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4184 *
4185 * @param pPool The pool.
4186 * @param pPage The page.
4187 * @param pShwPT The shadow page table (mapping of the page).
4188 */
4189DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
4190{
4191 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4192 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4193 if (pShwPT->a[i].n.u1Present)
4194 {
4195 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4196 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
4197 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys, i);
4198 if (!pPage->cPresent)
4199 break;
4200 }
4201}
4202
4203
4204/**
4205 * Clear references to shadowed pages in an EPT page table.
4206 *
4207 * @param pPool The pool.
4208 * @param pPage The page.
4209 * @param pShwPT The shadow page table (mapping of the page).
4210 */
4211DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4212{
4213 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4214 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4215 if (pShwPT->a[i].n.u1Present)
4216 {
4217 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4218 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4219 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys, i);
4220 if (!pPage->cPresent)
4221 break;
4222 }
4223}
4224
4225
4226
4227/**
4228 * Clear references to shadowed pages in a 32-bit page directory.
4229 *
4230 * @param pPool The pool.
4231 * @param pPage The page.
4232 * @param pShwPD The shadow page directory (mapping of the page).
4233 */
4234DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4235{
4236 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4237 {
4238 if ( pShwPD->a[i].n.u1Present
4239 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4240 )
4241 {
4242 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4243 if (pSubPage)
4244 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4245 else
4246 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4247 }
4248 }
4249}
4250
4251/**
4252 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4253 *
4254 * @param pPool The pool.
4255 * @param pPage The page.
4256 * @param pShwPD The shadow page directory (mapping of the page).
4257 */
4258DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4259{
4260 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4261 {
4262 if ( pShwPD->a[i].n.u1Present
4263 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4264 )
4265 {
4266#ifdef PGM_WITH_LARGE_PAGES
4267 if (pShwPD->a[i].b.u1Size)
4268 {
4269 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4270 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4271 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */, i);
4272 }
4273 else
4274#endif
4275 {
4276 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4277 if (pSubPage)
4278 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4279 else
4280 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4281 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4282 }
4283 }
4284 }
4285}
4286
4287/**
4288 * Clear references to shadowed pages in a PAE page directory pointer table.
4289 *
4290 * @param pPool The pool.
4291 * @param pPage The page.
4292 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4293 */
4294DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4295{
4296 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4297 {
4298 if ( pShwPDPT->a[i].n.u1Present
4299 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4300 )
4301 {
4302 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4303 if (pSubPage)
4304 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4305 else
4306 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4307 }
4308 }
4309}
4310
4311
4312/**
4313 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4314 *
4315 * @param pPool The pool.
4316 * @param pPage The page.
4317 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4318 */
4319DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4320{
4321 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4322 {
4323 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
4324 if (pShwPDPT->a[i].n.u1Present)
4325 {
4326 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4327 if (pSubPage)
4328 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4329 else
4330 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4331 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4332 }
4333 }
4334}
4335
4336
4337/**
4338 * Clear references to shadowed pages in a 64-bit level 4 page table.
4339 *
4340 * @param pPool The pool.
4341 * @param pPage The page.
4342 * @param pShwPML4 The shadow PML4 table (mapping of the page).
4343 */
4344DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4345{
4346 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4347 {
4348 if (pShwPML4->a[i].n.u1Present)
4349 {
4350 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4351 if (pSubPage)
4352 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4353 else
4354 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4355 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4356 }
4357 }
4358}
4359
4360
4361/**
4362 * Clear references to shadowed pages in an EPT page directory.
4363 *
4364 * @param pPool The pool.
4365 * @param pPage The page.
4366 * @param pShwPD The shadow page directory (mapping of the page).
4367 */
4368DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4369{
4370 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4371 {
4372 if (pShwPD->a[i].n.u1Present)
4373 {
4374#ifdef PGM_WITH_LARGE_PAGES
4375 if (pShwPD->a[i].b.u1Size)
4376 {
4377 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4378 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4379 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */, i);
4380 }
4381 else
4382#endif
4383 {
4384 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4385 if (pSubPage)
4386 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4387 else
4388 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4389 }
4390 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4391 }
4392 }
4393}
4394
4395
4396/**
4397 * Clear references to shadowed pages in an EPT page directory pointer table.
4398 *
4399 * @param pPool The pool.
4400 * @param pPage The page.
4401 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4402 */
4403DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4404{
4405 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4406 {
4407 if (pShwPDPT->a[i].n.u1Present)
4408 {
4409 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4410 if (pSubPage)
4411 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4412 else
4413 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4414 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4415 }
4416 }
4417}
4418
4419
4420/**
4421 * Clears all references made by this page.
4422 *
4423 * This includes other shadow pages and GC physical addresses.
4424 *
4425 * @param pPool The pool.
4426 * @param pPage The page.
4427 */
4428static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4429{
4430 /*
4431 * Map the shadow page and take action according to the page kind.
4432 */
4433 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
4434 switch (pPage->enmKind)
4435 {
4436 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4437 {
4438 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4439 void *pvGst;
4440 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4441 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4442 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4443 break;
4444 }
4445
4446 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4447 {
4448 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4449 void *pvGst;
4450 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4451 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
4452 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4453 break;
4454 }
4455
4456 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4457 {
4458 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4459 void *pvGst;
4460 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4461 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
4462 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4463 break;
4464 }
4465
4466 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4467 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4468 {
4469 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4470 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4471 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4472 break;
4473 }
4474
4475 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4476 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4477 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4478 {
4479 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4480 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4481 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4482 break;
4483 }
4484
4485 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4486 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4487 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4488 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4489 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4490 case PGMPOOLKIND_PAE_PD_PHYS:
4491 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4492 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4493 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4494 break;
4495
4496 case PGMPOOLKIND_32BIT_PD_PHYS:
4497 case PGMPOOLKIND_32BIT_PD:
4498 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4499 break;
4500
4501 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4502 case PGMPOOLKIND_PAE_PDPT:
4503 case PGMPOOLKIND_PAE_PDPT_PHYS:
4504 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4505 break;
4506
4507 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4508 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4509 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4510 break;
4511
4512 case PGMPOOLKIND_64BIT_PML4:
4513 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4514 break;
4515
4516 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4517 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4518 break;
4519
4520 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4521 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4522 break;
4523
4524 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4525 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4526 break;
4527
4528 default:
4529 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4530 }
4531
4532 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4533 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4534 ASMMemZeroPage(pvShw);
4535 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4536 pPage->fZeroed = true;
4537 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4538 Assert(!pPage->cPresent);
4539}
4540
4541/**
4542 * Flushes a pool page.
4543 *
4544 * This moves the page to the free list after removing all user references to it.
4545 *
4546 * @returns VBox status code.
4547 * @retval VINF_SUCCESS on success.
4548 * @param pPool The pool.
4549 * @param pPage The shadow page.
4550 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4551 */
4552int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4553{
4554 PVM pVM = pPool->CTX_SUFF(pVM);
4555 bool fFlushRequired = false;
4556
4557 int rc = VINF_SUCCESS;
4558 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4559 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4560 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4561
4562 /*
4563 * Quietly reject any attempts at flushing any of the special root pages.
4564 */
4565 if (pPage->idx < PGMPOOL_IDX_FIRST)
4566 {
4567 AssertFailed(); /* can no longer happen */
4568 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4569 return VINF_SUCCESS;
4570 }
4571
4572 pgmLock(pVM);
4573
4574 /*
4575 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4576 */
4577 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4578 {
4579 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4580 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4581 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4582 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4583 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4584 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4585 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4586 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4587 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4588 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4589 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4590 pgmUnlock(pVM);
4591 return VINF_SUCCESS;
4592 }
4593
4594#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4595 /* Start a subset so we won't run out of mapping space. */
4596 PVMCPU pVCpu = VMMGetCpu(pVM);
4597 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4598#endif
4599
4600 /*
4601 * Mark the page as being in need of an ASMMemZeroPage().
4602 */
4603 pPage->fZeroed = false;
4604
4605#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4606 if (pPage->fDirty)
4607 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4608#endif
4609
4610 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4611 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4612 fFlushRequired = true;
4613
4614 /*
4615 * Clear the page.
4616 */
4617 pgmPoolTrackClearPageUsers(pPool, pPage);
4618 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4619 pgmPoolTrackDeref(pPool, pPage);
4620 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4621
4622 /*
4623 * Flush it from the cache.
4624 */
4625 pgmPoolCacheFlushPage(pPool, pPage);
4626
4627#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4628 /* Heavy stuff done. */
4629 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4630#endif
4631
4632 /*
4633 * Deregister the monitoring.
4634 */
4635 if (pPage->fMonitored)
4636 rc = pgmPoolMonitorFlush(pPool, pPage);
4637
4638 /*
4639 * Free the page.
4640 */
4641 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4642 pPage->iNext = pPool->iFreeHead;
4643 pPool->iFreeHead = pPage->idx;
4644 pPage->enmKind = PGMPOOLKIND_FREE;
4645 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4646 pPage->GCPhys = NIL_RTGCPHYS;
4647 pPage->fReusedFlushPending = false;
4648
4649 pPool->cUsedPages--;
4650
4651 /* Flush the TLBs of all VCPUs if required. */
4652 if ( fFlushRequired
4653 && fFlush)
4654 {
4655 PGM_INVL_ALL_VCPU_TLBS(pVM);
4656 }
4657
4658 pgmUnlock(pVM);
4659 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4660 return rc;
4661}
4662
4663
4664/**
4665 * Frees a usage of a pool page.
4666 *
4667 * The caller is responsible for updating the user table so that it no longer
4668 * references the shadow page.
4669 *
4670 * @param pPool The pool.
4671 * @param pPage The shadow page.
4672 * @param iUser The shadow page pool index of the user table.
4673 * @param iUserTable The index into the user table (shadowed).
4674 */
4675void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4676{
4677 PVM pVM = pPool->CTX_SUFF(pVM);
4678
4679 STAM_PROFILE_START(&pPool->StatFree, a);
4680 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4681 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4682 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4683 pgmLock(pVM);
4684 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4685 if (!pPage->fCached)
4686 pgmPoolFlushPage(pPool, pPage);
4687 pgmUnlock(pVM);
4688 STAM_PROFILE_STOP(&pPool->StatFree, a);
4689}
4690
4691
4692/**
4693 * Makes one or more free pages available.
4694 *
4695 * @returns VBox status code.
4696 * @retval VINF_SUCCESS on success.
4697 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4698 *
4699 * @param pPool The pool.
4700 * @param enmKind Page table kind
4701 * @param iUser The user of the page.
4702 */
4703static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4704{
4705 PVM pVM = pPool->CTX_SUFF(pVM);
4706
4707 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4708
4709 /*
4710 * If the pool isn't fully grown yet, expand it.
4711 */
4712 if ( pPool->cCurPages < pPool->cMaxPages
4713#if defined(IN_RC)
4714 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4715 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4716 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4717#endif
4718 )
4719 {
4720 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4721#ifdef IN_RING3
4722 int rc = PGMR3PoolGrow(pVM);
4723#else
4724 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4725#endif
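    /* Pool growth allocates host memory and must be done in ring-3; from ring-0/RC we ask ring-3 to do it via the call-ring-3 interface. */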
4726 if (RT_FAILURE(rc))
4727 return rc;
4728 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4729 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4730 return VINF_SUCCESS;
4731 }
4732
4733 /*
4734 * Free one cached page.
4735 */
4736 return pgmPoolCacheFreeOne(pPool, iUser);
4737}
4738
4739/**
4740 * Allocates a page from the pool.
4741 *
4742 * This page may actually be a cached page and not in need of any processing
4743 * on the callers part.
4744 *
4745 * @returns VBox status code.
4746 * @retval VINF_SUCCESS if a NEW page was allocated.
4747 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4748 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4749 * @param pVM The VM handle.
4750 * @param GCPhys The GC physical address of the page we're going to shadow.
4751 * For 4MB and 2MB PD entries, it's the first address the
4752 * shadow PT is covering.
4753 * @param enmKind The kind of mapping.
4754 * @param enmAccess Access type for the mapping (only relevant for big pages)
4755 * @param iUser The shadow page pool index of the user table.
4756 * @param iUserTable The index into the user table (shadowed).
4757 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4758 * @param fLockPage Lock the page
4759 */
4760int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4761{
4762 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4763 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4764 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4765 *ppPage = NULL;
4766 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4767 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4768 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4769
4770 pgmLock(pVM);
4771
4772 if (pPool->fCacheEnabled)
4773 {
4774 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4775 if (RT_SUCCESS(rc2))
4776 {
4777 if (fLockPage)
4778 pgmPoolLockPage(pPool, *ppPage);
4779 pgmUnlock(pVM);
4780 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4781 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4782 return rc2;
4783 }
4784 }
4785
4786 /*
4787 * Allocate a new one.
4788 */
4789 int rc = VINF_SUCCESS;
4790 uint16_t iNew = pPool->iFreeHead;
4791 if (iNew == NIL_PGMPOOL_IDX)
4792 {
4793 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4794 if (RT_FAILURE(rc))
4795 {
4796 pgmUnlock(pVM);
4797 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4798 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4799 return rc;
4800 }
4801 iNew = pPool->iFreeHead;
4802 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4803 }
4804
4805 /* unlink the free head */
4806 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4807 pPool->iFreeHead = pPage->iNext;
4808 pPage->iNext = NIL_PGMPOOL_IDX;
4809
4810 /*
4811 * Initialize it.
4812 */
4813 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4814 pPage->enmKind = enmKind;
4815 pPage->enmAccess = enmAccess;
4816 pPage->GCPhys = GCPhys;
4817 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4818 pPage->fMonitored = false;
4819 pPage->fCached = false;
4820#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4821 pPage->fDirty = false;
4822#endif
4823 pPage->fReusedFlushPending = false;
4824 pPage->cModifications = 0;
4825 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4826 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4827 pPage->cLocked = 0;
4828 pPage->cPresent = 0;
4829 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
4830 pPage->pvLastAccessHandlerFault = 0;
4831 pPage->cLastAccessHandlerCount = 0;
4832 pPage->pvLastAccessHandlerRip = 0;
4833
4834 /*
4835 * Insert into the tracking and cache. If this fails, free the page.
4836 */
4837 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4838 if (RT_FAILURE(rc3))
4839 {
4840 pPool->cUsedPages--;
4841 pPage->enmKind = PGMPOOLKIND_FREE;
4842 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4843 pPage->GCPhys = NIL_RTGCPHYS;
4844 pPage->iNext = pPool->iFreeHead;
4845 pPool->iFreeHead = pPage->idx;
4846 pgmUnlock(pVM);
4847 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4848 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4849 return rc3;
4850 }
4851
4852 /*
4853 * Commit the allocation, clear the page and return.
4854 */
4855#ifdef VBOX_WITH_STATISTICS
4856 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4857 pPool->cUsedPagesHigh = pPool->cUsedPages;
4858#endif
4859
4860 if (!pPage->fZeroed)
4861 {
4862 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4863 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4864 ASMMemZeroPage(pv);
4865 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4866 }
4867
4868 *ppPage = pPage;
4869 if (fLockPage)
4870 pgmPoolLockPage(pPool, pPage);
4871 pgmUnlock(pVM);
4872 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4873 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4874 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4875 return rc;
4876}
4877
4878
4879/**
4880 * Frees a usage of a pool page.
4881 *
4882 * @param pVM The VM handle.
4883 * @param HCPhys The HC physical address of the shadow page.
4884 * @param iUser The shadow page pool index of the user table.
4885 * @param iUserTable The index into the user table (shadowed).
4886 */
4887void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4888{
4889 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4890 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4891 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4892}
4893
4894/**
4895 * Internal worker for finding an 'in-use' shadow page given by its physical address.
4896 *
4897 * @returns Pointer to the shadow page structure.
4898 * @param pPool The pool.
4899 * @param HCPhys The HC physical address of the shadow page.
4900 */
4901PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4902{
4903 PVM pVM = pPool->CTX_SUFF(pVM);
4904
4905 Assert(PGMIsLockOwner(pVM));
4906
4907 /*
4908 * Look up the page.
4909 */
4910 pgmLock(pVM);
4911 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4912 pgmUnlock(pVM);
4913
4914 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4915 return pPage;
4916}
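/*
 * The AVL tree is keyed on the page aligned host physical address, so the
 * X86_PTE_PAE_PG_MASK applied above strips any low bits a caller may have
 * left in.  A minimal sketch, assuming a hypothetical HCPhysShwCR3 local
 * holding the shadow root address:
 *
 *      PPGMPOOLPAGE pShwRoot = pgmPoolGetPage(pPool, HCPhysShwCR3);
 */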
4917
4918#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
4919/**
4920 * Flushes the specified page if present.
4921 *
4922 * @param pVM The VM handle.
4923 * @param GCPhys Guest physical address of the page to flush.
4924 */
4925void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
4926{
4927 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4928
4929 VM_ASSERT_EMT(pVM);
4930
4931 /*
4932 * Look up the GCPhys in the hash.
4933 */
4934 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
4935 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
4936 if (i == NIL_PGMPOOL_IDX)
4937 return;
4938
4939 do
4940 {
4941 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4942 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
4943 {
4944 switch (pPage->enmKind)
4945 {
4946 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4947 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4948 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4949 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4950 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4951 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4952 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4953 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4954 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4955 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4956 case PGMPOOLKIND_64BIT_PML4:
4957 case PGMPOOLKIND_32BIT_PD:
4958 case PGMPOOLKIND_PAE_PDPT:
4959 {
4960 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
4961#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4962 if (pPage->fDirty)
4963 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
4964 else
4965#endif
4966 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
4967 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
4968 pgmPoolMonitorChainFlush(pPool, pPage);
4969 return;
4970 }
4971
4972 /* ignore, no monitoring. */
4973 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4974 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4975 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4976 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4977 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4978 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4979 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4980 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4981 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4982 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4983 case PGMPOOLKIND_ROOT_NESTED:
4984 case PGMPOOLKIND_PAE_PD_PHYS:
4985 case PGMPOOLKIND_PAE_PDPT_PHYS:
4986 case PGMPOOLKIND_32BIT_PD_PHYS:
4987 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4988 break;
4989
4990 default:
4991 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4992 }
4993 }
4994
4995 /* next */
4996 i = pPage->iNext;
4997 } while (i != NIL_PGMPOOL_IDX);
4998 return;
4999}
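/*
 * Several pool pages may hash to and shadow the same guest physical page
 * (e.g. a 32-bit guest PD shadowed by four PAE PDs), which is why the loop
 * above walks the whole hash chain.  It can return on the first monitored
 * hit because pgmPoolMonitorChainFlush takes the entire monitoring chain for
 * that page down with it.
 */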
5000#endif /* IN_RING3 */
5001
5002#ifdef IN_RING3
5003
5004
5005/**
5006 * Resets a CPU's shadow paging state on hot plugging.
5007 *
5008 * @param pVM The VM handle.
5009 * @param pVCpu The virtual CPU.
5010 */
5011void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5012{
5013 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5014
5015 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5016 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5017 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5018}
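/*
 * Exiting and immediately re-entering the shadow mode drops and re-creates
 * this VCPU's references to the pool root pages; the two forced action flags
 * then make the next guest execution resync CR3 and flush the TLB, so the
 * unplugged CPU resumes from a clean paging state.
 */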
5019
5020
5021/**
5022 * Flushes the entire cache.
5023 *
5024 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5025 * this and will execute the CR3 flush.
5026 *
5027 * @param pVM The VM handle.
5028 */
5029void pgmR3PoolReset(PVM pVM)
5030{
5031 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5032
5033 Assert(PGMIsLockOwner(pVM));
5034 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5035 LogFlow(("pgmR3PoolReset:\n"));
5036
5037 /*
5038 * If there are no pages in the pool, there is nothing to do.
5039 */
5040 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5041 {
5042 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5043 return;
5044 }
5045
5046 /*
5047 * Exit the shadow mode since we're going to clear everything,
5048 * including the root page.
5049 */
5050 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5051 {
5052 PVMCPU pVCpu = &pVM->aCpus[i];
5053 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5054 }
5055
5056 /*
5057 * Nuke the free list and reinsert all pages into it.
5058 */
5059 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5060 {
5061 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5062
5063 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5064 if (pPage->fMonitored)
5065 pgmPoolMonitorFlush(pPool, pPage);
5066 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5067 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5068 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5069 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5070 pPage->cModifications = 0;
5071 pPage->GCPhys = NIL_RTGCPHYS;
5072 pPage->enmKind = PGMPOOLKIND_FREE;
5073 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5074 Assert(pPage->idx == i);
5075 pPage->iNext = i + 1;
5076 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5077 pPage->fSeenNonGlobal = false;
5078 pPage->fMonitored = false;
5079#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5080 pPage->fDirty = false;
5081#endif
5082 pPage->fCached = false;
5083 pPage->fReusedFlushPending = false;
5084 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5085 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5086 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5087 pPage->cLocked = 0;
5088 }
5089 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5090 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5091 pPool->cUsedPages = 0;
5092
5093 /*
5094 * Zap and reinitialize the user records.
5095 */
5096 pPool->cPresent = 0;
5097 pPool->iUserFreeHead = 0;
5098 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5099 const unsigned cMaxUsers = pPool->cMaxUsers;
5100 for (unsigned i = 0; i < cMaxUsers; i++)
5101 {
5102 paUsers[i].iNext = i + 1;
5103 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5104 paUsers[i].iUserTable = 0xfffffffe;
5105 }
5106 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5107
5108 /*
5109 * Clear all the GCPhys links and rebuild the phys ext free list.
5110 */
5111 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
5112 pRam;
5113 pRam = pRam->CTX_SUFF(pNext))
5114 {
5115 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5116 while (iPage-- > 0)
5117 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
5118 }
5119
5120 pPool->iPhysExtFreeHead = 0;
5121 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5122 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5123 for (unsigned i = 0; i < cMaxPhysExts; i++)
5124 {
5125 paPhysExts[i].iNext = i + 1;
5126 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5127 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5128 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5129 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5130 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5131 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5132 }
5133 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5134
5135 /*
5136 * Just zap the modified list.
5137 */
5138 pPool->cModifiedPages = 0;
5139 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5140
5141 /*
5142 * Clear the GCPhys hash and the age list.
5143 */
5144 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5145 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5146 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5147 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5148
5149#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5150 /* Clear all dirty pages. */
5151 pPool->idxFreeDirtyPage = 0;
5152 pPool->cDirtyPages = 0;
5153 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
5154 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
5155#endif
5156
5157 /*
5158 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5159 */
5160 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
5161 {
5162 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5163 pPage->iNext = NIL_PGMPOOL_IDX;
5164 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5165 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5166 pPage->cModifications = 0;
5167 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
5168 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5169 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5170 if (pPage->fMonitored)
5171 {
5172 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
5173 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
5174 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
5175 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5176 pPool->pszAccessHandler);
5177 AssertFatalRCSuccess(rc);
5178 pgmPoolHashInsert(pPool, pPage);
5179 }
5180 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5181 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5182 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5183 }
5184
5185 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5186 {
5187 /*
5188 * Re-enter the shadowing mode and assert Sync CR3 FF.
5189 */
5190 PVMCPU pVCpu = &pVM->aCpus[i];
5191 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5192 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5193 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5194 }
5195
5196 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5197}
5198#endif /* IN_RING3 */
5199
5200#ifdef LOG_ENABLED
5201static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5202{
5203 switch (enmKind)
5204 {
5205 case PGMPOOLKIND_INVALID:
5206 return "PGMPOOLKIND_INVALID";
5207 case PGMPOOLKIND_FREE:
5208 return "PGMPOOLKIND_FREE";
5209 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5210 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5211 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5212 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5213 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5214 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5215 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5216 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5217 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5218 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5219 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5220 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5221 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5222 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5223 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5224 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5225 case PGMPOOLKIND_32BIT_PD:
5226 return "PGMPOOLKIND_32BIT_PD";
5227 case PGMPOOLKIND_32BIT_PD_PHYS:
5228 return "PGMPOOLKIND_32BIT_PD_PHYS";
5229 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5230 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5231 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5232 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5233 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5234 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5235 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5236 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5237 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5238 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5239 case PGMPOOLKIND_PAE_PD_PHYS:
5240 return "PGMPOOLKIND_PAE_PD_PHYS";
5241 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5242 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5243 case PGMPOOLKIND_PAE_PDPT:
5244 return "PGMPOOLKIND_PAE_PDPT";
5245 case PGMPOOLKIND_PAE_PDPT_PHYS:
5246 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5247 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5248 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5249 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5250 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5251 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5252 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5253 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5254 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5255 case PGMPOOLKIND_64BIT_PML4:
5256 return "PGMPOOLKIND_64BIT_PML4";
5257 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5258 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5259 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5260 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5261 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5262 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5263 case PGMPOOLKIND_ROOT_NESTED:
5264 return "PGMPOOLKIND_ROOT_NESTED";
5265 }
5266 return "Unknown kind!";
5267}
5268#endif /* LOG_ENABLED */